1
1
mirror of https://github.com/mrabarnett/mrab-regex.git synced 2025-10-05 20:02:39 +02:00

Updated to Unicode 16.0.0.

This commit is contained in:
Matthew Barnett
2024-09-11 17:22:19 +01:00
parent e8a8d28aa3
commit d3510fea2a
9 changed files with 6426 additions and 5441 deletions

View File

@@ -165,7 +165,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@4.1.7
with:
name: regex-files
path: dist

View File

@@ -21,7 +21,7 @@ The regex module releases the GIL during matching on instances of the built-in (
Unicode
-------
This module supports Unicode 15.1.0. Full Unicode case-folding is supported.
This module supports Unicode 16.0.0. Full Unicode case-folding is supported.
Flags
-----

View File

@@ -1,3 +1,7 @@
Version: 2024.9.11
Updated to Unicode 16.0.0.
Version: 2024.7.24
Git issue 539: Bug: Partial matching fails on a simple example

View File

@@ -382,7 +382,7 @@ ul.auto-toc {
</div>
<div class="section" id="unicode">
<h1>Unicode</h1>
<p>This module supports Unicode 15.1.0. Full Unicode case-folding is supported.</p>
<p>This module supports Unicode 16.0.0. Full Unicode case-folding is supported.</p>
</div>
<div class="section" id="flags">
<h1>Flags</h1>

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ typedef unsigned char BOOL;
#define RE_MAX_CASES 4
#define RE_MAX_FOLDED 3
#define RE_MAX_SCX 21
#define RE_MAX_SCX 23
typedef struct RE_Property {
RE_UINT16 name;
@@ -34,9 +34,9 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_PROP_GC 0x1E
#define RE_PROP_CASED 0xA
#define RE_PROP_UPPERCASE 0x5B
#define RE_PROP_UPPERCASE 0x5C
#define RE_PROP_LOWERCASE 0x38
#define RE_PROP_SCX 0x55
#define RE_PROP_SCX 0x56
#define RE_PROP_C 30
#define RE_PROP_L 31
@@ -96,15 +96,15 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_PROP_DIGIT 0x1E0009
#define RE_PROP_GRAPH 0x1F0001
#define RE_PROP_LOWER 0x380001
#define RE_PROP_PRINT 0x500001
#define RE_PROP_SPACE 0x5E0001
#define RE_PROP_UPPER 0x5B0001
#define RE_PROP_WORD 0x5F0001
#define RE_PROP_XDIGIT 0x610001
#define RE_PROP_POSIX_ALNUM 0x4B0001
#define RE_PROP_POSIX_DIGIT 0x4C0001
#define RE_PROP_POSIX_PUNCT 0x4D0001
#define RE_PROP_POSIX_XDIGIT 0x4E0001
#define RE_PROP_PRINT 0x510001
#define RE_PROP_SPACE 0x5F0001
#define RE_PROP_UPPER 0x5C0001
#define RE_PROP_WORD 0x600001
#define RE_PROP_XDIGIT 0x620001
#define RE_PROP_POSIX_ALNUM 0x4C0001
#define RE_PROP_POSIX_DIGIT 0x4D0001
#define RE_PROP_POSIX_PUNCT 0x4E0001
#define RE_PROP_POSIX_XDIGIT 0x4F0001
#define RE_WBREAK_OTHER 0
#define RE_WBREAK_LF 1
@@ -180,8 +180,8 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_LBREAK_NONSTARTER 28
#define RE_LBREAK_AKSARA 29
#define RE_LBREAK_VIRAMA 30
#define RE_LBREAK_IDEOGRAPHIC 31
#define RE_LBREAK_AKSARASTART 32
#define RE_LBREAK_AKSARASTART 31
#define RE_LBREAK_IDEOGRAPHIC 32
#define RE_LBREAK_VIRAMAFINAL 33
#define RE_LBREAK_ZWSPACE 34
#define RE_LBREAK_ZWJ 35
@@ -203,11 +203,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_INCB_CONSONANT 2
#define RE_INCB_LINKER 3
extern char* re_strings[1506];
extern RE_Property re_properties[183];
extern RE_PropertyValue re_property_values[1651];
extern char* re_strings[1530];
extern RE_Property re_properties[185];
extern RE_PropertyValue re_property_values[1680];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[100];
extern RE_GetPropertyFunc re_get_property[101];
RE_UINT32 re_get_alphabetic(RE_UINT32 codepoint);
RE_UINT32 re_get_alphanumeric(RE_UINT32 codepoint);
@@ -267,6 +267,7 @@ RE_UINT32 re_get_line_break(RE_UINT32 codepoint);
RE_UINT32 re_get_logical_order_exception(RE_UINT32 codepoint);
RE_UINT32 re_get_lowercase(RE_UINT32 codepoint);
RE_UINT32 re_get_math(RE_UINT32 codepoint);
RE_UINT32 re_get_modifier_combining_mark(RE_UINT32 codepoint);
RE_UINT32 re_get_nfc_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_nfd_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_nfkc_quick_check(RE_UINT32 codepoint);

View File

@@ -241,7 +241,7 @@ __all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
"__doc__", "RegexFlag"]
__version__ = "2.5.146"
__version__ = "2.5.147"
# --------------------------------------------------------------------
# Public interface.

View File

@@ -8,7 +8,7 @@ with open('README.rst', encoding='utf-8') as file:
setup(
name='regex',
version='2024.7.24',
version='2024.9.11',
description='Alternative regular expression module, to replace re.',
long_description=long_description,
long_description_content_type='text/x-rst',

View File

@@ -425,6 +425,10 @@ def parse_script_extensions(properties, subpath):
prop_name = line.split()[-1]
property = properties[munge(prop_name)]
property['values'] = {}
elif line.startswith('# All code points not explicitly listed for '):
prop_name = line.split()[-1]
property = properties[munge(prop_name)]
property['values'] = {}
elif not line.startswith('#'):
line = line.partition('#')[0]
fields = [field.strip() for field in line.split(';')]
@@ -1764,7 +1768,7 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
h_file.write('int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded);\n')
# The Unicode version.
UNICODE_VERSION = '15.1.0'
UNICODE_VERSION = '16.0.0'
this_folder = dirname(__file__)