mirror of
https://github.com/mrabarnett/mrab-regex.git
synced 2025-10-05 20:02:39 +02:00
The escape function no longer escapes \x00. It's not necessary.
Inline flags can now be turned off and apply to what follows. Added \R to match line endings.
This commit is contained in:
10
.github/workflows/main.yml
vendored
10
.github/workflows/main.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
env:
|
||||
# macOS archs
|
||||
CIBW_ARCHS_MACOS: "x86_64 arm64"
|
||||
CIBW_ARCHS_MACOS: "x86_64 arm64 universal2"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -64,6 +64,14 @@ jobs:
|
||||
name: regex-files
|
||||
path: wheelhouse/*.whl
|
||||
|
||||
- name: Create GitHub release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
|
||||
with:
|
||||
tag_name: ${{ github.ref }}
|
||||
title: Release ${{ github.ref }}
|
||||
|
||||
# Build source distribution & manylinux1_x86_64 wheels
|
||||
# These two jobs build:
|
||||
# 1, build_wheels (above): manylinux1_i686 / manylinux2014_x86_64
|
||||
|
@@ -1,3 +1,11 @@
|
||||
Version: 2023.12.23
|
||||
|
||||
The escape function no longer escapes \x00. It's not necessary.
|
||||
|
||||
Inline flags can now be turned off and apply to what follows.
|
||||
|
||||
Added \R to match line endings.
|
||||
|
||||
Version: 2023.10.3
|
||||
|
||||
Updated to Unicode 15.1.0.
|
||||
|
@@ -1150,22 +1150,7 @@ def parse_flags_subpattern(source, info):
|
||||
|
||||
def parse_positional_flags(source, info, flags_on, flags_off):
|
||||
"Parses positional flags."
|
||||
version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
|
||||
if version == VERSION0:
|
||||
# Positional flags are global and can only be turned on.
|
||||
if flags_off:
|
||||
raise error("bad inline flags: cannot turn flags off",
|
||||
source.string, source.pos)
|
||||
|
||||
new_global_flags = flags_on & ~info.global_flags
|
||||
if new_global_flags:
|
||||
info.global_flags |= new_global_flags
|
||||
|
||||
# A global has been turned on, so reparse the pattern.
|
||||
raise _UnscopedFlagSet(info.global_flags)
|
||||
else:
|
||||
info.flags = (info.flags | flags_on) & ~flags_off
|
||||
|
||||
info.flags = (info.flags | flags_on) & ~flags_off
|
||||
source.ignore_space = bool(info.flags & VERBOSE)
|
||||
|
||||
def parse_name(source, allow_numeric=False, allow_group_0=False):
|
||||
@@ -1233,6 +1218,14 @@ def parse_escape(source, info, in_set):
|
||||
elif ch in "pP":
|
||||
# A Unicode property, positive or negative.
|
||||
return parse_property(source, info, ch == "p", in_set)
|
||||
elif ch == "R" and not in_set:
|
||||
# A line ending.
|
||||
charset = [0x0A, 0x0B, 0x0C, 0x0D]
|
||||
if info.guess_encoding == UNICODE:
|
||||
charset.extend([0x85, 0x2028, 0x2029])
|
||||
|
||||
return Atomic(Branch([String([0x0D, 0x0A]), SetUnion(info, [Character(c)
|
||||
for c in charset])]))
|
||||
elif ch == "X" and not in_set:
|
||||
# A grapheme cluster.
|
||||
return Grapheme()
|
||||
|
@@ -241,7 +241,7 @@ __all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
|
||||
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
|
||||
"__doc__", "RegexFlag"]
|
||||
|
||||
__version__ = "2.5.135"
|
||||
__version__ = "2.5.136"
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Public interface.
|
||||
@@ -392,8 +392,6 @@ def escape(pattern, special_only=True, literal_spaces=False):
|
||||
elif c in _METACHARS or c.isspace():
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append(c)
|
||||
else:
|
||||
@@ -402,8 +400,6 @@ def escape(pattern, special_only=True, literal_spaces=False):
|
||||
s.append(c)
|
||||
elif c in _ALNUM:
|
||||
s.append(c)
|
||||
elif c == "\x00":
|
||||
s.append("\\000")
|
||||
else:
|
||||
s.append("\\")
|
||||
s.append(c)
|
||||
|
@@ -911,10 +911,9 @@ class RegexTests(unittest.TestCase):
|
||||
p = regex.compile('(?iu)' + lower_char)
|
||||
self.assertEqual(bool(p.match(upper_char)), True)
|
||||
|
||||
# Changed to positional flags in regex 2023.12.23.
|
||||
self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)
|
||||
self.assertEqual(bool(regex.match(r"a(?i)", "A")), True)
|
||||
self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True)
|
||||
self.assertEqual(regex.match(r"a(?iV1)", "A"), None)
|
||||
self.assertEqual(regex.match(r"a(?i)", "A"), None)
|
||||
|
||||
def test_dollar_matches_twice(self):
|
||||
# $ matches the end of string, and just before the terminating \n.
|
||||
@@ -1396,18 +1395,15 @@ class RegexTests(unittest.TestCase):
|
||||
# Issues 433028, 433024, 433027.
|
||||
self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2))
|
||||
self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2))
|
||||
self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2))
|
||||
self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None)
|
||||
|
||||
self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda:
|
||||
regex.search(r"(?V0-i)Ab", "ab", flags=regex.I))
|
||||
# Changed to positional flags in regex 2023.12.23.
|
||||
self.assertEqual(regex.search(r"A(?i)b", "ab"), None)
|
||||
|
||||
self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None)
|
||||
self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None)
|
||||
self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None)
|
||||
self.assertEqual(regex.search(r"(?-i)Ab", "ab", flags=regex.I), None)
|
||||
self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None)
|
||||
self.assertEqual(regex.search(r"A(?V1-i)b", "ab",
|
||||
flags=regex.I).span(), (0, 2))
|
||||
self.assertEqual(regex.search(r"A(?-i)b", "ab", flags=regex.I).span(),
|
||||
(0, 2))
|
||||
|
||||
def test_repeated_repeats(self):
|
||||
# Issue 2537.
|
||||
@@ -1820,12 +1816,10 @@ class RegexTests(unittest.TestCase):
|
||||
('a.*b', 'acc\nccb', '', ascii(None)),
|
||||
('a.{4,5}b', 'acc\nccb', '', ascii(None)),
|
||||
('a.b', 'a\rb', '0', ascii('a\rb')),
|
||||
# The new behaviour is that the inline flag affects only what follows.
|
||||
('a.b(?s)', 'a\nb', '0', ascii('a\nb')),
|
||||
('a.b(?sV1)', 'a\nb', '', ascii(None)),
|
||||
# Changed to positional flags in regex 2023.12.23.
|
||||
('a.b(?s)', 'a\nb', '', ascii(None)),
|
||||
('(?s)a.b', 'a\nb', '0', ascii('a\nb')),
|
||||
('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')),
|
||||
('a.*(?sV1)b', 'acc\nccb', '', ascii(None)),
|
||||
('a.*(?s)b', 'acc\nccb', '', ascii(None)),
|
||||
('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')),
|
||||
('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')),
|
||||
|
||||
@@ -2345,12 +2339,9 @@ class RegexTests(unittest.TestCase):
|
||||
# Not an error under PCRE/PRE:
|
||||
# When the new behaviour is turned on positional inline flags affect
|
||||
# only what follows.
|
||||
('w(?i)', 'W', '0', ascii('W')),
|
||||
('w(?iV1)', 'W', '0', ascii(None)),
|
||||
('w(?i)', 'W', '0', ascii(None)),
|
||||
('w(?i)', 'w', '0', ascii('w')),
|
||||
('w(?iV1)', 'w', '0', ascii('w')),
|
||||
('(?i)w', 'W', '0', ascii('W')),
|
||||
('(?iV1)w', 'W', '0', ascii('W')),
|
||||
|
||||
# Comments using the x embedded pattern modifier.
|
||||
("""(?x)w# comment 1
|
||||
@@ -2403,14 +2394,10 @@ xyzabc
|
||||
# Bug 114033: nothing to repeat.
|
||||
(r'(x?)?', 'x', '0', ascii('x')),
|
||||
# Bug 115040: rescan if flags are modified inside pattern.
|
||||
# If the new behaviour is turned on then positional inline flags
|
||||
# affect only what follows.
|
||||
(r' (?x)foo ', 'foo', '0', ascii('foo')),
|
||||
(r' (?V1x)foo ', 'foo', '0', ascii(None)),
|
||||
# Changed to positional flags in regex 2023.12.23.
|
||||
(r' (?x)foo ', 'foo', '0', ascii(None)),
|
||||
(r'(?x) foo ', 'foo', '0', ascii('foo')),
|
||||
(r'(?V1x) foo ', 'foo', '0', ascii('foo')),
|
||||
(r'(?x)foo ', 'foo', '0', ascii('foo')),
|
||||
(r'(?V1x)foo ', 'foo', '0', ascii('foo')),
|
||||
# Bug 115618: negative lookahead.
|
||||
(r'(?<!abc)(d.f)', 'abcdefdof', '0', ascii('dof')),
|
||||
# Bug 116251: character class bug.
|
||||
@@ -3154,10 +3141,8 @@ xyzabc
|
||||
|
||||
# Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't
|
||||
# return None
|
||||
self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1",
|
||||
"blah BLAH").group(0, 1), ("blah BLAH", "blah"))
|
||||
self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"),
|
||||
None)
|
||||
# Changed to positional flags in regex 2023.12.23.
|
||||
self.assertEqual(regex.search(r"((?i)blah)\s+\1", "blah BLAH"), None)
|
||||
|
||||
# Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0)
|
||||
# returns "bcd" instead of "abcd"
|
||||
@@ -4336,10 +4321,10 @@ thing
|
||||
self.assertEqual(regex.search(r"^a?(a?)b?c\1$", "abca").span(), (0, 4))
|
||||
|
||||
# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
|
||||
self.assertEqual(regex.match(r"(?(?=a).|..)", "ab").span(), (0, 1))
|
||||
self.assertEqual(regex.match(r"(?(?=b).|..)", "ab").span(), (0, 2))
|
||||
self.assertEqual(regex.match(r"(?(?!a).|..)", "ab").span(), (0, 2))
|
||||
self.assertEqual(regex.match(r"(?(?!b).|..)", "ab").span(), (0, 1))
|
||||
self.assertEqual(regex.match(r'(?(?=a).|..)', 'ab').span(), (0, 1))
|
||||
self.assertEqual(regex.match(r'(?(?=b).|..)', 'ab').span(), (0, 2))
|
||||
self.assertEqual(regex.match(r'(?(?!a).|..)', 'ab').span(), (0, 2))
|
||||
self.assertEqual(regex.match(r'(?(?!b).|..)', 'ab').span(), (0, 1))
|
||||
|
||||
def test_fuzzy_ext(self):
|
||||
self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
|
||||
@@ -4460,6 +4445,12 @@ thing
|
||||
self.assertEqual([m.span() for m in regex.finditer(r'(?m)^\s*?$',
|
||||
'foo\n\n\nbar')], [(4, 4), (4, 5), (5, 5)])
|
||||
|
||||
def test_line_ending(self):
|
||||
self.assertEqual(regex.findall(r'\R', '\r\n\n\x0B\f\r\x85\u2028\u2029'),
|
||||
['\r\n', '\n', '\x0B', '\f', '\r', '\x85', '\u2028', '\u2029'])
|
||||
self.assertEqual(regex.findall(br'\R', b'\r\n\n\x0B\f\r\x85'), [b'\r\n',
|
||||
b'\n', b'\x0B', b'\f', b'\r'])
|
||||
|
||||
def test_main():
|
||||
unittest.main(verbosity=2)
|
||||
|
||||
|
2
setup.py
2
setup.py
@@ -7,7 +7,7 @@ with open('README.rst', encoding='utf-8') as file:
|
||||
|
||||
setup(
|
||||
name='regex',
|
||||
version='2023.10.3',
|
||||
version='2023.12.23',
|
||||
description='Alternative regular expression module, to replace re.',
|
||||
long_description=long_description,
|
||||
long_description_content_type='text/x-rst',
|
||||
|
@@ -1781,4 +1781,4 @@ binary_dict = make_binary_dict()
|
||||
|
||||
generate_code(unicode_data, UNICODE_VERSION, this_folder)
|
||||
|
||||
print('\nSuccessfully generated _reges_unicode.h and _reges_unicode.c in %s' % tools_folder)
|
||||
print('\nSuccessfully generated _regex_unicode.h and _regex_unicode.c in %s' % tools_folder)
|
||||
|
Reference in New Issue
Block a user