1
1
mirror of https://github.com/mrabarnett/mrab-regex.git synced 2025-10-05 20:02:39 +02:00

Git issue 508: Regex doesn't build using CPython main (3.13.0a0)

Removed usage of _PyBytes_Join and did a little tidying of the code that makes the result string.
This commit is contained in:
Matthew Barnett
2023-08-08 21:35:46 +01:00
parent 774dbfda42
commit bc73ebb5d7
6 changed files with 120 additions and 70 deletions

View File

@@ -1,6 +1,11 @@
Version: 2023.8.8
Git issue 508: Regex doesn't build using CPython main (3.13.0a0)
Removed usage of _PyBytes_Join and did a little tidying of the code that makes the result string.
Version: 2023.6.3 Version: 2023.6.3
# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
Conditional node needed an additional member that points to the true branch. Conditional node needed an additional member that points to the true branch.
Version: 2023.5.5 Version: 2023.5.5

View File

@@ -18706,8 +18706,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index)
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, 0, item); PyList_SetItem(result, 0, item);
return result; return result;
} }
@@ -18726,8 +18726,8 @@ static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index)
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, i, item); PyList_SetItem(result, i, item);
} }
return result; return result;
@@ -18786,8 +18786,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) {
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, 0, item); PyList_SetItem(result, 0, item);
return result; return result;
} }
@@ -18806,8 +18806,8 @@ static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) {
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, i, item); PyList_SetItem(result, i, item);
} }
return result; return result;
@@ -18867,8 +18867,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index)
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, 0, item); PyList_SetItem(result, 0, item);
return result; return result;
} }
@@ -18888,8 +18888,8 @@ static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index)
if (!item) if (!item)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, i, item); PyList_SetItem(result, i, item);
} }
return result; return result;
@@ -18923,8 +18923,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t
if (!slice) if (!slice)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, 0, slice); PyList_SetItem(result, 0, slice);
return result; return result;
} }
@@ -18945,8 +18945,8 @@ static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t
if (!slice) if (!slice)
goto error; goto error;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(result, i, slice); PyList_SetItem(result, i, slice);
} }
return result; return result;
@@ -19282,13 +19282,13 @@ static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject*
if (!keys) if (!keys)
goto failed; goto failed;
for (g = 0; g < PyList_GET_SIZE(keys); g++) { for (g = 0; g < PyList_Size(keys); g++) {
PyObject* key; PyObject* key;
PyObject* value; PyObject* value;
int status; int status;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
key = PyList_GET_ITEM(keys, g); key = PyList_GetItem(keys, g);
if (!key) if (!key)
goto failed; goto failed;
@@ -19326,14 +19326,14 @@ static PyObject* match_capturesdict(MatchObject* self) {
if (!keys) if (!keys)
goto failed; goto failed;
for (g = 0; g < PyList_GET_SIZE(keys); g++) { for (g = 0; g < PyList_Size(keys); g++) {
PyObject* key; PyObject* key;
Py_ssize_t group; Py_ssize_t group;
PyObject* captures; PyObject* captures;
int status; int status;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
key = PyList_GET_ITEM(keys, g); key = PyList_GetItem(keys, g);
if (!key) if (!key)
goto failed; goto failed;
@@ -19509,12 +19509,12 @@ Py_LOCAL_INLINE(int) add_to_join_list(RE_JoinInfo* join_info, PyObject* item) {
goto error; goto error;
} }
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(join_info->list, 0, join_info->item); PyList_SetItem(join_info->list, 0, join_info->item);
join_info->item = NULL; join_info->item = NULL;
/* PyList_SET_ITEM borrows the reference. */ /* PyList_SetItem borrows the reference. */
PyList_SET_ITEM(join_info->list, 1, new_item); PyList_SetItem(join_info->list, 1, new_item);
return 0; return 0;
} }
@@ -19535,38 +19535,78 @@ Py_LOCAL_INLINE(void) clear_join_list(RE_JoinInfo* join_info) {
Py_XDECREF(join_info->item); Py_XDECREF(join_info->item);
} }
/* Joins a list of bytestrings.
 *
 * Concatenates the items of 'list', in order, into a single new bytestring.
 *
 * Returns a new reference to the joined bytestring, or NULL with an exception
 * set if 'list' is not a list, if an item is not a bytestring, or if the
 * result would be too long or can't be allocated.
 */
Py_LOCAL_INLINE(PyObject*) join_bytestrings(PyObject* list) {
    Py_ssize_t count;
    Py_ssize_t length;
    Py_ssize_t i;
    PyObject* result;
    char* to_bytes;

    count = PyList_Size(list);
    if (count < 0)
        return NULL;

    /* How long will the result be? */
    length = 0;

    for (i = 0; i < count; i++) {
        PyObject* bytestring;
        Py_ssize_t item_length;

        /* PyList_GetItem borrows a reference. */
        bytestring = PyList_GetItem(list, i);
        if (!bytestring)
            return NULL;

        /* PyBytes_Size returns -1 and sets an exception if the item isn't a
         * bytestring, so this pass also validates every item before anything
         * is copied.
         */
        item_length = PyBytes_Size(bytestring);
        if (item_length < 0)
            return NULL;

        /* Check before adding: signed overflow is undefined behaviour. */
        if (item_length > PY_SSIZE_T_MAX - length) {
            PyErr_SetString(PyExc_OverflowError,
              "joined bytestring is too long");
            return NULL;
        }

        length += item_length;
    }

    /* Create the resulting bytestring, but uninitialised. */
    result = PyBytes_FromStringAndSize(NULL, length);
    if (!result)
        return NULL;

    /* Fill the resulting bytestring. */
    to_bytes = PyBytes_AsString(result);
    if (!to_bytes)
        goto error;

    length = 0;

    for (i = 0; i < count; i++) {
        PyObject* bytestring;
        char* from_bytes;
        Py_ssize_t from_length;

        /* PyList_GetItem borrows a reference. */
        bytestring = PyList_GetItem(list, i);
        if (!bytestring)
            goto error;

        if (PyBytes_AsStringAndSize(bytestring, &from_bytes, &from_length) < 0)
            goto error;

        /* The destination was freshly created above, but memmove is kept so
         * that the copy is safe even when a zero-length result shares storage
         * with a zero-length item.
         */
        memmove(to_bytes + length, from_bytes, (size_t)from_length);
        length += from_length;
    }

    return result;

error:
    Py_DECREF(result);
    return NULL;
}
/* Joins a list of strings.
 *
 * The items of 'list' are concatenated by joining them on an empty string.
 *
 * Returns the result of PyUnicode_Join, or NULL if the empty separator
 * couldn't be created.
 */
Py_LOCAL_INLINE(PyObject*) join_strings(PyObject* list) {
    PyObject* joined = NULL;
    PyObject* empty = PyUnicode_FromString("");

    if (empty) {
        joined = PyUnicode_Join(empty, list);

        /* The separator is only needed for the duration of the join. */
        Py_DECREF(empty);
    }

    return joined;
}
/* Joins together a list of strings for pattern_subx. */ /* Joins together a list of strings for pattern_subx. */
Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) { Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) {
/* If the list already exists then just do the join. */ /* If the list already exists then just do the join. */
if (join_info->list) { if (join_info->list) {
PyObject* joiner;
PyObject* result; PyObject* result;
if (join_info->reversed) if (join_info->reversed)
/* The list needs to be reversed before being joined. */ /* The list needs to be reversed before being joined. */
PyList_Reverse(join_info->list); PyList_Reverse(join_info->list);
if (join_info->is_unicode) { if (join_info->is_unicode)
/* Concatenate the Unicode strings. */ /* Concatenate the Unicode strings. */
joiner = PyUnicode_New(0, 0); result = join_strings(join_info->list);
if (!joiner) { else
clear_join_list(join_info);
return NULL;
}
result = PyUnicode_Join(joiner, join_info->list);
} else {
joiner = PyBytes_FromString("");
if (!joiner) {
clear_join_list(join_info);
return NULL;
}
/* Concatenate the bytestrings. */ /* Concatenate the bytestrings. */
result = _PyBytes_Join(joiner, join_info->list); result = join_bytestrings(join_info->list);
}
Py_DECREF(joiner);
clear_join_list(join_info); clear_join_list(join_info);
return result; return result;
@@ -19651,13 +19691,13 @@ static PyObject* match_expand(MatchObject* self, PyObject* str_template) {
init_join_list(&join_info, FALSE, PyUnicode_Check(self->string)); init_join_list(&join_info, FALSE, PyUnicode_Check(self->string));
/* Add each part of the template to the list. */ /* Add each part of the template to the list. */
size = PyList_GET_SIZE(replacement); size = PyList_Size(replacement);
for (i = 0; i < size; i++) { for (i = 0; i < size; i++) {
PyObject* item; PyObject* item;
PyObject* str_item; PyObject* str_item;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
item = PyList_GET_ITEM(replacement, i); item = PyList_GetItem(replacement, i);
str_item = get_match_replacement(self, item, self->group_count); str_item = get_match_replacement(self, item, self->group_count);
if (!str_item) if (!str_item)
goto error; goto error;
@@ -19727,19 +19767,19 @@ Py_LOCAL_INLINE(PyObject*) make_capture_dict(MatchObject* match, MatchObject**
if (!values) if (!values)
goto failed; goto failed;
for (g = 0; g < PyList_GET_SIZE(keys); g++) { for (g = 0; g < PyList_Size(keys); g++) {
PyObject* key; PyObject* key;
PyObject* value; PyObject* value;
Py_ssize_t v; Py_ssize_t v;
int status; int status;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
key = PyList_GET_ITEM(keys, g); key = PyList_GetItem(keys, g);
if (!key) if (!key)
goto failed; goto failed;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
value = PyList_GET_ITEM(values, g); value = PyList_GetItem(values, g);
if (!value) if (!value)
goto failed; goto failed;
@@ -21742,7 +21782,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
Py_ssize_t step; Py_ssize_t step;
/* Add each part of the template to the list. */ /* Add each part of the template to the list. */
count = PyList_GET_SIZE(replacement); count = PyList_Size(replacement);
if (join_info.reversed) { if (join_info.reversed) {
/* We're searching backwards, so we'll be reversing the list /* We're searching backwards, so we'll be reversing the list
* when it's complete. Therefore, we need to add the items of * when it's complete. Therefore, we need to add the items of
@@ -21761,8 +21801,8 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
PyObject* item; PyObject* item;
PyObject* str_item; PyObject* str_item;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
item = PyList_GET_ITEM(replacement, index); item = PyList_GetItem(replacement, index);
str_item = get_sub_replacement(item, string, &state, str_item = get_sub_replacement(item, string, &state,
self->public_group_count); self->public_group_count);
if (!str_item) if (!str_item)
@@ -25719,7 +25759,7 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) {
unpacked = FALSE; unpacked = FALSE;
/* Read the regex code. */ /* Read the regex code. */
code_len = PyList_GET_SIZE(code_list); code_len = PyList_Size(code_list);
code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE)); code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE));
if (!code) { if (!code) {
if (unpacked) { if (unpacked) {
@@ -25733,8 +25773,8 @@ static PyObject* re_compile(PyObject* self_, PyObject* args) {
PyObject* o; PyObject* o;
size_t value; size_t value;
/* PyList_GET_ITEM borrows a reference. */ /* PyList_GetItem borrows a reference. */
o = PyList_GET_ITEM(code_list, i); o = PyList_GetItem(code_list, i);
value = PyLong_AsUnsignedLong(o); value = PyLong_AsUnsignedLong(o);
if ((Py_ssize_t)value == -1 && PyErr_Occurred()) if ((Py_ssize_t)value == -1 && PyErr_Occurred())

View File

@@ -3000,18 +3000,19 @@ class Group(RegexBase):
def _compile(self, reverse, fuzzy): def _compile(self, reverse, fuzzy):
code = [] code = []
key = self.group, reverse, fuzzy
ref = self.info.call_refs.get(key)
if ref is not None:
code += [(OP.CALL_REF, ref)]
public_group = private_group = self.group public_group = private_group = self.group
if private_group < 0: if private_group < 0:
public_group = self.info.private_groups[private_group] public_group = self.info.private_groups[private_group]
private_group = self.info.group_count - private_group private_group = self.info.group_count - private_group
code += ([(OP.GROUP, int(not reverse), private_group, public_group)] + key = self.group, reverse, fuzzy
self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) ref = self.info.call_refs.get(key)
if ref is not None:
code += [(OP.CALL_REF, ref)]
code += [(OP.GROUP, int(not reverse), private_group, public_group)]
code += self.subpattern.compile(reverse, fuzzy)
code += [(OP.END, )]
if ref is not None: if ref is not None:
code += [(OP.END, )] code += [(OP.END, )]

View File

@@ -241,7 +241,7 @@ __all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__", "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
"__doc__", "RegexFlag"] "__doc__", "RegexFlag"]
__version__ = "2.5.129" __version__ = "2.5.132"
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# Public interface. # Public interface.

View File

@@ -3289,7 +3289,7 @@ xyzabc
"female: her dog; male: his cat. asdsasda")], ['female: her dog', "female: her dog; male: his cat. asdsasda")], ['female: her dog',
'male: his cat']) 'male: his cat'])
# Hg issue 78: "Captures"doesn't work for recursive calls # Hg issue 78: "Captures" doesn't work for recursive calls
self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))', self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))',
'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)', 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)',
'(((1+0)+1)+1)']) '(((1+0)+1)+1)'])
@@ -3624,6 +3624,9 @@ xyzabc
self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)', self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)',
'5 elephants')[0], '5 elephants') '5 elephants')[0], '5 elephants')
self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').group('routine'), None)
self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').captures('routine'), ['a'])
# Hg issue 153: Request: (*SKIP). # Hg issue 153: Request: (*SKIP).
self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3') self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3')
self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3') self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3')

View File

@@ -7,7 +7,7 @@ with open('README.rst') as file:
setup( setup(
name='regex', name='regex',
version='2023.6.3', version='2023.8.8',
description='Alternative regular expression module, to replace re.', description='Alternative regular expression module, to replace re.',
long_description=long_description, long_description=long_description,
long_description_content_type='text/x-rst', long_description_content_type='text/x-rst',
@@ -27,6 +27,7 @@ setup(
'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Topic :: Scientific/Engineering :: Information Analysis', 'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Processing', 'Topic :: Text Processing',