summary | refs | log | tree | commit | diff | stats
path: root/Modules/_sre.c
diff options
context:
space:
mode:
authorMa Lin <animalize@users.noreply.github.com>2022-03-29 14:31:01 (GMT)
committerGitHub <noreply@github.com>2022-03-29 14:31:01 (GMT)
commit356997cccc21a3391175d20e9ef03d434675b496 (patch)
tree16392c0b0212d7680d04f0ccb85fa6e13d812a9a /Modules/_sre.c
parent788154919c2d843a0a995994bf2aed2d074761ec (diff)
downloadcpython-356997cccc21a3391175d20e9ef03d434675b496.zip
cpython-356997cccc21a3391175d20e9ef03d434675b496.tar.gz
cpython-356997cccc21a3391175d20e9ef03d434675b496.tar.bz2
bpo-35859: Fix a few long-standing bugs in re engine (GH-12427)
In rare cases, a capturing group could get a wrong result. Regular expression engines in Perl and Java have similar bugs. The new behavior now matches that of more modern RE engines: the regex module and the engines in PHP, Ruby and Node.js.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r--Modules/_sre.c17
1 files changed, 17 insertions, 0 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 35bdb4f..48193f8 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -532,6 +532,14 @@ state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
} else {
i = STATE_OFFSET(state, state->mark[index]);
j = STATE_OFFSET(state, state->mark[index+1]);
+
+ /* check wrong span */
+ if (i > j) {
+ PyErr_SetString(PyExc_SystemError,
+ "The span of capturing group is wrong,"
+ " please report a bug for the re module.");
+ return NULL;
+ }
}
return getslice(state->isbytes, state->beginning, string, i, j);
@@ -2477,6 +2485,15 @@ pattern_new_match(_sremodulestate* module_state,
if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
match->mark[j+2] = ((char*) state->mark[j] - base) / n;
match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
+
+ /* check wrong span */
+ if (match->mark[j+2] > match->mark[j+3]) {
+ PyErr_SetString(PyExc_SystemError,
+ "The span of capturing group is wrong,"
+ " please report a bug for the re module.");
+ Py_DECREF(match);
+ return NULL;
+ }
} else
match->mark[j+2] = match->mark[j+3] = -1; /* undefined */