summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/re.rst 3
-rw-r--r-- Lib/test/test_re.py 5
-rw-r--r-- Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst 3
-rw-r--r-- Modules/_sre/sre.c 56
4 files changed, 40 insertions, 27 deletions
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 5b304f7..0e7dda0 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -483,6 +483,9 @@ The special characters are:
some fixed length. Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.
+.. _re-conditional-expression:
+.. index:: single: (?(; in regular expressions
+
``(?(id/name)yes-pattern|no-pattern)``
Will try to match with ``yes-pattern`` if the group with given *id* or
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 3f0f84e..11628a2 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -630,6 +630,11 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r'()(?(2)a)',
"invalid group reference 2", 5)
+ def test_re_groupref_exists_validation_bug(self):
+ for i in range(256):
+ with self.subTest(code=i):
+ re.compile(r'()(?(1)\x%02x?)' % i)
+
def test_re_groupref_overflow(self):
from re._constants import MAXGROUPS
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
diff --git a/Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst b/Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst
new file mode 100644
index 0000000..887d506
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst
@@ -0,0 +1,3 @@
+Fix internal error in the :mod:`re` module which in very rare circumstances
+prevented compilation of a regular expression containing a :ref:`conditional
+expression <re-conditional-expression>` without the "else" branch.
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index aae0265..44039ce 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -1623,7 +1623,7 @@ bad_template:
#endif
/* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
+#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
/* Extract opcode, argument, or skip count from code array */
#define GET_OP \
@@ -1647,7 +1647,7 @@ bad_template:
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
- if (skip-adj > (uintptr_t)(end - code)) \
+ if (skip-adj > (uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
@@ -1736,9 +1736,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
}
- return 1;
+ return 0;
}
+/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
@@ -1816,7 +1817,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_IN_LOC_IGNORE:
GET_SKIP;
/* Stop 1 before the end; we check the FAILURE below */
- if (!_validate_charset(code, code+skip-2))
+ if (_validate_charset(code, code+skip-2))
FAIL;
if (code[skip-2] != SRE_OP_FAILURE)
FAIL;
@@ -1870,7 +1871,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
/* Validate the charset */
if (flags & SRE_INFO_CHARSET) {
- if (!_validate_charset(code, newcode-1))
+ if (_validate_charset(code, newcode-1))
FAIL;
if (newcode[-1] != SRE_OP_FAILURE)
FAIL;
@@ -1891,7 +1892,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
- if (!_validate_inner(code, code+skip-3, groups))
+ if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
@@ -1905,6 +1906,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
else if (code+skip-1 != target)
FAIL;
}
+ if (code != target)
+ FAIL;
}
break;
@@ -1920,7 +1923,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (!_validate_inner(code, code+skip-4, groups))
+ if (_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
@@ -1940,7 +1943,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (!_validate_inner(code, code+skip-3, groups))
+ if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
GET_OP;
@@ -1958,7 +1961,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_ATOMIC_GROUP:
{
GET_SKIP;
- if (!_validate_inner(code, code+skip-2, groups))
+ if (_validate_inner(code, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -2010,24 +2013,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
- if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
- code[skip-3] == SRE_OP_JUMP)
- {
- VTRACE(("both then and else parts present\n"));
- if (!_validate_inner(code+1, code+skip-3, groups))
- FAIL;
+ VTRACE(("then part:\n"));
+ int rc = _validate_inner(code+1, code+skip-1, groups);
+ if (rc == 1) {
+ VTRACE(("else part:\n"));
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
- if (!_validate_inner(code, code+skip-1, groups))
- FAIL;
- code += skip-1;
- }
- else {
- VTRACE(("only a then part present\n"));
- if (!_validate_inner(code+1, code+skip-1, groups))
- FAIL;
- code += skip-1;
+ rc = _validate_inner(code, code+skip-1, groups);
}
+ if (rc)
+ FAIL;
+ code += skip-1;
break;
case SRE_OP_ASSERT:
@@ -2038,7 +2034,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
- if (!_validate_inner(code+1, code+skip-2, groups))
+ if (_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -2046,6 +2042,12 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
break;
+ case SRE_OP_JUMP:
+ if (code + 1 != end)
+ FAIL;
+ VTRACE(("JUMP: %d\n", __LINE__));
+ return 1;
+
default:
FAIL;
@@ -2053,7 +2055,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
VTRACE(("okay\n"));
- return 1;
+ return 0;
}
static int
@@ -2068,7 +2070,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static int
_validate(PatternObject *self)
{
- if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
+ if (_validate_outer(self->code, self->code+self->codesize, self->groups))
{
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0;