summaryrefslogtreecommitdiffstats
path: root/Modules/_sre
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2022-11-03 07:23:46 (GMT)
committerGitHub <noreply@github.com>2022-11-03 07:23:46 (GMT)
commite9ac890c0273aee413aa528cc202c3efa29f1d7a (patch)
treee9dc563c58e25de5dd3f5b592fa5575ea486349d /Modules/_sre
parent41bc101dd6726edc5fbd4a934ea057ec712d5498 (diff)
downloadcpython-e9ac890c0273aee413aa528cc202c3efa29f1d7a.zip
cpython-e9ac890c0273aee413aa528cc202c3efa29f1d7a.tar.gz
cpython-e9ac890c0273aee413aa528cc202c3efa29f1d7a.tar.bz2
gh-98740: Fix validation of conditional expressions in RE (GH-98764)
In very rare circumstances the JUMP opcode could be confused with the argument of the opcode in the "then" part which doesn't end with the JUMP opcode. This led to incorrect detection of the final JUMP opcode and incorrect calculation of the size of the subexpression. NOTE: Changed return value of functions _validate_inner() and _validate_charset() in Modules/_sre/sre.c. Now they return 0 on success, -1 on failure, and 1 if the last op is JUMP (which usually is a failure). Previously they returned 1 on success and 0 on failure.
Diffstat (limited to 'Modules/_sre')
-rw-r--r--Modules/_sre/sre.c56
1 files changed, 29 insertions, 27 deletions
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index aae0265..44039ce 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -1623,7 +1623,7 @@ bad_template:
#endif
/* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
+#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
/* Extract opcode, argument, or skip count from code array */
#define GET_OP \
@@ -1647,7 +1647,7 @@ bad_template:
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
- if (skip-adj > (uintptr_t)(end - code)) \
+ if (skip-adj > (uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
@@ -1736,9 +1736,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
}
- return 1;
+ return 0;
}
+/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
@@ -1816,7 +1817,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_IN_LOC_IGNORE:
GET_SKIP;
/* Stop 1 before the end; we check the FAILURE below */
- if (!_validate_charset(code, code+skip-2))
+ if (_validate_charset(code, code+skip-2))
FAIL;
if (code[skip-2] != SRE_OP_FAILURE)
FAIL;
@@ -1870,7 +1871,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
/* Validate the charset */
if (flags & SRE_INFO_CHARSET) {
- if (!_validate_charset(code, newcode-1))
+ if (_validate_charset(code, newcode-1))
FAIL;
if (newcode[-1] != SRE_OP_FAILURE)
FAIL;
@@ -1891,7 +1892,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
- if (!_validate_inner(code, code+skip-3, groups))
+ if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
@@ -1905,6 +1906,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
else if (code+skip-1 != target)
FAIL;
}
+ if (code != target)
+ FAIL;
}
break;
@@ -1920,7 +1923,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (!_validate_inner(code, code+skip-4, groups))
+ if (_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
@@ -1940,7 +1943,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (!_validate_inner(code, code+skip-3, groups))
+ if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
GET_OP;
@@ -1958,7 +1961,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_ATOMIC_GROUP:
{
GET_SKIP;
- if (!_validate_inner(code, code+skip-2, groups))
+ if (_validate_inner(code, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -2010,24 +2013,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
- if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
- code[skip-3] == SRE_OP_JUMP)
- {
- VTRACE(("both then and else parts present\n"));
- if (!_validate_inner(code+1, code+skip-3, groups))
- FAIL;
+ VTRACE(("then part:\n"));
+ int rc = _validate_inner(code+1, code+skip-1, groups);
+ if (rc == 1) {
+ VTRACE(("else part:\n"));
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
- if (!_validate_inner(code, code+skip-1, groups))
- FAIL;
- code += skip-1;
- }
- else {
- VTRACE(("only a then part present\n"));
- if (!_validate_inner(code+1, code+skip-1, groups))
- FAIL;
- code += skip-1;
+ rc = _validate_inner(code, code+skip-1, groups);
}
+ if (rc)
+ FAIL;
+ code += skip-1;
break;
case SRE_OP_ASSERT:
@@ -2038,7 +2034,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
- if (!_validate_inner(code+1, code+skip-2, groups))
+ if (_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -2046,6 +2042,12 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
break;
+ case SRE_OP_JUMP:
+ if (code + 1 != end)
+ FAIL;
+ VTRACE(("JUMP: %d\n", __LINE__));
+ return 1;
+
default:
FAIL;
@@ -2053,7 +2055,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
VTRACE(("okay\n"));
- return 1;
+ return 0;
}
static int
@@ -2068,7 +2070,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static int
_validate(PatternObject *self)
{
- if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
+ if (_validate_outer(self->code, self->code+self->codesize, self->groups))
{
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0;