diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-11-20 21:30:42 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-11-20 21:30:42 (GMT) |
commit | 39bdad813a2f07bd8c373e2c88b76467ef12832c (patch) | |
tree | f7908fef135347773f5eed6ca29a49918e466ff8 | |
parent | 4a1fdcf07d4ee37ec003cd29be6b17e91d5db012 (diff) | |
download | cpython-39bdad813a2f07bd8c373e2c88b76467ef12832c.zip cpython-39bdad813a2f07bd8c373e2c88b76467ef12832c.tar.gz cpython-39bdad813a2f07bd8c373e2c88b76467ef12832c.tar.bz2 |
Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka.
-rw-r--r-- | Lib/test/test_re.py | 6 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/_sre.c | 11 | ||||
-rw-r--r-- | Modules/sre.h | 6 |
4 files changed, 17 insertions, 9 deletions
```diff
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 5162d4e..39972d5 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -419,6 +419,12 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.match("([\u2222\u2223])",
                                   "\u2222", re.UNICODE).group(1), "\u2222")
 
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                          "a\nb")
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -167,6 +167,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+  Patch by Serhiy Storchaka.
+
 - Issue #14313: zipfile now raises NotImplementedError when the compression
   type is unknown.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 472b5a3..9600a08 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2695,6 +2695,13 @@ _compile(PyObject* self_, PyObject* args)
     for (i = 0; i < n; i++) {
         PyObject *o = PyList_GET_ITEM(code, i);
         unsigned long value = PyLong_AsUnsignedLong(o);
+        if (value == (unsigned long)-1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "regular expression code size limit exceeded");
+            }
+            break;
+        }
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -3065,10 +3072,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
@@ -3086,10 +3091,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
diff --git a/Modules/sre.h b/Modules/sre.h
index aec9b54..d389b46 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -14,12 +14,8 @@
 #include "sre_constants.h"
 
 /* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+   large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD
```