diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-16 14:47:15 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-16 14:47:15 (GMT) |
commit | e18e05cce92182e7f852e2d1569904190b8a9a40 (patch) | |
tree | 6a9b85ad22229c378de5f57e579c8b2296d5c5b0 | |
parent | 94bf697b01f56b99bfd3edaf72b7f4893d80c122 (diff) | |
download | cpython-e18e05cce92182e7f852e2d1569904190b8a9a40.zip cpython-e18e05cce92182e7f852e2d1569904190b8a9a40.tar.gz cpython-e18e05cce92182e7f852e2d1569904190b8a9a40.tar.bz2 |
Issue #13169: The maximum repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on
64-bit platforms).
-rw-r--r-- | Lib/sre_compile.py | 1 | ||||
-rw-r--r-- | Lib/sre_constants.py | 4 | ||||
-rw-r--r-- | Lib/sre_parse.py | 9 | ||||
-rw-r--r-- | Lib/test/test_re.py | 33 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Modules/_sre.c | 18 | ||||
-rw-r--r-- | Modules/sre.h | 14 |
7 files changed, 68 insertions, 15 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 2a0c745..7cda2b6 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -13,6 +13,7 @@ import _sre, sys import sre_parse from sre_constants import * +from _sre import MAXREPEAT assert _sre.MAGIC == MAGIC, "SRE module mismatch" diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index 1863f48..4be57f5 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -15,10 +15,6 @@ MAGIC = 20031017 -# max code word in this release - -MAXREPEAT = 65535 - # SRE standard exception (access as sre.error) # should this really be here? diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 8b98b1a..a0cf344 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -15,6 +15,7 @@ import sys from sre_constants import * +from _sre import MAXREPEAT SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" @@ -498,10 +499,14 @@ def _parse(source, state): continue if lo: min = int(lo) + if min >= MAXREPEAT: + raise OverflowError("the repetition number is too large") if hi: max = int(hi) - if max < min: - raise error, "bad repeat interval" + if max >= MAXREPEAT: + raise OverflowError("the repetition number is too large") + if max < min: + raise error("bad repeat interval") else: raise error, "not supported" # figure out which item to repeat diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index dee5efe..a9d6f2c 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,5 +1,5 @@ from test.test_support import verbose, run_unittest, import_module -from test.test_support import precisionbigmemtest, _2G +from test.test_support import precisionbigmemtest, _2G, cpython_only import re from re import Scanner import sys @@ -847,6 +847,37 @@ class ReTests(unittest.TestCase): self.assertEqual(n, size + 1) + def test_repeat_minmax_overflow(self): + # Issue #13169 + string = "x" * 100000 + self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535)) + self.assertEqual(re.match(r".{,65535}", string).span(), (0, 
65535)) + self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535)) + self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536)) + self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536)) + self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536)) + # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t. + self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128) + self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128) + self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128) + self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) + + @cpython_only + def test_repeat_minmax_overflow_maxrepeat(self): + try: + from _sre import MAXREPEAT + except ImportError: + self.skipTest('requires _sre.MAXREPEAT constant') + string = "x" * 100000 + self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string)) + self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(), + (0, 100000)) + self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string)) + self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT) + self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT) + self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT) + + def run_re_tests(): from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: @@ -205,6 +205,10 @@ Core and Builtins Library ------- +- Issue #13169: The maximal repetition number in a regular expression has been + increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on + 64-bit). + - Issue #16743: Fix mmap overflow check on 32 bit Windows. 
- Issue #11311: StringIO.readline(0) now returns an empty string as all other diff --git a/Modules/_sre.c b/Modules/_sre.c index b115e2b..73e5aac 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -524,7 +524,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) Py_ssize_t i; /* adjust end */ - if (maxcount < end - ptr && maxcount != 65535) + if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) end = ptr + maxcount; switch (pattern[0]) { @@ -1139,7 +1139,7 @@ entrance: } else { /* general case */ LASTMARK_SAVE(); - while ((Py_ssize_t)ctx->pattern[2] == 65535 + while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { state->ptr = ctx->ptr; DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, @@ -1225,7 +1225,7 @@ entrance: } if ((ctx->count < ctx->u.rep->pattern[2] || - ctx->u.rep->pattern[2] == 65535) && + ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && state->ptr != ctx->u.rep->last_ptr) { /* we may have enough matches, but if we can match another item, do so */ @@ -1303,7 +1303,7 @@ entrance: LASTMARK_RESTORE(); if (ctx->count >= ctx->u.rep->pattern[2] - && ctx->u.rep->pattern[2] != 65535) + && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) RETURN_FAILURE; ctx->u.rep->count = ctx->count; @@ -3042,7 +3042,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) GET_ARG; max = arg; if (min > max) FAIL; - if (max > 65535) + if (max > SRE_MAXREPEAT) FAIL; if (!_validate_inner(code, code+skip-4, groups)) FAIL; @@ -3061,7 +3061,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) GET_ARG; max = arg; if (min > max) FAIL; - if (max > 65535) + if (max > SRE_MAXREPEAT) FAIL; if (!_validate_inner(code, code+skip-3, groups)) FAIL; @@ -3938,6 +3938,12 @@ PyMODINIT_FUNC init_sre(void) Py_DECREF(x); } + x = PyLong_FromUnsignedLong(SRE_MAXREPEAT); + if (x) { + PyDict_SetItemString(d, "MAXREPEAT", x); + Py_DECREF(x); + } + x = PyString_FromString(copyright); if (x) { PyDict_SetItemString(d, 
"copyright", x); diff --git a/Modules/sre.h b/Modules/sre.h index cbc98f1..200e492 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -16,9 +16,19 @@ /* size of a code word (must be unsigned short or larger, and large enough to hold a UCS4 character) */ #ifdef Py_USING_UNICODE -#define SRE_CODE Py_UCS4 +# define SRE_CODE Py_UCS4 +# if SIZEOF_SIZE_T > 4 +# define SRE_MAXREPEAT (~(SRE_CODE)0) +# else +# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u) +# endif #else -#define SRE_CODE unsigned long +# define SRE_CODE unsigned long +# if SIZEOF_SIZE_T > SIZEOF_LONG +# define SRE_MAXREPEAT (~(SRE_CODE)0) +# else +# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u) +# endif #endif typedef struct { |