summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2013-02-16 14:47:15 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2013-02-16 14:47:15 (GMT)
commite18e05cce92182e7f852e2d1569904190b8a9a40 (patch)
tree6a9b85ad22229c378de5f57e579c8b2296d5c5b0
parent94bf697b01f56b99bfd3edaf72b7f4893d80c122 (diff)
downloadcpython-e18e05cce92182e7f852e2d1569904190b8a9a40.zip
cpython-e18e05cce92182e7f852e2d1569904190b8a9a40.tar.gz
cpython-e18e05cce92182e7f852e2d1569904190b8a9a40.tar.bz2
Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on 64-bit platforms).
-rw-r--r--Lib/sre_compile.py1
-rw-r--r--Lib/sre_constants.py4
-rw-r--r--Lib/sre_parse.py9
-rw-r--r--Lib/test/test_re.py33
-rw-r--r--Misc/NEWS4
-rw-r--r--Modules/_sre.c18
-rw-r--r--Modules/sre.h14
7 files changed, 68 insertions, 15 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 2a0c745..7cda2b6 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -13,6 +13,7 @@
import _sre, sys
import sre_parse
from sre_constants import *
+from _sre import MAXREPEAT
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 1863f48..4be57f5 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -15,10 +15,6 @@
MAGIC = 20031017
-# max code word in this release
-
-MAXREPEAT = 65535
-
# SRE standard exception (access as sre.error)
# should this really be here?
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 8b98b1a..a0cf344 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -15,6 +15,7 @@
import sys
from sre_constants import *
+from _sre import MAXREPEAT
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"
@@ -498,10 +499,14 @@ def _parse(source, state):
continue
if lo:
min = int(lo)
+ if min >= MAXREPEAT:
+ raise OverflowError("the repetition number is too large")
if hi:
max = int(hi)
- if max < min:
- raise error, "bad repeat interval"
+ if max >= MAXREPEAT:
+ raise OverflowError("the repetition number is too large")
+ if max < min:
+ raise error("bad repeat interval")
else:
raise error, "not supported"
# figure out which item to repeat
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index dee5efe..a9d6f2c 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,5 +1,5 @@
from test.test_support import verbose, run_unittest, import_module
-from test.test_support import precisionbigmemtest, _2G
+from test.test_support import precisionbigmemtest, _2G, cpython_only
import re
from re import Scanner
import sys
@@ -847,6 +847,37 @@ class ReTests(unittest.TestCase):
self.assertEqual(n, size + 1)
+ def test_repeat_minmax_overflow(self):
+ # Issue #13169
+ string = "x" * 100000
+ self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+ self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+ self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+ self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+ self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+ self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+ # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+ self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
+ self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
+ self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
+ self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+ @cpython_only
+ def test_repeat_minmax_overflow_maxrepeat(self):
+ try:
+ from _sre import MAXREPEAT
+ except ImportError:
+ self.skipTest('requires _sre.MAXREPEAT constant')
+ string = "x" * 100000
+ self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+ self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+ (0, 100000))
+ self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+ self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
+ self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
+ self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
diff --git a/Misc/NEWS b/Misc/NEWS
index b73f664..ee43643 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -205,6 +205,10 @@ Core and Builtins
Library
-------
+- Issue #13169: The maximal repetition number in a regular expression has been
+ increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
+ 64-bit).
+
- Issue #16743: Fix mmap overflow check on 32 bit Windows.
- Issue #11311: StringIO.readline(0) now returns an empty string as all other
diff --git a/Modules/_sre.c b/Modules/_sre.c
index b115e2b..73e5aac 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -524,7 +524,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i;
/* adjust end */
- if (maxcount < end - ptr && maxcount != 65535)
+ if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
end = ptr + maxcount;
switch (pattern[0]) {
@@ -1139,7 +1139,7 @@ entrance:
} else {
/* general case */
LASTMARK_SAVE();
- while ((Py_ssize_t)ctx->pattern[2] == 65535
+ while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1225,7 +1225,7 @@ entrance:
}
if ((ctx->count < ctx->u.rep->pattern[2] ||
- ctx->u.rep->pattern[2] == 65535) &&
+ ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can
match another item, do so */
@@ -1303,7 +1303,7 @@ entrance:
LASTMARK_RESTORE();
if (ctx->count >= ctx->u.rep->pattern[2]
- && ctx->u.rep->pattern[2] != 65535)
+ && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
RETURN_FAILURE;
ctx->u.rep->count = ctx->count;
@@ -3042,7 +3042,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg;
if (min > max)
FAIL;
- if (max > 65535)
+ if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-4, groups))
FAIL;
@@ -3061,7 +3061,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg;
if (min > max)
FAIL;
- if (max > 65535)
+ if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
@@ -3938,6 +3938,12 @@ PyMODINIT_FUNC init_sre(void)
Py_DECREF(x);
}
+ x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+ if (x) {
+ PyDict_SetItemString(d, "MAXREPEAT", x);
+ Py_DECREF(x);
+ }
+
x = PyString_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);
diff --git a/Modules/sre.h b/Modules/sre.h
index cbc98f1..200e492 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -16,9 +16,19 @@
/* size of a code word (must be unsigned short or larger, and
large enough to hold a UCS4 character) */
#ifdef Py_USING_UNICODE
-#define SRE_CODE Py_UCS4
+# define SRE_CODE Py_UCS4
+# if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+# else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+# endif
#else
-#define SRE_CODE unsigned long
+# define SRE_CODE unsigned long
+# if SIZEOF_SIZE_T > SIZEOF_LONG
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+# else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+# endif
#endif
typedef struct {