summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2003-04-14 17:59:34 (GMT)
committerGuido van Rossum <guido@python.org>2003-04-14 17:59:34 (GMT)
commit41c99e7f96f7a0f192839801c568d8a80dcc7091 (patch)
tree5733d278d932546d0b642dce7e631512da483f76 /Lib
parent44c62ef5ee2d48cfa0bd024507f19e47d987e6b3 (diff)
downloadcpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.zip
cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.gz
cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.bz2
SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's addition of op code SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of pattern '*?' on a long string.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/sre_compile.py7
-rw-r--r--Lib/sre_constants.py4
-rw-r--r--Lib/sre_parse.py4
-rw-r--r--Lib/test/test_sre.py13
4 files changed, 24 insertions, 4 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index bb17649..3e54819 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -55,8 +55,11 @@ def _compile(code, pattern, flags):
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
- elif _simple(av) and op == MAX_REPEAT:
- emit(OPCODES[REPEAT_ONE])
+ elif _simple(av) and op != REPEAT:
+ if op == MAX_REPEAT:
+ emit(OPCODES[REPEAT_ONE])
+ else:
+ emit(OPCODES[MIN_REPEAT_ONE])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index b0b61d9..2cd85a3 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -60,6 +60,7 @@ RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
+MIN_REPEAT_ONE = "min_repeat_one"
# positions
AT_BEGINNING = "at_beginning"
@@ -120,7 +121,8 @@ OPCODES = [
RANGE,
REPEAT,
REPEAT_ONE,
- SUBPATTERN
+ SUBPATTERN,
+ MIN_REPEAT_ONE
]
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 1b52967..fdf6767 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -419,7 +419,7 @@ def _parse(source, state):
set.append(code1)
set.append((LITERAL, ord("-")))
break
- else:
+ elif this:
if this[0] == "\\":
code2 = _class_escape(source, this)
else:
@@ -431,6 +431,8 @@ def _parse(source, state):
if hi < lo:
raise error, "bad character range"
set.append((RANGE, (lo, hi)))
+ else:
+ raise error, "unexpected end of regular expression"
else:
if code1[0] is IN:
code1 = code1[1][0]
diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py
index 6a00aff..e4eb08d 100644
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
@@ -83,6 +83,19 @@ test(r"""sre.match(r'(a)?a','a').lastindex""", None)
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
+# bug 545855 -- This pattern failed to cause a compile error as it
+# should, instead provoking a TypeError.
+test(r"""sre.compile('foo[a-')""", None, sre.error)
+
+# bugs 418626 at al. -- Testing Greg Chapman's addition of op code
+# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
+# pattern '*?' on a long string.
+test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001)
+test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003)
+test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001)
+# non-simple '*?' still recurses and hits the recursion limit
+test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError)
+
if verbose:
print 'Running tests on sre.sub'