diff options
author | Guido van Rossum <guido@python.org> | 2003-04-14 17:59:34 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2003-04-14 17:59:34 (GMT) |
commit | 41c99e7f96f7a0f192839801c568d8a80dcc7091 (patch) | |
tree | 5733d278d932546d0b642dce7e631512da483f76 /Lib | |
parent | 44c62ef5ee2d48cfa0bd024507f19e47d987e6b3 (diff) | |
download | cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.zip cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.gz cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.bz2 |
SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's
addition of op code SRE_OP_MIN_REPEAT_ONE for
eliminating recursion on simple uses of pattern '*?' on a
long string.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_compile.py | 7 | ||||
-rw-r--r-- | Lib/sre_constants.py | 4 | ||||
-rw-r--r-- | Lib/sre_parse.py | 4 | ||||
-rw-r--r-- | Lib/test/test_sre.py | 13 |
4 files changed, 24 insertions, 4 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index bb17649..3e54819 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -55,8 +55,11 @@ def _compile(code, pattern, flags): _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip - elif _simple(av) and op == MAX_REPEAT: - emit(OPCODES[REPEAT_ONE]) + elif _simple(av) and op != REPEAT: + if op == MAX_REPEAT: + emit(OPCODES[REPEAT_ONE]) + else: + emit(OPCODES[MIN_REPEAT_ONE]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index b0b61d9..2cd85a3 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -60,6 +60,7 @@ RANGE = "range" REPEAT = "repeat" REPEAT_ONE = "repeat_one" SUBPATTERN = "subpattern" +MIN_REPEAT_ONE = "min_repeat_one" # positions AT_BEGINNING = "at_beginning" @@ -120,7 +121,8 @@ OPCODES = [ RANGE, REPEAT, REPEAT_ONE, - SUBPATTERN + SUBPATTERN, + MIN_REPEAT_ONE ] diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 1b52967..fdf6767 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -419,7 +419,7 @@ def _parse(source, state): set.append(code1) set.append((LITERAL, ord("-"))) break - else: + elif this: if this[0] == "\\": code2 = _class_escape(source, this) else: @@ -431,6 +431,8 @@ def _parse(source, state): if hi < lo: raise error, "bad character range" set.append((RANGE, (lo, hi))) + else: + raise error, "unexpected end of regular expression" else: if code1[0] is IN: code1 = code1[1][0] diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 6a00aff..e4eb08d 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -83,6 +83,19 @@ test(r"""sre.match(r'(a)?a','a').lastindex""", None) test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1) test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a') +# bug 545855 -- This pattern failed to cause a compile error as it +# should, instead provoking a TypeError. +test(r"""sre.compile('foo[a-')""", None, sre.error) + +# bugs 418626 at al. -- Testing Greg Chapman's addition of op code +# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of +# pattern '*?' on a long string. +test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001) +test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003) +test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001) +# non-simple '*?' still recurses and hits the recursion limit +test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError) + if verbose: print 'Running tests on sre.sub' |