diff options
author | Gustavo Niemeyer <gustavo@niemeyer.net> | 2003-10-17 22:13:16 (GMT) |
---|---|---|
committer | Gustavo Niemeyer <gustavo@niemeyer.net> | 2003-10-17 22:13:16 (GMT) |
commit | ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb (patch) | |
tree | 929ea71dea18a5ee0c5c862bbb39d37b693209ad /Lib | |
parent | 41e2809febd6e09a34adf21beb6d2ae2360fdc46 (diff) | |
download | cpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.zip cpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.tar.gz cpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.tar.bz2 |
Implemented non-recursive SRE matching.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_compile.py | 13 | ||||
-rw-r--r-- | Lib/sre_constants.py | 5 | ||||
-rw-r--r-- | Lib/sre_parse.py | 40 | ||||
-rw-r--r-- | Lib/test/test_re.py | 14 |
4 files changed, 62 insertions, 10 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 8a26a0f..ee0882d 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -145,6 +145,19 @@ def _compile(code, pattern, flags): else: emit(OPCODES[op]) emit(av-1) + elif op is GROUPREF_EXISTS: + emit(OPCODES[op]) + emit((av[0]-1)*2) + skipyes = len(code); emit(0) + _compile(code, av[1], flags) + if av[2]: + emit(OPCODES[JUMP]) + skipno = len(code); emit(0) + code[skipyes] = len(code) - skipyes + 1 + _compile(code, av[2], flags) + code[skipno] = len(code) - skipno + else: + code[skipyes] = len(code) - skipyes + 1 else: raise ValueError, ("unsupported operand type", op) diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index fa009c1..002b195 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -13,7 +13,7 @@ # update when constants are added or removed -MAGIC = 20030419 +MAGIC = 20031017 # max code word in this release @@ -42,6 +42,7 @@ CATEGORY = "category" CHARSET = "charset" GROUPREF = "groupref" GROUPREF_IGNORE = "groupref_ignore" +GROUPREF_EXISTS = "groupref_exists" IN = "in" IN_IGNORE = "in_ignore" INFO = "info" @@ -108,7 +109,7 @@ OPCODES = [ CALL, CATEGORY, CHARSET, BIGCHARSET, - GROUPREF, GROUPREF_IGNORE, + GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, IN, IN_IGNORE, INFO, JUMP, diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b85aea7..fe5d143 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -364,6 +364,20 @@ def _parse_sub(source, state, nested=1): subpattern.append((BRANCH, (None, items))) return subpattern +def _parse_sub_cond(source, state, condgroup): + item_yes = _parse(source, state) + if source.match("|"): + item_no = _parse(source, state) + if source.match("|"): + raise error, "conditional backref with more than two branches" + else: + item_no = None + if source.next and not source.match(")", 0): + raise error, "pattern not properly closed" + subpattern = SubPattern(state) + subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) + return subpattern + def _parse(source, state): # parse a simple pattern @@ -499,6 +513,7 @@ def _parse(source, state): elif this == "(": group = 1 name = None + condgroup = None if source.match("?"): group = 0 # options @@ -568,6 +583,26 @@ def _parse(source, state): else: subpattern.append((ASSERT_NOT, (dir, p))) continue + elif source.match("("): + # conditional backreference group + condname = "" + while 1: + char = source.get() + if char is None: + raise error, "unterminated name" + if char == ")": + break + condname = condname + char + group = 2 + if isname(condname): + condgroup = state.groupdict.get(condname) + if condgroup is None: + raise error, "unknown group name" + else: + try: + condgroup = atoi(condname) + except ValueError: + raise error, "bad character in group name" else: # flags if not source.next in FLAGS: @@ -581,7 +616,10 @@ def _parse(source, state): group = None else: group = state.opengroup(name) - p = _parse_sub(source, state) + if condgroup: + p = _parse_sub_cond(source, state, condgroup) + else: + p = _parse_sub(source, state) if not source.match(")"): raise error, "unbalanced parenthesis" if group is not None: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f724806..d2e2753 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -169,7 +169,6 @@ class ReTests(unittest.TestCase): self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) def test_re_groupref_exists(self): - return # not yet self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a')) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), @@ -405,19 +404,20 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 20003) self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) - # non-simple '*?' still recurses and hits the recursion limit - self.assertRaises(RuntimeError, re.search, '(a|b)*?c', 10000*'ab'+'cd') + # non-simple '*?' still used to hit the recursion limit, before the + # non-recursive scheme was implemented. + self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) def test_bug_612074(self): pat=u"["+re.escape(u"\u2039")+u"]" self.assertEqual(re.compile(pat) and 1, 1) def test_stack_overflow(self): - # nasty case that overflows the straightforward recursive + # nasty cases that used to overflow the straightforward recursive # implementation of repeated groups. - self.assertRaises(RuntimeError, re.match, '(x)*', 50000*'x') - self.assertRaises(RuntimeError, re.match, '(x)*y', 50000*'x'+'y') - self.assertRaises(RuntimeError, re.match, '(x)*?y', 50000*'x'+'y') + self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') + self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') + self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') def test_scanner(self): def s_ident(scanner, token): return token |