summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorGustavo Niemeyer <gustavo@niemeyer.net>2003-10-17 22:13:16 (GMT)
committerGustavo Niemeyer <gustavo@niemeyer.net>2003-10-17 22:13:16 (GMT)
commitad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb (patch)
tree929ea71dea18a5ee0c5c862bbb39d37b693209ad /Lib
parent41e2809febd6e09a34adf21beb6d2ae2360fdc46 (diff)
downloadcpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.zip
cpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.tar.gz
cpython-ad3fc44ccb40f2ad33c0d09f5a2dfbd4feb442eb.tar.bz2
Implemented non-recursive SRE matching.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/sre_compile.py13
-rw-r--r--Lib/sre_constants.py5
-rw-r--r--Lib/sre_parse.py40
-rw-r--r--Lib/test/test_re.py14
4 files changed, 62 insertions, 10 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 8a26a0f..ee0882d 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -145,6 +145,19 @@ def _compile(code, pattern, flags):
else:
emit(OPCODES[op])
emit(av-1)
+ elif op is GROUPREF_EXISTS:
+ emit(OPCODES[op])
+ emit((av[0]-1)*2)
+ skipyes = len(code); emit(0)
+ _compile(code, av[1], flags)
+ if av[2]:
+ emit(OPCODES[JUMP])
+ skipno = len(code); emit(0)
+ code[skipyes] = len(code) - skipyes + 1
+ _compile(code, av[2], flags)
+ code[skipno] = len(code) - skipno
+ else:
+ code[skipyes] = len(code) - skipyes + 1
else:
raise ValueError, ("unsupported operand type", op)
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index fa009c1..002b195 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -13,7 +13,7 @@
# update when constants are added or removed
-MAGIC = 20030419
+MAGIC = 20031017
# max code word in this release
@@ -42,6 +42,7 @@ CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
+GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
@@ -108,7 +109,7 @@ OPCODES = [
CALL,
CATEGORY,
CHARSET, BIGCHARSET,
- GROUPREF, GROUPREF_IGNORE,
+ GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
IN, IN_IGNORE,
INFO,
JUMP,
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index b85aea7..fe5d143 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -364,6 +364,20 @@ def _parse_sub(source, state, nested=1):
subpattern.append((BRANCH, (None, items)))
return subpattern
+def _parse_sub_cond(source, state, condgroup):
+ item_yes = _parse(source, state)
+ if source.match("|"):
+ item_no = _parse(source, state)
+ if source.match("|"):
+ raise error, "conditional backref with more than two branches"
+ else:
+ item_no = None
+ if source.next and not source.match(")", 0):
+ raise error, "pattern not properly closed"
+ subpattern = SubPattern(state)
+ subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
+ return subpattern
+
def _parse(source, state):
# parse a simple pattern
@@ -499,6 +513,7 @@ def _parse(source, state):
elif this == "(":
group = 1
name = None
+ condgroup = None
if source.match("?"):
group = 0
# options
@@ -568,6 +583,26 @@ def _parse(source, state):
else:
subpattern.append((ASSERT_NOT, (dir, p)))
continue
+ elif source.match("("):
+ # conditional backreference group
+ condname = ""
+ while 1:
+ char = source.get()
+ if char is None:
+ raise error, "unterminated name"
+ if char == ")":
+ break
+ condname = condname + char
+ group = 2
+ if isname(condname):
+ condgroup = state.groupdict.get(condname)
+ if condgroup is None:
+ raise error, "unknown group name"
+ else:
+ try:
+ condgroup = atoi(condname)
+ except ValueError:
+ raise error, "bad character in group name"
else:
# flags
if not source.next in FLAGS:
@@ -581,7 +616,10 @@ def _parse(source, state):
group = None
else:
group = state.opengroup(name)
- p = _parse_sub(source, state)
+ if condgroup:
+ p = _parse_sub_cond(source, state, condgroup)
+ else:
+ p = _parse_sub(source, state)
if not source.match(")"):
raise error, "unbalanced parenthesis"
if group is not None:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f724806..d2e2753 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -169,7 +169,6 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
def test_re_groupref_exists(self):
- return # not yet
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
('(', 'a'))
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
@@ -405,19 +404,20 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
20003)
self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
- # non-simple '*?' still recurses and hits the recursion limit
- self.assertRaises(RuntimeError, re.search, '(a|b)*?c', 10000*'ab'+'cd')
+ # non-simple '*?' still used to hit the recursion limit, before the
+ # non-recursive scheme was implemented.
+ self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
def test_bug_612074(self):
pat=u"["+re.escape(u"\u2039")+u"]"
self.assertEqual(re.compile(pat) and 1, 1)
def test_stack_overflow(self):
- # nasty case that overflows the straightforward recursive
+ # nasty cases that used to overflow the straightforward recursive
# implementation of repeated groups.
- self.assertRaises(RuntimeError, re.match, '(x)*', 50000*'x')
- self.assertRaises(RuntimeError, re.match, '(x)*y', 50000*'x'+'y')
- self.assertRaises(RuntimeError, re.match, '(x)*?y', 50000*'x'+'y')
+ self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
+ self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
+ self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
def test_scanner(self):
def s_ident(scanner, token): return token