summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorFredrik Lundh <fredrik@pythonware.com>2000-06-30 10:41:31 (GMT)
committerFredrik Lundh <fredrik@pythonware.com>2000-06-30 10:41:31 (GMT)
commit43b3b49b5ab486295baef3a35cd8e836f735c065 (patch)
tree3ca960c788dafde87799443f2087dc661dc52d48 /Lib
parenta4657f736c34e4bb1802b40246028c5bec545fe6 (diff)
downloadcpython-43b3b49b5ab486295baef3a35cd8e836f735c065.zip
cpython-43b3b49b5ab486295baef3a35cd8e836f735c065.tar.gz
cpython-43b3b49b5ab486295baef3a35cd8e836f735c065.tar.bz2
- fixed lookahead assertions (#10, #11, #12)
- untabified sre_constants.py
Diffstat (limited to 'Lib')
-rw-r--r--Lib/sre_compile.py100
-rw-r--r--Lib/sre_constants.py32
-rw-r--r--Lib/sre_parse.py19
-rw-r--r--Lib/test/output/test_sre3
4 files changed, 93 insertions, 61 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 9fdc8f3..0829c00 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
- if op is ANY:
- if flags & SRE_FLAG_DOTALL:
- emit(OPCODES[op])
- else:
- emit(OPCODES[CATEGORY])
- emit(CHCODES[CATEGORY_NOT_LINEBREAK])
- elif op in (SUCCESS, FAILURE):
- emit(OPCODES[op])
- elif op is AT:
- emit(OPCODES[op])
- if flags & SRE_FLAG_MULTILINE:
- emit(ATCODES[AT_MULTILINE[av]])
- else:
- emit(ATCODES[av])
- elif op is BRANCH:
- emit(OPCODES[op])
- tail = []
- for av in av[1]:
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[JUMP])
- tail.append(len(code)); emit(0)
- code[skip] = len(code) - skip
- emit(0) # end of branch
- for tail in tail:
- code[tail] = len(code) - tail
- elif op is CALL:
- emit(OPCODES[op])
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
- elif op is CATEGORY:
- emit(OPCODES[op])
- if flags & SRE_FLAG_LOCALE:
- emit(CHCODES[CH_LOCALE[av]])
- elif flags & SRE_FLAG_UNICODE:
- emit(CHCODES[CH_UNICODE[av]])
- else:
- emit(CHCODES[av])
- elif op is GROUP:
+ if op in (LITERAL, NOT_LITERAL):
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
else:
emit(OPCODES[op])
- emit(av-1)
+ emit(ord(av))
elif op is IN:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
@@ -101,15 +61,12 @@ def _compile(code, pattern, flags):
raise error, "internal: unsupported set operator"
emit(OPCODES[FAILURE])
code[skip] = len(code) - skip
- elif op in (LITERAL, NOT_LITERAL):
- if flags & SRE_FLAG_IGNORECASE:
- emit(OPCODES[OP_IGNORE[op]])
- else:
+ elif op is ANY:
+ if flags & SRE_FLAG_DOTALL:
emit(OPCODES[op])
- emit(ord(av))
- elif op is MARK:
- emit(OPCODES[op])
- emit(av)
+ else:
+ emit(OPCODES[CATEGORY])
+ emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
emit(OPCODES[REPEAT])
@@ -150,6 +107,49 @@ def _compile(code, pattern, flags):
if group:
emit(OPCODES[MARK])
emit((group-1)*2+1)
+ elif op in (SUCCESS, FAILURE):
+ emit(OPCODES[op])
+ elif op in (ASSERT, ASSERT_NOT, CALL):
+ emit(OPCODES[op])
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[SUCCESS])
+ code[skip] = len(code) - skip
+ elif op is AT:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_MULTILINE:
+ emit(ATCODES[AT_MULTILINE[av]])
+ else:
+ emit(ATCODES[av])
+ elif op is BRANCH:
+ emit(OPCODES[op])
+ tail = []
+ for av in av[1]:
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[JUMP])
+ tail.append(len(code)); emit(0)
+ code[skip] = len(code) - skip
+ emit(0) # end of branch
+ for tail in tail:
+ code[tail] = len(code) - tail
+ elif op is CATEGORY:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_LOCALE:
+ emit(CHCODES[CH_LOCALE[av]])
+ elif flags & SRE_FLAG_UNICODE:
+ emit(CHCODES[CH_UNICODE[av]])
+ else:
+ emit(CHCODES[av])
+ elif op is GROUP:
+ if flags & SRE_FLAG_IGNORECASE:
+ emit(OPCODES[OP_IGNORE[op]])
+ else:
+ emit(OPCODES[op])
+ emit(av-1)
+ elif op is MARK:
+ emit(OPCODES[op])
+ emit(av)
else:
raise ValueError, ("unsupported operand type", op)
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index f5e7894..45f4f48 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -23,6 +23,7 @@ SUCCESS = "success"
ANY = "any"
ASSERT = "assert"
+ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
@@ -81,7 +82,7 @@ OPCODES = [
FAILURE, SUCCESS,
ANY,
- ASSERT,
+ ASSERT, ASSERT_NOT,
AT,
BRANCH,
CALL,
@@ -121,8 +122,8 @@ def makedict(list):
d = {}
i = 0
for item in list:
- d[item] = i
- i = i + 1
+ d[item] = i
+ i = i + 1
return d
OPCODES = makedict(OPCODES)
@@ -176,12 +177,27 @@ SRE_FLAG_VERBOSE = 64
if __name__ == "__main__":
import string
def dump(f, d, prefix):
- items = d.items()
- items.sort(lambda a, b: cmp(a[1], b[1]))
- for k, v in items:
- f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+ items = d.items()
+ items.sort(lambda a, b: cmp(a[1], b[1]))
+ for k, v in items:
+ f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
- f.write("/* generated from sre_constants.py */\n")
+ f.write("""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py. If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
+""")
+
dump(f, OPCODES, "SRE_OP")
dump(f, ATCODES, "SRE")
dump(f, CHCODES, "SRE")
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index a6f3082..d3dbe00 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
if source.next is None or source.next == ")":
break
source.get()
+ elif source.next in ("=", "!"):
+ # lookahead assertions
+ char = source.get()
+ b = []
+ while 1:
+ p = _parse(source, state, flags)
+ if source.next == ")":
+ if b:
+ b.append(p)
+ p = _branch(state, b)
+ if char == "=":
+ subpattern.append((ASSERT, p))
+ else:
+ subpattern.append((ASSERT_NOT, p))
+ break
+ elif source.match("|"):
+ b.append(p)
+ else:
+ raise error, "pattern not properly closed"
else:
# flags
while FLAGS.has_key(source.next):
diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre
index 75caa55..d3732b5 100644
--- a/Lib/test/output/test_sre
+++ b/Lib/test/output/test_sre
@@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
-=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')