summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com> 2000-08-01 18:20:07 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com> 2000-08-01 18:20:07 (GMT)
commit: 29c4ba9ada44d62988c62c85c8046985f10a1c85 (patch)
tree: 89f38c5859e98069d05491dcd977e338477fd2d2 /Lib
parent: 19c6afb42b12c3a50900b4157c8398e01acad91f (diff)
download: cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.zip
          cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.tar.gz
          cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.tar.bz2
SRE 0.9.8: passes the entire test suite
-- reverted REPEAT operator to use "repeat context" strategy (from 0.8.X), but done right this time.
-- got rid of backtracking stack; use nested SRE_MATCH calls instead (should probably put it back again in 0.9.9 ;-)
-- properly reset state in scanner mode
-- don't use aggressive inlining by default
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/sre.py 13
-rw-r--r-- Lib/sre_compile.py 64
-rw-r--r-- Lib/sre_constants.py 18
-rw-r--r-- Lib/sre_parse.py 6
4 files changed, 46 insertions, 55 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 6dd1df9..3e125a7 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -5,8 +5,12 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license. For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
@@ -24,7 +28,7 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
-# sre extensions (may or may not be in 2.0 final)
+# sre extensions (may or may not be in 1.6/2.0 final)
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
@@ -168,15 +172,14 @@ copy_reg.pickle(type(_compile("")), _pickle, _compile)
class Scanner:
def __init__(self, lexicon):
- from sre_constants import BRANCH, SUBPATTERN, INDEX
+ from sre_constants import BRANCH, SUBPATTERN
self.lexicon = lexicon
# combine phrases into a compound pattern
p = []
s = sre_parse.Pattern()
for phrase, action in lexicon:
p.append(sre_parse.SubPattern(s, [
- (SUBPATTERN, (None, sre_parse.parse(phrase))),
- (INDEX, len(p))
+ (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
]))
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
s.groups = len(p)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index ef26e1c..2d1cbb1 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -5,9 +5,7 @@
#
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
import _sre
@@ -124,6 +122,7 @@ def _compile(code, pattern, flags):
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
+ raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT])
skip = len(code); emit(0)
emit(av[0])
@@ -136,9 +135,8 @@ def _compile(code, pattern, flags):
if lo == 0:
raise error, "nothing to repeat"
if 0 and lo == hi == 1 and op is MAX_REPEAT:
- # FIXME: <fl> need a better way to figure out when
- # it's safe to use this one (in the parser, probably)
- emit(OPCODES[MAX_REPEAT_ONE])
+ # FIXME: <fl> fast and wrong (but we'll fix that)
+ emit(OPCODES[REPEAT_ONE])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
@@ -146,29 +144,24 @@ def _compile(code, pattern, flags):
emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
else:
- emit(OPCODES[op])
+ emit(OPCODES[REPEAT])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
- mark = MAXCODE
- if av[2][0][0] == SUBPATTERN:
- # repeated subpattern
- gid, foo = av[2][0][1]
- if gid:
- mark = (gid-1)*2
- emit(mark)
_compile(code, av[2], flags)
- emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
+ if op == MAX_REPEAT:
+ emit(OPCODES[MAX_UNTIL])
+ else:
+ emit(OPCODES[MIN_UNTIL])
elif op is SUBPATTERN:
- gid = av[0]
- if gid:
+ if av[0]:
emit(OPCODES[MARK])
- emit((gid-1)*2)
+ emit((av[0]-1)*2)
_compile(code, av[1], flags)
- if gid:
+ if av[0]:
emit(OPCODES[MARK])
- emit((gid-1)*2+1)
+ emit((av[0]-1)*2+1)
elif op in (SUCCESS, FAILURE):
emit(OPCODES[op])
elif op in (ASSERT, ASSERT_NOT):
@@ -197,11 +190,10 @@ def _compile(code, pattern, flags):
else:
emit(ATCODES[av])
elif op is BRANCH:
+ emit(OPCODES[op])
tail = []
for av in av[1]:
- emit(OPCODES[op])
skip = len(code); emit(0)
- emit(MAXCODE) # save mark
_compile(code, av, flags)
emit(OPCODES[JUMP])
tail.append(len(code)); emit(0)
@@ -223,9 +215,6 @@ def _compile(code, pattern, flags):
else:
emit(OPCODES[op])
emit(av-1)
- elif op in (MARK, INDEX):
- emit(OPCODES[op])
- emit(av)
else:
raise ValueError, ("unsupported operand type", op)
@@ -294,16 +283,7 @@ try:
except NameError:
pass
-def compile(p, flags=0):
- # internal: convert pattern list to internal format
-
- # compile, as necessary
- if type(p) in STRING_TYPES:
- import sre_parse
- pattern = p
- p = sre_parse.parse(p, flags)
- else:
- pattern = None
+def _compile1(p, flags):
flags = p.pattern.flags | flags
code = []
@@ -316,6 +296,20 @@ def compile(p, flags=0):
code.append(OPCODES[SUCCESS])
+ return code
+
+def compile(p, flags=0):
+ # internal: convert pattern list to internal format
+
+ if type(p) in STRING_TYPES:
+ import sre_parse
+ pattern = p
+ p = sre_parse.parse(p, flags)
+ else:
+ pattern = None
+
+ code = _compile1(p, flags)
+
# print code
# FIXME: <fl> get rid of this limitation!
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index ef32c32..e595915 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -6,9 +6,7 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
# should this really be here?
@@ -33,15 +31,15 @@ GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
-INDEX = "index"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
-MAX_REPEAT_ONE = "max_repeat_one"
+MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
+MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
@@ -91,19 +89,19 @@ OPCODES = [
CATEGORY,
CHARSET,
GROUPREF, GROUPREF_IGNORE,
- INDEX,
IN, IN_IGNORE,
INFO,
JUMP,
LITERAL, LITERAL_IGNORE,
MARK,
- MAX_REPEAT,
- MAX_REPEAT_ONE,
- MIN_REPEAT,
+ MAX_UNTIL,
+ MIN_UNTIL,
NOT_LITERAL, NOT_LITERAL_IGNORE,
NEGATE,
RANGE,
- REPEAT
+ REPEAT,
+ REPEAT_ONE,
+ SUBPATTERN
]
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 1b56352..299aa0e 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -5,9 +5,7 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
import string, sys
@@ -536,8 +534,6 @@ def _parse(source, state):
group = state.getgroup(name)
p = _parse_sub(source, state)
subpattern.append((SUBPATTERN, (group, p)))
- if group is not None:
- p.append((INDEX, group))
else:
while 1:
char = source.get()