summary refs log tree commit diff stats
path: root/Lib/sre_compile.py
diff options
context:
space:
mode:
author Fredrik Lundh <fredrik@pythonware.com> 2000-06-29 23:33:12 (GMT)
committer Fredrik Lundh <fredrik@pythonware.com> 2000-06-29 23:33:12 (GMT)
commit 29c08beab08ae3e8b9686a119f5cf0afe99ed918 (patch)
tree af7731824bc150a290f02095bdaecf37edaaf605 /Lib/sre_compile.py
parent 22e1bf7da556de6c14c1e3531db23ca2ff6d8fbb (diff)
download cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.zip
cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.tar.gz
cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.tar.bz2
still trying to figure out how to fix the remaining group reset
problem. in the meantime, I added some optimizations:

- added "inline" directive to LOCAL (this assumes that AC_C_INLINE
  does what it's supposed to do). to compile SRE on a non-unix
  platform that doesn't support inline, you have to add a
  "#define inline" somewhere...
- added code to generate a SRE_OP_INFO primitive
- added code to do fast prefix search (enabled by the
  USE_FAST_SEARCH define; default is on, in this release)
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- Lib/sre_compile.py 57
1 files changed, 56 insertions, 1 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index c042375..344dc29 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -23,6 +23,7 @@ else:
raise RuntimeError, "cannot find a useable array type"
def _compile(code, pattern, flags):
+ # internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
if op is ANY:
@@ -152,21 +153,75 @@ def _compile(code, pattern, flags):
else:
raise ValueError, ("unsupported operand type", op)
+def _compile_info(code, pattern, flags):
+ # internal: append an INFO block to code. the block holds a skip
+ # count, a literal flag, the min/max pattern width, and the length
+ # of the literal prefix (followed by the prefix and its overlap table, if any)
+ lo, hi = pattern.getwidth() # min/max pattern width
+ if lo == 0:
+ return # not worth it; nothing is added to code
+ # look for a literal prefix: leading LITERAL items, as character codes (skipped when SRE_FLAG_IGNORECASE is set)
+ prefix = []
+ if not (flags & SRE_FLAG_IGNORECASE):
+ for op, av in pattern.data:
+ if op is LITERAL:
+ prefix.append(ord(av))
+ else:
+ break # prefix ends at the first non-LITERAL item
+ # add an info block
+ emit = code.append
+ emit(OPCODES[INFO])
+ skip = len(code); emit(0) # placeholder; patched on the last line
+ # literal flag: 1 if the prefix covers every item in pattern.data
+ mask = 0
+ if len(prefix) == len(pattern.data):
+ mask = 1
+ emit(mask)
+ # pattern width: lo, then hi (0 is stored instead when hi >= 32768)
+ emit(lo)
+ if hi < 32768:
+ emit(hi)
+ else:
+ emit(0)
+ # add literal prefix, preceded by its length
+ emit(len(prefix))
+ if prefix:
+ code.extend(prefix)
+ # generate overlap table from the prefix (starts with a -1 sentinel)
+ table = [-1] + ([0]*len(prefix))
+ for i in range(len(prefix)):
+ table[i+1] = table[i]+1
+ while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
+ table[i+1] = table[table[i+1]-1]+1
+ code.extend(table[1:]) # don't store first entry (the -1 sentinel)
+ code[skip] = len(code) - skip # offset from the skip slot to the end of code
+
def compile(p, flags=0):
# internal: convert pattern list to internal format
+
+ # compile, as necessary
if type(p) in (type(""), type(u"")):
import sre_parse
pattern = p
p = sre_parse.parse(p)
else:
pattern = None
+
flags = p.pattern.flags | flags
code = []
+
+ # compile info block
+ _compile_info(code, p, flags)
+
+ # compile the pattern
_compile(code, p.data, flags)
+
code.append(OPCODES[SUCCESS])
- # FIXME: <fl> get rid of this limitation
+
+ # FIXME: <fl> get rid of this limitation!
assert p.pattern.groups <= 100,\
"sorry, but this version only supports 100 named groups"
+
return _sre.compile(
pattern, flags,
array.array(WORDSIZE, code).tostring(),