diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-06-29 23:33:12 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-06-29 23:33:12 (GMT) |
commit | 29c08beab08ae3e8b9686a119f5cf0afe99ed918 (patch) | |
tree | af7731824bc150a290f02095bdaecf37edaaf605 /Lib | |
parent | 22e1bf7da556de6c14c1e3531db23ca2ff6d8fbb (diff) | |
download | cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.zip cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.tar.gz cpython-29c08beab08ae3e8b9686a119f5cf0afe99ed918.tar.bz2 |
Still trying to figure out how to fix the remaining
group reset problem. In the meantime, I added some
optimizations:
- added "inline" directive to LOCAL
(this assumes that AC_C_INLINE does what it's
supposed to do). To compile SRE on a non-Unix
platform that doesn't support inline, you have
to add a "#define inline" somewhere...
- added code to generate a SRE_OP_INFO primitive
- added code to do fast prefix search
(enabled by the USE_FAST_SEARCH define, which is
on by default in this release)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_compile.py | 57 |
1 files changed, 56 insertions, 1 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index c042375..344dc29 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -23,6 +23,7 @@ else: raise RuntimeError, "cannot find a useable array type" def _compile(code, pattern, flags): + # internal: compile a (sub)pattern emit = code.append for op, av in pattern: if op is ANY: @@ -152,21 +153,75 @@ def _compile(code, pattern, flags): else: raise ValueError, ("unsupported operand type", op) +def _compile_info(code, pattern, flags): + # internal: compile an info block. in the current version, + # this contains min/max pattern width and a literal prefix, + # if any + lo, hi = pattern.getwidth() + if lo == 0: + return # not worth it + # look for a literal prefix + prefix = [] + if not (flags & SRE_FLAG_IGNORECASE): + for op, av in pattern.data: + if op is LITERAL: + prefix.append(ord(av)) + else: + break + # add an info block + emit = code.append + emit(OPCODES[INFO]) + skip = len(code); emit(0) + # literal flag + mask = 0 + if len(prefix) == len(pattern.data): + mask = 1 + emit(mask) + # pattern length + emit(lo) + if hi < 32768: + emit(hi) + else: + emit(0) + # add literal prefix + emit(len(prefix)) + if prefix: + code.extend(prefix) + # generate overlap table + table = [-1] + ([0]*len(prefix)) + for i in range(len(prefix)): + table[i+1] = table[i]+1 + while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: + table[i+1] = table[table[i+1]-1]+1 + code.extend(table[1:]) # don't store first entry + code[skip] = len(code) - skip + def compile(p, flags=0): # internal: convert pattern list to internal format + + # compile, as necessary if type(p) in (type(""), type(u"")): import sre_parse pattern = p p = sre_parse.parse(p) else: pattern = None + flags = p.pattern.flags | flags code = [] + + # compile info block + _compile_info(code, p, flags) + + # compile the pattern _compile(code, p.data, flags) + code.append(OPCODES[SUCCESS]) - # FIXME: <fl> get rid of this limitation + + # FIXME: <fl> get rid of this 
limitation! assert p.pattern.groups <= 100,\ "sorry, but this version only supports 100 named groups" + return _sre.compile( pattern, flags, array.array(WORDSIZE, code).tostring(), |