summaryrefslogtreecommitdiffstats
path: root/Lib/sre_compile.py
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2004-03-26 11:16:55 (GMT)
committerRaymond Hettinger <python@rcn.com>2004-03-26 11:16:55 (GMT)
commit01c9f8c35f583338e3638906ceeef9d2f29a0254 (patch)
tree46ba5f9ad31ed05eb22195b0e20ccd78b90e2bc5 /Lib/sre_compile.py
parent707483fdef1dcb91e5239f7dd16db509ce2737c7 (diff)
downloadcpython-01c9f8c35f583338e3638906ceeef9d2f29a0254.zip
cpython-01c9f8c35f583338e3638906ceeef9d2f29a0254.tar.gz
cpython-01c9f8c35f583338e3638906ceeef9d2f29a0254.tar.bz2
Simple optimizations:
* pre-build a single identity function for the fixup function * pre-build membership tests in dictionaries instead of in-line tuples * assign len() to a local variable * assign append() methods to a local variable * use xrange() instead of range() * replace "x<<1" with "x+x"
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r--Lib/sre_compile.py116
1 files changed, 69 insertions, 47 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 1d5dae3..5d42b2b 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -21,11 +21,25 @@ if _sre.CODESIZE == 2:
else:
MAXCODE = 0xFFFFFFFFL
+def _identityfunction(x):
+ return x
+
+# use xrange if available
+try:
+ xrange
+except NameError:
+ xrange = range
+
def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit = code.append
+ _len = len
+ LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1}
+ REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1}
+ SUCCESS_CODES = {SUCCESS:1, FAILURE:1}
+ ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1}
for op, av in pattern:
- if op in (LITERAL, NOT_LITERAL):
+ if op in LITERAL_CODES:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
emit(_sre.getlower(av, flags))
@@ -39,44 +53,44 @@ def _compile(code, pattern, flags):
return _sre.getlower(literal, flags)
else:
emit(OPCODES[op])
- fixup = lambda x: x
- skip = len(code); emit(0)
+ fixup = _identityfunction
+ skip = _len(code); emit(0)
_compile_charset(av, flags, code, fixup)
- code[skip] = len(code) - skip
+ code[skip] = _len(code) - skip
elif op is ANY:
if flags & SRE_FLAG_DOTALL:
emit(OPCODES[ANY_ALL])
else:
emit(OPCODES[ANY])
- elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
+ elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT])
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
- elif _simple(av) and op != REPEAT:
- if op == MAX_REPEAT:
+ code[skip] = _len(code) - skip
+ elif _simple(av) and op is not REPEAT:
+ if op is MAX_REPEAT:
emit(OPCODES[REPEAT_ONE])
else:
emit(OPCODES[MIN_REPEAT_ONE])
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
+ code[skip] = _len(code) - skip
else:
emit(OPCODES[REPEAT])
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
- code[skip] = len(code) - skip
- if op == MAX_REPEAT:
+ code[skip] = _len(code) - skip
+ if op is MAX_REPEAT:
emit(OPCODES[MAX_UNTIL])
else:
emit(OPCODES[MIN_UNTIL])
@@ -89,11 +103,11 @@ def _compile(code, pattern, flags):
if av[0]:
emit(OPCODES[MARK])
emit((av[0]-1)*2+1)
- elif op in (SUCCESS, FAILURE):
+ elif op in SUCCESS_CODES:
emit(OPCODES[op])
- elif op in (ASSERT, ASSERT_NOT):
+ elif op in ASSERT_CODES:
emit(OPCODES[op])
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
if av[0] >= 0:
emit(0) # look ahead
else:
@@ -103,13 +117,13 @@ def _compile(code, pattern, flags):
emit(lo) # look behind
_compile(code, av[1], flags)
emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
+ code[skip] = _len(code) - skip
elif op is CALL:
emit(OPCODES[op])
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
_compile(code, av, flags)
emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
+ code[skip] = _len(code) - skip
elif op is AT:
emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE:
@@ -122,16 +136,17 @@ def _compile(code, pattern, flags):
elif op is BRANCH:
emit(OPCODES[op])
tail = []
+ tailappend = tail.append
for av in av[1]:
- skip = len(code); emit(0)
+ skip = _len(code); emit(0)
# _compile_info(code, av, flags)
_compile(code, av, flags)
emit(OPCODES[JUMP])
- tail.append(len(code)); emit(0)
- code[skip] = len(code) - skip
+ tailappend(_len(code)); emit(0)
+ code[skip] = _len(code) - skip
emit(0) # end of branch
for tail in tail:
- code[tail] = len(code) - tail
+ code[tail] = _len(code) - tail
elif op is CATEGORY:
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
@@ -148,16 +163,16 @@ def _compile(code, pattern, flags):
elif op is GROUPREF_EXISTS:
emit(OPCODES[op])
emit((av[0]-1)*2)
- skipyes = len(code); emit(0)
+ skipyes = _len(code); emit(0)
_compile(code, av[1], flags)
if av[2]:
emit(OPCODES[JUMP])
- skipno = len(code); emit(0)
- code[skipyes] = len(code) - skipyes + 1
+ skipno = _len(code); emit(0)
+ code[skipyes] = _len(code) - skipyes + 1
_compile(code, av[2], flags)
- code[skipno] = len(code) - skipno
+ code[skipno] = _len(code) - skipno
else:
- code[skipyes] = len(code) - skipyes + 1
+ code[skipyes] = _len(code) - skipyes + 1
else:
raise ValueError, ("unsupported operand type", op)
@@ -165,7 +180,7 @@ def _compile_charset(charset, flags, code, fixup=None):
# compile charset subprogram
emit = code.append
if fixup is None:
- fixup = lambda x: x
+ fixup = _identityfunction
for op, av in _optimize_charset(charset, fixup):
emit(OPCODES[op])
if op is NEGATE:
@@ -193,11 +208,12 @@ def _compile_charset(charset, flags, code, fixup=None):
def _optimize_charset(charset, fixup):
# internal: optimize character set
out = []
+ outappend = out.append
charmap = [False]*256
try:
for op, av in charset:
if op is NEGATE:
- out.append((op, av))
+ outappend((op, av))
elif op is LITERAL:
charmap[fixup(av)] = True
elif op is RANGE:
@@ -212,35 +228,37 @@ def _optimize_charset(charset, fixup):
# compress character map
i = p = n = 0
runs = []
+ runsappend = runs.append
for c in charmap:
if c:
if n == 0:
p = i
n = n + 1
elif n:
- runs.append((p, n))
+ runsappend((p, n))
n = 0
i = i + 1
if n:
- runs.append((p, n))
+ runsappend((p, n))
if len(runs) <= 2:
# use literal/range
for p, n in runs:
if n == 1:
- out.append((LITERAL, p))
+ outappend((LITERAL, p))
else:
- out.append((RANGE, (p, p+n-1)))
+ outappend((RANGE, (p, p+n-1)))
if len(out) < len(charset):
return out
else:
# use bitmap
data = _mk_bitmap(charmap)
- out.append((CHARSET, data))
+ outappend((CHARSET, data))
return out
return charset
def _mk_bitmap(bits):
data = []
+ dataappend = data.append
if _sre.CODESIZE == 2:
start = (1, 0)
else:
@@ -249,9 +267,9 @@ def _mk_bitmap(bits):
for c in bits:
if c:
v = v + m
- m = m << 1
+ m = m + m
if m > MAXCODE:
- data.append(v)
+ dataappend(v)
m, v = start
return data
@@ -295,7 +313,7 @@ def _optimize_unicode(charset, fixup):
elif op is LITERAL:
charmap[fixup(av)] = True
elif op is RANGE:
- for i in range(fixup(av[0]), fixup(av[1])+1):
+ for i in xrange(fixup(av[0]), fixup(av[1])+1):
charmap[i] = True
elif op is CATEGORY:
# XXX: could expand category
@@ -307,13 +325,13 @@ def _optimize_unicode(charset, fixup):
if sys.maxunicode != 65535:
# XXX: negation does not work with big charsets
return charset
- for i in range(65536):
+ for i in xrange(65536):
charmap[i] = not charmap[i]
comps = {}
mapping = [0]*256
block = 0
data = []
- for i in range(256):
+ for i in xrange(256):
chunk = tuple(charmap[i*256:(i+1)*256])
new = comps.setdefault(chunk, block)
mapping[i] = new
@@ -348,19 +366,21 @@ def _compile_info(code, pattern, flags):
return # not worth it
# look for a literal prefix
prefix = []
+ prefixappend = prefix.append
prefix_skip = 0
charset = [] # not used
+ charsetappend = charset.append
if not (flags & SRE_FLAG_IGNORECASE):
# look for literal prefix
for op, av in pattern.data:
if op is LITERAL:
if len(prefix) == prefix_skip:
prefix_skip = prefix_skip + 1
- prefix.append(av)
+ prefixappend(av)
elif op is SUBPATTERN and len(av[1]) == 1:
op, av = av[1][0]
if op is LITERAL:
- prefix.append(av)
+ prefixappend(av)
else:
break
else:
@@ -371,27 +391,29 @@ def _compile_info(code, pattern, flags):
if op is SUBPATTERN and av[1]:
op, av = av[1][0]
if op is LITERAL:
- charset.append((op, av))
+ charsetappend((op, av))
elif op is BRANCH:
c = []
+ cappend = c.append
for p in av[1]:
if not p:
break
op, av = p[0]
if op is LITERAL:
- c.append((op, av))
+ cappend((op, av))
else:
break
else:
charset = c
elif op is BRANCH:
c = []
+ cappend = c.append
for p in av[1]:
if not p:
break
op, av = p[0]
if op is LITERAL:
- c.append((op, av))
+ cappend((op, av))
else:
break
else:
@@ -432,7 +454,7 @@ def _compile_info(code, pattern, flags):
code.extend(prefix)
# generate overlap table
table = [-1] + ([0]*len(prefix))
- for i in range(len(prefix)):
+ for i in xrange(len(prefix)):
table[i+1] = table[i]+1
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
table[i+1] = table[table[i+1]-1]+1