summaryrefslogtreecommitdiffstats
path: root/Lib/sre_compile.py
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2000-04-10 17:10:48 (GMT)
committerGuido van Rossum <guido@python.org>2000-04-10 17:10:48 (GMT)
commitb81e70ebdb28246e427249d386518bc03d08c959 (patch)
tree4f2ba435b4815d7ff7f4f6abab7505fb16f4c7c7 /Lib/sre_compile.py
parent5de435a245fd7158b1a8db1201154ad73fd4bf13 (diff)
downloadcpython-b81e70ebdb28246e427249d386518bc03d08c959.zip
cpython-b81e70ebdb28246e427249d386518bc03d08c959.tar.gz
cpython-b81e70ebdb28246e427249d386518bc03d08c959.tar.bz2
Fredrik Lundh: new snapshot. Mostly reindented.
This one should work with unicode expressions, and compile a bit more silently.
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r--Lib/sre_compile.py250
1 files changed, 125 insertions, 125 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 600b237..8738061 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -26,7 +26,7 @@ from sre_constants import *
# find an array type code that matches the engine's code size
for WORDSIZE in "BHil":
if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
- break
+ break
else:
raise RuntimeError, "cannot find a useable array type"
@@ -34,18 +34,18 @@ else:
class Code:
def __init__(self):
- self.data = []
+ self.data = []
def __len__(self):
- return len(self.data)
+ return len(self.data)
def __getitem__(self, index):
- return self.data[index]
+ return self.data[index]
def __setitem__(self, index, code):
- self.data[index] = code
+ self.data[index] = code
def append(self, code):
- self.data.append(code)
+ self.data.append(code)
def todata(self):
- # print self.data
- return array.array(WORDSIZE, self.data).tostring()
+ # print self.data
+ return array.array(WORDSIZE, self.data).tostring()
def _lower(literal):
# return _sre._lower(literal) # FIXME
@@ -54,122 +54,122 @@ def _lower(literal):
def _compile(code, pattern, flags):
append = code.append
for op, av in pattern:
- if op is ANY:
- if "s" in flags:
- append(CODES[op]) # any character at all!
- else:
- append(CODES[NOT_LITERAL])
- append(10)
- elif op in (SUCCESS, FAILURE):
- append(CODES[op])
- elif op is AT:
- append(CODES[op])
- append(POSITIONS[av])
- elif op is BRANCH:
- append(CODES[op])
- tail = []
- for av in av[1]:
- skip = len(code); append(0)
- _compile(code, av, flags)
- append(CODES[JUMP])
- tail.append(len(code)); append(0)
- code[skip] = len(code) - skip
- append(0) # end of branch
- for tail in tail:
- code[tail] = len(code) - tail
- elif op is CALL:
- append(CODES[op])
- skip = len(code); append(0)
- _compile(code, av, flags)
- append(CODES[SUCCESS])
- code[skip] = len(code) - skip
- elif op is CATEGORY: # not used by current parser
- append(CODES[op])
- append(CATEGORIES[av])
- elif op is GROUP:
- if "i" in flags:
- append(CODES[MAP_IGNORE[op]])
- else:
- append(CODES[op])
- append(av)
- elif op is IN:
- if "i" in flags:
- append(CODES[MAP_IGNORE[op]])
- def fixup(literal):
- return ord(_lower(literal))
- else:
- append(CODES[op])
- fixup = ord
- skip = len(code); append(0)
- for op, av in av:
- append(CODES[op])
- if op is NEGATE:
- pass
- elif op is LITERAL:
- append(fixup(av))
- elif op is RANGE:
- append(fixup(av[0]))
- append(fixup(av[1]))
- elif op is CATEGORY:
- append(CATEGORIES[av])
- else:
- raise ValueError, "unsupported set operator"
- append(CODES[FAILURE])
- code[skip] = len(code) - skip
- elif op in (LITERAL, NOT_LITERAL):
- if "i" in flags:
- append(CODES[MAP_IGNORE[op]])
- append(ord(_lower(av)))
- else:
- append(CODES[op])
- append(ord(av))
- elif op is MARK:
- append(CODES[op])
- append(av)
- elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
- lo, hi = av[2].getwidth()
- if lo == 0:
- raise SyntaxError, "cannot repeat zero-width items"
- if lo == hi == 1 and op is MAX_REPEAT:
- append(CODES[MAX_REPEAT_ONE])
- skip = len(code); append(0)
- append(av[0])
- append(av[1])
- _compile(code, av[2], flags)
- append(CODES[SUCCESS])
- code[skip] = len(code) - skip
- else:
- append(CODES[op])
- skip = len(code); append(0)
- append(av[0])
- append(av[1])
- _compile(code, av[2], flags)
- if op is MIN_REPEAT:
- append(CODES[MIN_UNTIL])
- else:
- # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
- append(CODES[MAX_UNTIL])
- code[skip] = len(code) - skip
- elif op is SUBPATTERN:
-## group = av[0]
-## if group:
-## append(CODES[MARK])
-## append((group-1)*2)
- _compile(code, av[1], flags)
-## if group:
-## append(CODES[MARK])
-## append((group-1)*2+1)
- else:
- raise ValueError, ("unsupported operand type", op)
+ if op is ANY:
+ if "s" in flags:
+ append(CODES[op]) # any character at all!
+ else:
+ append(CODES[NOT_LITERAL])
+ append(10)
+ elif op in (SUCCESS, FAILURE):
+ append(CODES[op])
+ elif op is AT:
+ append(CODES[op])
+ append(POSITIONS[av])
+ elif op is BRANCH:
+ append(CODES[op])
+ tail = []
+ for av in av[1]:
+ skip = len(code); append(0)
+ _compile(code, av, flags)
+ append(CODES[JUMP])
+ tail.append(len(code)); append(0)
+ code[skip] = len(code) - skip
+ append(0) # end of branch
+ for tail in tail:
+ code[tail] = len(code) - tail
+ elif op is CALL:
+ append(CODES[op])
+ skip = len(code); append(0)
+ _compile(code, av, flags)
+ append(CODES[SUCCESS])
+ code[skip] = len(code) - skip
+ elif op is CATEGORY: # not used by current parser
+ append(CODES[op])
+ append(CATEGORIES[av])
+ elif op is GROUP:
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ else:
+ append(CODES[op])
+ append(av)
+ elif op is IN:
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ def fixup(literal):
+ return ord(_lower(literal))
+ else:
+ append(CODES[op])
+ fixup = ord
+ skip = len(code); append(0)
+ for op, av in av:
+ append(CODES[op])
+ if op is NEGATE:
+ pass
+ elif op is LITERAL:
+ append(fixup(av))
+ elif op is RANGE:
+ append(fixup(av[0]))
+ append(fixup(av[1]))
+ elif op is CATEGORY:
+ append(CATEGORIES[av])
+ else:
+ raise ValueError, "unsupported set operator"
+ append(CODES[FAILURE])
+ code[skip] = len(code) - skip
+ elif op in (LITERAL, NOT_LITERAL):
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ append(ord(_lower(av)))
+ else:
+ append(CODES[op])
+ append(ord(av))
+ elif op is MARK:
+ append(CODES[op])
+ append(av)
+ elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
+ lo, hi = av[2].getwidth()
+ if lo == 0:
+ raise SyntaxError, "cannot repeat zero-width items"
+ if lo == hi == 1 and op is MAX_REPEAT:
+ append(CODES[MAX_REPEAT_ONE])
+ skip = len(code); append(0)
+ append(av[0])
+ append(av[1])
+ _compile(code, av[2], flags)
+ append(CODES[SUCCESS])
+ code[skip] = len(code) - skip
+ else:
+ append(CODES[op])
+ skip = len(code); append(0)
+ append(av[0])
+ append(av[1])
+ _compile(code, av[2], flags)
+ if op is MIN_REPEAT:
+ append(CODES[MIN_UNTIL])
+ else:
+ # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
+ append(CODES[MAX_UNTIL])
+ code[skip] = len(code) - skip
+ elif op is SUBPATTERN:
+## group = av[0]
+## if group:
+## append(CODES[MARK])
+## append((group-1)*2)
+ _compile(code, av[1], flags)
+## if group:
+## append(CODES[MARK])
+## append((group-1)*2+1)
+ else:
+ raise ValueError, ("unsupported operand type", op)
def compile(p, flags=()):
# convert pattern list to internal format
- if type(p) is type(""):
- import sre_parse
- pattern = p
- p = sre_parse.parse(p)
+ if type(p) in (type(""), type(u"")):
+ import sre_parse
+ pattern = p
+ p = sre_parse.parse(p)
else:
- pattern = None
+ pattern = None
# print p.getwidth()
# print p
code = Code()
@@ -178,10 +178,10 @@ def compile(p, flags=()):
# print list(code.data)
data = code.todata()
if 0: # debugging
- print
- print "-" * 68
- import sre_disasm
- sre_disasm.disasm(data)
- print "-" * 68
+ print
+ print "-" * 68
+ import sre_disasm
+ sre_disasm.disasm(data)
+ print "-" * 68
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)