diff options
author | Guido van Rossum <guido@python.org> | 2000-04-10 17:10:48 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2000-04-10 17:10:48 (GMT) |
commit | b81e70ebdb28246e427249d386518bc03d08c959 (patch) | |
tree | 4f2ba435b4815d7ff7f4f6abab7505fb16f4c7c7 /Lib/sre_compile.py | |
parent | 5de435a245fd7158b1a8db1201154ad73fd4bf13 (diff) | |
download | cpython-b81e70ebdb28246e427249d386518bc03d08c959.zip cpython-b81e70ebdb28246e427249d386518bc03d08c959.tar.gz cpython-b81e70ebdb28246e427249d386518bc03d08c959.tar.bz2 |
Fredrik Lundh: new snapshot. Mostly reindented.
This one should work with unicode expressions, and compile
a bit more silently.
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- | Lib/sre_compile.py | 250 |
1 files changed, 125 insertions, 125 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 600b237..8738061 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -26,7 +26,7 @@ from sre_constants import * # find an array type code that matches the engine's code size for WORDSIZE in "BHil": if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize(): - break + break else: raise RuntimeError, "cannot find a useable array type" @@ -34,18 +34,18 @@ else: class Code: def __init__(self): - self.data = [] + self.data = [] def __len__(self): - return len(self.data) + return len(self.data) def __getitem__(self, index): - return self.data[index] + return self.data[index] def __setitem__(self, index, code): - self.data[index] = code + self.data[index] = code def append(self, code): - self.data.append(code) + self.data.append(code) def todata(self): - # print self.data - return array.array(WORDSIZE, self.data).tostring() + # print self.data + return array.array(WORDSIZE, self.data).tostring() def _lower(literal): # return _sre._lower(literal) # FIXME @@ -54,122 +54,122 @@ def _lower(literal): def _compile(code, pattern, flags): append = code.append for op, av in pattern: - if op is ANY: - if "s" in flags: - append(CODES[op]) # any character at all! - else: - append(CODES[NOT_LITERAL]) - append(10) - elif op in (SUCCESS, FAILURE): - append(CODES[op]) - elif op is AT: - append(CODES[op]) - append(POSITIONS[av]) - elif op is BRANCH: - append(CODES[op]) - tail = [] - for av in av[1]: - skip = len(code); append(0) - _compile(code, av, flags) - append(CODES[JUMP]) - tail.append(len(code)); append(0) - code[skip] = len(code) - skip - append(0) # end of branch - for tail in tail: - code[tail] = len(code) - tail - elif op is CALL: - append(CODES[op]) - skip = len(code); append(0) - _compile(code, av, flags) - append(CODES[SUCCESS]) - code[skip] = len(code) - skip - elif op is CATEGORY: # not used by current parser - append(CODES[op]) - append(CATEGORIES[av]) - elif op is GROUP: - if "i" in flags: - append(CODES[MAP_IGNORE[op]]) - else: - append(CODES[op]) - append(av) - elif op is IN: - if "i" in flags: - append(CODES[MAP_IGNORE[op]]) - def fixup(literal): - return ord(_lower(literal)) - else: - append(CODES[op]) - fixup = ord - skip = len(code); append(0) - for op, av in av: - append(CODES[op]) - if op is NEGATE: - pass - elif op is LITERAL: - append(fixup(av)) - elif op is RANGE: - append(fixup(av[0])) - append(fixup(av[1])) - elif op is CATEGORY: - append(CATEGORIES[av]) - else: - raise ValueError, "unsupported set operator" - append(CODES[FAILURE]) - code[skip] = len(code) - skip - elif op in (LITERAL, NOT_LITERAL): - if "i" in flags: - append(CODES[MAP_IGNORE[op]]) - append(ord(_lower(av))) - else: - append(CODES[op]) - append(ord(av)) - elif op is MARK: - append(CODES[op]) - append(av) - elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): - lo, hi = av[2].getwidth() - if lo == 0: - raise SyntaxError, "cannot repeat zero-width items" - if lo == hi == 1 and op is MAX_REPEAT: - append(CODES[MAX_REPEAT_ONE]) - skip = len(code); append(0) - append(av[0]) - append(av[1]) - _compile(code, av[2], flags) - append(CODES[SUCCESS]) - code[skip] = len(code) - skip - else: - append(CODES[op]) - skip = len(code); append(0) - append(av[0]) - append(av[1]) - _compile(code, av[2], flags) - if op is MIN_REPEAT: - append(CODES[MIN_UNTIL]) - else: - # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?) - append(CODES[MAX_UNTIL]) - code[skip] = len(code) - skip - elif op is SUBPATTERN: -## group = av[0] -## if group: -## append(CODES[MARK]) -## append((group-1)*2) - _compile(code, av[1], flags) -## if group: -## append(CODES[MARK]) -## append((group-1)*2+1) - else: - raise ValueError, ("unsupported operand type", op) + if op is ANY: + if "s" in flags: + append(CODES[op]) # any character at all! + else: + append(CODES[NOT_LITERAL]) + append(10) + elif op in (SUCCESS, FAILURE): + append(CODES[op]) + elif op is AT: + append(CODES[op]) + append(POSITIONS[av]) + elif op is BRANCH: + append(CODES[op]) + tail = [] + for av in av[1]: + skip = len(code); append(0) + _compile(code, av, flags) + append(CODES[JUMP]) + tail.append(len(code)); append(0) + code[skip] = len(code) - skip + append(0) # end of branch + for tail in tail: + code[tail] = len(code) - tail + elif op is CALL: + append(CODES[op]) + skip = len(code); append(0) + _compile(code, av, flags) + append(CODES[SUCCESS]) + code[skip] = len(code) - skip + elif op is CATEGORY: # not used by current parser + append(CODES[op]) + append(CATEGORIES[av]) + elif op is GROUP: + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + else: + append(CODES[op]) + append(av) + elif op is IN: + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + def fixup(literal): + return ord(_lower(literal)) + else: + append(CODES[op]) + fixup = ord + skip = len(code); append(0) + for op, av in av: + append(CODES[op]) + if op is NEGATE: + pass + elif op is LITERAL: + append(fixup(av)) + elif op is RANGE: + append(fixup(av[0])) + append(fixup(av[1])) + elif op is CATEGORY: + append(CATEGORIES[av]) + else: + raise ValueError, "unsupported set operator" + append(CODES[FAILURE]) + code[skip] = len(code) - skip + elif op in (LITERAL, NOT_LITERAL): + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + append(ord(_lower(av))) + else: + append(CODES[op]) + append(ord(av)) + elif op is MARK: + append(CODES[op]) + append(av) + elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): + lo, hi = av[2].getwidth() + if lo == 0: + raise SyntaxError, "cannot repeat zero-width items" + if lo == hi == 1 and op is MAX_REPEAT: + append(CODES[MAX_REPEAT_ONE]) + skip = len(code); append(0) + append(av[0]) + append(av[1]) + _compile(code, av[2], flags) + append(CODES[SUCCESS]) + code[skip] = len(code) - skip + else: + append(CODES[op]) + skip = len(code); append(0) + append(av[0]) + append(av[1]) + _compile(code, av[2], flags) + if op is MIN_REPEAT: + append(CODES[MIN_UNTIL]) + else: + # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?) + append(CODES[MAX_UNTIL]) + code[skip] = len(code) - skip + elif op is SUBPATTERN: +## group = av[0] +## if group: +## append(CODES[MARK]) +## append((group-1)*2) + _compile(code, av[1], flags) +## if group: +## append(CODES[MARK]) +## append((group-1)*2+1) + else: + raise ValueError, ("unsupported operand type", op) def compile(p, flags=()): # convert pattern list to internal format - if type(p) is type(""): - import sre_parse - pattern = p - p = sre_parse.parse(p) + if type(p) in (type(""), type(u"")): + import sre_parse + pattern = p + p = sre_parse.parse(p) else: - pattern = None + pattern = None # print p.getwidth() # print p code = Code() @@ -178,10 +178,10 @@ def compile(p, flags=()): # print list(code.data) data = code.todata() if 0: # debugging - print - print "-" * 68 - import sre_disasm - sre_disasm.disasm(data) - print "-" * 68 + print + print "-" * 68 + import sre_disasm + sre_disasm.disasm(data) + print "-" * 68 # print len(data), p.pattern.groups, len(p.pattern.groupdict) return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict) |