diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-06-30 07:50:59 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-06-30 07:50:59 (GMT) |
commit | 90a07913229ada1bb3011cfa08a1e56bca31daaf (patch) | |
tree | 753ada4e42f130dddf2af3bccfecaafd5987abbf /Lib/sre_compile.py | |
parent | df02d0b3f0f438e6a773528010cc360d01b8f393 (diff) | |
download | cpython-90a07913229ada1bb3011cfa08a1e56bca31daaf.zip cpython-90a07913229ada1bb3011cfa08a1e56bca31daaf.tar.gz cpython-90a07913229ada1bb3011cfa08a1e56bca31daaf.tar.bz2 |
- pedantic: make sure "python -t" doesn't complain...
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- | Lib/sre_compile.py | 306 |
1 file changed, 153 insertions, 153 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index ea5f5bc..9fdc8f3 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -18,7 +18,7 @@ from sre_constants import * # find an array type code that matches the engine's code size for WORDSIZE in "BHil": if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize(): - break + break else: raise RuntimeError, "cannot find a useable array type" @@ -26,132 +26,132 @@ def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append for op, av in pattern: - if op is ANY: - if flags & SRE_FLAG_DOTALL: - emit(OPCODES[op]) - else: - emit(OPCODES[CATEGORY]) - emit(CHCODES[CATEGORY_NOT_LINEBREAK]) - elif op in (SUCCESS, FAILURE): - emit(OPCODES[op]) - elif op is AT: - emit(OPCODES[op]) - if flags & SRE_FLAG_MULTILINE: - emit(ATCODES[AT_MULTILINE[av]]) - else: - emit(ATCODES[av]) - elif op is BRANCH: - emit(OPCODES[op]) - tail = [] - for av in av[1]: - skip = len(code); emit(0) - _compile(code, av, flags) - emit(OPCODES[JUMP]) - tail.append(len(code)); emit(0) - code[skip] = len(code) - skip - emit(0) # end of branch - for tail in tail: - code[tail] = len(code) - tail - elif op is CALL: - emit(OPCODES[op]) - skip = len(code); emit(0) - _compile(code, av, flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - elif op is CATEGORY: - emit(OPCODES[op]) - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - elif op is GROUP: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(av-1) - elif op is IN: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - def fixup(literal, flags=flags): - return _sre.getlower(ord(literal), flags) - else: - emit(OPCODES[op]) - fixup = ord - skip = len(code); emit(0) - for op, av in av: - emit(OPCODES[op]) - if op is NEGATE: - pass - elif op is LITERAL: - emit(fixup(av)) - elif op is RANGE: - 
emit(fixup(av[0])) - emit(fixup(av[1])) - elif op is CATEGORY: - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - else: - raise error, "internal: unsupported set operator" - emit(OPCODES[FAILURE]) - code[skip] = len(code) - skip - elif op in (LITERAL, NOT_LITERAL): - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(ord(av)) - elif op is MARK: - emit(OPCODES[op]) - emit(av) - elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): - if flags & SRE_FLAG_TEMPLATE: - emit(OPCODES[REPEAT]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - else: - lo, hi = av[2].getwidth() - if lo == 0: - raise error, "nothing to repeat" - if 0 and lo == hi == 1 and op is MAX_REPEAT: - # FIXME: <fl> need a better way to figure out when - # it's safe to use this one (in the parser, probably) - emit(OPCODES[MAX_REPEAT_ONE]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - else: - emit(OPCODES[op]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - elif op is SUBPATTERN: - group = av[0] - if group: - emit(OPCODES[MARK]) - emit((group-1)*2) - _compile(code, av[1], flags) - if group: - emit(OPCODES[MARK]) - emit((group-1)*2+1) - else: - raise ValueError, ("unsupported operand type", op) + if op is ANY: + if flags & SRE_FLAG_DOTALL: + emit(OPCODES[op]) + else: + emit(OPCODES[CATEGORY]) + emit(CHCODES[CATEGORY_NOT_LINEBREAK]) + elif op in (SUCCESS, FAILURE): + emit(OPCODES[op]) + elif op is AT: + emit(OPCODES[op]) + if flags & SRE_FLAG_MULTILINE: + emit(ATCODES[AT_MULTILINE[av]]) + else: + emit(ATCODES[av]) + elif op is BRANCH: + emit(OPCODES[op]) + tail = [] + for av in 
av[1]: + skip = len(code); emit(0) + _compile(code, av, flags) + emit(OPCODES[JUMP]) + tail.append(len(code)); emit(0) + code[skip] = len(code) - skip + emit(0) # end of branch + for tail in tail: + code[tail] = len(code) - tail + elif op is CALL: + emit(OPCODES[op]) + skip = len(code); emit(0) + _compile(code, av, flags) + emit(OPCODES[SUCCESS]) + code[skip] = len(code) - skip + elif op is CATEGORY: + emit(OPCODES[op]) + if flags & SRE_FLAG_LOCALE: + emit(CHCODES[CH_LOCALE[av]]) + elif flags & SRE_FLAG_UNICODE: + emit(CHCODES[CH_UNICODE[av]]) + else: + emit(CHCODES[av]) + elif op is GROUP: + if flags & SRE_FLAG_IGNORECASE: + emit(OPCODES[OP_IGNORE[op]]) + else: + emit(OPCODES[op]) + emit(av-1) + elif op is IN: + if flags & SRE_FLAG_IGNORECASE: + emit(OPCODES[OP_IGNORE[op]]) + def fixup(literal, flags=flags): + return _sre.getlower(ord(literal), flags) + else: + emit(OPCODES[op]) + fixup = ord + skip = len(code); emit(0) + for op, av in av: + emit(OPCODES[op]) + if op is NEGATE: + pass + elif op is LITERAL: + emit(fixup(av)) + elif op is RANGE: + emit(fixup(av[0])) + emit(fixup(av[1])) + elif op is CATEGORY: + if flags & SRE_FLAG_LOCALE: + emit(CHCODES[CH_LOCALE[av]]) + elif flags & SRE_FLAG_UNICODE: + emit(CHCODES[CH_UNICODE[av]]) + else: + emit(CHCODES[av]) + else: + raise error, "internal: unsupported set operator" + emit(OPCODES[FAILURE]) + code[skip] = len(code) - skip + elif op in (LITERAL, NOT_LITERAL): + if flags & SRE_FLAG_IGNORECASE: + emit(OPCODES[OP_IGNORE[op]]) + else: + emit(OPCODES[op]) + emit(ord(av)) + elif op is MARK: + emit(OPCODES[op]) + emit(av) + elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): + if flags & SRE_FLAG_TEMPLATE: + emit(OPCODES[REPEAT]) + skip = len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + emit(OPCODES[SUCCESS]) + code[skip] = len(code) - skip + else: + lo, hi = av[2].getwidth() + if lo == 0: + raise error, "nothing to repeat" + if 0 and lo == hi == 1 and op is MAX_REPEAT: + # FIXME: <fl> need a 
better way to figure out when + # it's safe to use this one (in the parser, probably) + emit(OPCODES[MAX_REPEAT_ONE]) + skip = len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + emit(OPCODES[SUCCESS]) + code[skip] = len(code) - skip + else: + emit(OPCODES[op]) + skip = len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + emit(OPCODES[SUCCESS]) + code[skip] = len(code) - skip + elif op is SUBPATTERN: + group = av[0] + if group: + emit(OPCODES[MARK]) + emit((group-1)*2) + _compile(code, av[1], flags) + if group: + emit(OPCODES[MARK]) + emit((group-1)*2+1) + else: + raise ValueError, ("unsupported operand type", op) def _compile_info(code, pattern, flags): # internal: compile an info block. in the current version, @@ -159,15 +159,15 @@ def _compile_info(code, pattern, flags): # if any lo, hi = pattern.getwidth() if lo == 0: - return # not worth it + return # not worth it # look for a literal prefix prefix = [] if not (flags & SRE_FLAG_IGNORECASE): - for op, av in pattern.data: - if op is LITERAL: - prefix.append(ord(av)) - else: - break + for op, av in pattern.data: + if op is LITERAL: + prefix.append(ord(av)) + else: + break # add an info block emit = code.append emit(OPCODES[INFO]) @@ -175,25 +175,25 @@ def _compile_info(code, pattern, flags): # literal flag mask = 0 if len(prefix) == len(pattern.data): - mask = 1 + mask = 1 emit(mask) # pattern length emit(lo) if hi < 32768: - emit(hi) + emit(hi) else: - emit(0) + emit(0) # add literal prefix emit(len(prefix)) if prefix: - code.extend(prefix) - # generate overlap table - table = [-1] + ([0]*len(prefix)) - for i in range(len(prefix)): - table[i+1] = table[i]+1 - while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: - table[i+1] = table[table[i+1]-1]+1 - code.extend(table[1:]) # don't store first entry + code.extend(prefix) + # generate overlap table + table = [-1] + ([0]*len(prefix)) + for i in range(len(prefix)): + table[i+1] = table[i]+1 + while 
table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: + table[i+1] = table[table[i+1]-1]+1 + code.extend(table[1:]) # don't store first entry code[skip] = len(code) - skip def compile(p, flags=0): @@ -201,11 +201,11 @@ def compile(p, flags=0): # compile, as necessary if type(p) in (type(""), type(u"")): - import sre_parse - pattern = p - p = sre_parse.parse(p) + import sre_parse + pattern = p + p = sre_parse.parse(p) else: - pattern = None + pattern = None flags = p.pattern.flags | flags code = [] @@ -220,10 +220,10 @@ def compile(p, flags=0): # FIXME: <fl> get rid of this limitation! assert p.pattern.groups <= 100,\ - "sorry, but this version only supports 100 named groups" + "sorry, but this version only supports 100 named groups" return _sre.compile( - pattern, flags, - array.array(WORDSIZE, code).tostring(), - p.pattern.groups-1, p.pattern.groupdict - ) + pattern, flags, + array.array(WORDSIZE, code).tostring(), + p.pattern.groups-1, p.pattern.groupdict + ) |