diff options
author | Guido van Rossum <guido@python.org> | 2000-11-13 17:11:45 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2000-11-13 17:11:45 (GMT) |
commit | 56baca328f7c018c4ae776dd87f44a145f502773 (patch) | |
tree | aa5418fee3c046353db47491d1746a523402fa2e /Lib/dos-8x3/sre_comp.py | |
parent | f6fc875831d0cfbf4077b4d2e1800365a78f7c2e (diff) | |
download | cpython-56baca328f7c018c4ae776dd87f44a145f502773.zip cpython-56baca328f7c018c4ae776dd87f44a145f502773.tar.gz cpython-56baca328f7c018c4ae776dd87f44a145f502773.tar.bz2 |
Removing DOS 8x3 support
Diffstat (limited to 'Lib/dos-8x3/sre_comp.py')
-rw-r--r-- | Lib/dos-8x3/sre_comp.py | 381 |
1 files changed, 0 insertions, 381 deletions
diff --git a/Lib/dos-8x3/sre_comp.py b/Lib/dos-8x3/sre_comp.py deleted file mode 100644 index dc508e5..0000000 --- a/Lib/dos-8x3/sre_comp.py +++ /dev/null @@ -1,381 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# convert template to internal format -# -# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. -# - -import _sre - -from sre_constants import * - -MAXCODE = 65535 - -def _compile(code, pattern, flags): - # internal: compile a (sub)pattern - emit = code.append - for op, av in pattern: - if op in (LITERAL, NOT_LITERAL): - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(av) - elif op is IN: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - def fixup(literal, flags=flags): - return _sre.getlower(literal, flags) - else: - emit(OPCODES[op]) - fixup = lambda x: x - skip = len(code); emit(0) - _compile_charset(av, flags, code, fixup) - code[skip] = len(code) - skip - elif op is ANY: - if flags & SRE_FLAG_DOTALL: - emit(OPCODES[ANY_ALL]) - else: - emit(OPCODES[ANY]) - elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): - if flags & SRE_FLAG_TEMPLATE: - raise error, "internal: unsupported template operator" - emit(OPCODES[REPEAT]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - elif _simple(av) and op == MAX_REPEAT: - emit(OPCODES[REPEAT_ONE]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - else: - emit(OPCODES[REPEAT]) - skip = len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - code[skip] = len(code) - skip - if op == MAX_REPEAT: - emit(OPCODES[MAX_UNTIL]) - else: - emit(OPCODES[MIN_UNTIL]) - elif op is SUBPATTERN: - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2) - # _compile_info(code, av[1], flags) - _compile(code, av[1], flags) - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2+1) - elif op in (SUCCESS, FAILURE): - emit(OPCODES[op]) - elif op in (ASSERT, ASSERT_NOT): - emit(OPCODES[op]) - skip = len(code); emit(0) - if av[0] >= 0: - emit(0) # look ahead - else: - lo, hi = av[1].getwidth() - if lo != hi: - raise error, "look-behind requires fixed-width pattern" - emit(lo) # look behind - _compile(code, av[1], flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - elif op is CALL: - emit(OPCODES[op]) - skip = len(code); emit(0) - _compile(code, av, flags) - emit(OPCODES[SUCCESS]) - code[skip] = len(code) - skip - elif op is AT: - emit(OPCODES[op]) - if flags & SRE_FLAG_MULTILINE: - emit(ATCODES[AT_MULTILINE.get(av, av)]) - else: - emit(ATCODES[av]) - elif op is BRANCH: - emit(OPCODES[op]) - tail = [] - for av in av[1]: - skip = len(code); emit(0) - # _compile_info(code, av, flags) - _compile(code, av, flags) - emit(OPCODES[JUMP]) - tail.append(len(code)); emit(0) - code[skip] = len(code) - skip - emit(0) # end of branch - for tail in tail: - code[tail] = len(code) - tail - elif op is CATEGORY: - emit(OPCODES[op]) - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - elif op is GROUPREF: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(av-1) - else: - raise ValueError, ("unsupported operand type", op) - -def _compile_charset(charset, flags, code, fixup=None): - # compile charset subprogram - emit = code.append - if not fixup: - fixup = lambda x: x - for op, av in _optimize_charset(charset, fixup): - emit(OPCODES[op]) - if op is NEGATE: - pass - elif op is LITERAL: - emit(fixup(av)) - elif op is RANGE: - emit(fixup(av[0])) - emit(fixup(av[1])) - elif op is CHARSET: - code.extend(av) - elif op is CATEGORY: - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - else: - raise error, "internal: unsupported set operator" - emit(OPCODES[FAILURE]) - -def _optimize_charset(charset, fixup): - # internal: optimize character set - out = [] - charmap = [0]*256 - try: - for op, av in charset: - if op is NEGATE: - out.append((op, av)) - elif op is LITERAL: - charmap[fixup(av)] = 1 - elif op is RANGE: - for i in range(fixup(av[0]), fixup(av[1])+1): - charmap[i] = 1 - elif op is CATEGORY: - # FIXME: could append to charmap tail - return charset # cannot compress - except IndexError: - # character set contains unicode characters - return charset - # compress character map - i = p = n = 0 - runs = [] - for c in charmap: - if c: - if n == 0: - p = i - n = n + 1 - elif n: - runs.append((p, n)) - n = 0 - i = i + 1 - if n: - runs.append((p, n)) - if len(runs) <= 2: - # use literal/range - for p, n in runs: - if n == 1: - out.append((LITERAL, p)) - else: - out.append((RANGE, (p, p+n-1))) - if len(out) < len(charset): - return out - else: - # use bitmap - data = [] - m = 1; v = 0 - for c in charmap: - if c: - v = v + m - m = m << 1 - if m > MAXCODE: - data.append(v) - m = 1; v = 0 - out.append((CHARSET, data)) - return out - return charset - -def _simple(av): - # check if av is a "simple" operator - lo, hi = av[2].getwidth() - if lo == 0 and hi == MAXREPEAT: - raise error, "nothing to repeat" - return lo == hi == 1 and av[2][0][0] != SUBPATTERN - -def _compile_info(code, pattern, flags): - # internal: compile an info block. in the current version, - # this contains min/max pattern width, and an optional literal - # prefix or a character map - lo, hi = pattern.getwidth() - if lo == 0: - return # not worth it - # look for a literal prefix - prefix = [] - prefix_skip = 0 - charset = [] # not used - if not (flags & SRE_FLAG_IGNORECASE): - # look for literal prefix - for op, av in pattern.data: - if op is LITERAL: - if len(prefix) == prefix_skip: - prefix_skip = prefix_skip + 1 - prefix.append(av) - elif op is SUBPATTERN and len(av[1]) == 1: - op, av = av[1][0] - if op is LITERAL: - prefix.append(av) - else: - break - else: - break - # if no prefix, look for charset prefix - if not prefix and pattern.data: - op, av = pattern.data[0] - if op is SUBPATTERN and av[1]: - op, av = av[1][0] - if op is LITERAL: - charset.append((op, av)) - elif op is BRANCH: - c = [] - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - c.append((op, av)) - else: - break - else: - charset = c - elif op is BRANCH: - c = [] - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - c.append((op, av)) - else: - break - else: - charset = c - elif op is IN: - charset = av -## if prefix: -## print "*** PREFIX", prefix, prefix_skip -## if charset: -## print "*** CHARSET", charset - # add an info block - emit = code.append - emit(OPCODES[INFO]) - skip = len(code); emit(0) - # literal flag - mask = 0 - if prefix: - mask = SRE_INFO_PREFIX - if len(prefix) == prefix_skip == len(pattern.data): - mask = mask + SRE_INFO_LITERAL - elif charset: - mask = mask + SRE_INFO_CHARSET - emit(mask) - # pattern length - if lo < MAXCODE: - emit(lo) - else: - emit(MAXCODE) - prefix = prefix[:MAXCODE] - if hi < MAXCODE: - emit(hi) - else: - emit(0) - # add literal prefix - if prefix: - emit(len(prefix)) # length - emit(prefix_skip) # skip - code.extend(prefix) - # generate overlap table - table = [-1] + ([0]*len(prefix)) - for i in range(len(prefix)): - table[i+1] = table[i]+1 - while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: - table[i+1] = table[table[i+1]-1]+1 - code.extend(table[1:]) # don't store first entry - elif charset: - _compile_charset(charset, 0, code) - code[skip] = len(code) - skip - -STRING_TYPES = [type("")] - -try: - STRING_TYPES.append(type(unicode(""))) -except NameError: - pass - -def _code(p, flags): - - flags = p.pattern.flags | flags - code = [] - - # compile info block - _compile_info(code, p, flags) - - # compile the pattern - _compile(code, p.data, flags) - - code.append(OPCODES[SUCCESS]) - - return code - -def compile(p, flags=0): - # internal: convert pattern list to internal format - - if type(p) in STRING_TYPES: - import sre_parse - pattern = p - p = sre_parse.parse(p, flags) - else: - pattern = None - - code = _code(p, flags) - - # print code - - # FIXME: <fl> get rid of this limitation! - assert p.pattern.groups <= 100,\ - "sorry, but this version only supports 100 named groups" - - # map in either direction - groupindex = p.pattern.groupdict - indexgroup = [None] * p.pattern.groups - for k, i in groupindex.items(): - indexgroup[i] = k - - return _sre.compile( - pattern, flags, code, - p.pattern.groups-1, - groupindex, indexgroup - ) |