diff options
field | value | date |
---|---|---|
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-08-01 18:20:07 (GMT) |
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-08-01 18:20:07 (GMT) |
commit | 29c4ba9ada44d62988c62c85c8046985f10a1c85 (patch) | |
tree | 89f38c5859e98069d05491dcd977e338477fd2d2 /Lib | |
parent | 19c6afb42b12c3a50900b4157c8398e01acad91f (diff) | |
download | cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.zip cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.tar.gz cpython-29c4ba9ada44d62988c62c85c8046985f10a1c85.tar.bz2 | |
SRE 0.9.8: passes the entire test suite
-- reverted REPEAT operator to use "repeat context" strategy
(from 0.8.X), but done right this time.
-- got rid of backtracking stack; use nested SRE_MATCH calls
instead (should probably put it back again in 0.9.9 ;-)
-- properly reset state in scanner mode
-- don't use aggressive inlining by default
Diffstat (limited to 'Lib')
mode | file | lines changed |
---|---|---|
-rw-r--r-- | Lib/sre.py | 13 |
-rw-r--r-- | Lib/sre_compile.py | 64 |
-rw-r--r-- | Lib/sre_constants.py | 18 |
-rw-r--r-- | Lib/sre_parse.py | 6 |
4 files changed, 46 insertions, 55 deletions
@@ -5,8 +5,12 @@ # # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. # +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# # Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 2.0 integration and +# CNRI. Hewlett-Packard provided funding for 1.6 integration and # other compatibility work. # @@ -24,7 +28,7 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE S = DOTALL = sre_compile.SRE_FLAG_DOTALL X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE -# sre extensions (may or may not be in 2.0 final) +# sre extensions (may or may not be in 1.6/2.0 final) T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE U = UNICODE = sre_compile.SRE_FLAG_UNICODE @@ -168,15 +172,14 @@ copy_reg.pickle(type(_compile("")), _pickle, _compile) class Scanner: def __init__(self, lexicon): - from sre_constants import BRANCH, SUBPATTERN, INDEX + from sre_constants import BRANCH, SUBPATTERN self.lexicon = lexicon # combine phrases into a compound pattern p = [] s = sre_parse.Pattern() for phrase, action in lexicon: p.append(sre_parse.SubPattern(s, [ - (SUBPATTERN, (None, sre_parse.parse(phrase))), - (INDEX, len(p)) + (SUBPATTERN, (len(p), sre_parse.parse(phrase))), ])) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) s.groups = len(p) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index ef26e1c..2d1cbb1 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -5,9 +5,7 @@ # # Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. # -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 2.0 integration and -# other compatibility work. +# See the sre.py file for information on usage and redistribution. 
# import _sre @@ -124,6 +122,7 @@ def _compile(code, pattern, flags): emit(CHCODES[CATEGORY_NOT_LINEBREAK]) elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): if flags & SRE_FLAG_TEMPLATE: + raise error, "internal: unsupported template operator" emit(OPCODES[REPEAT]) skip = len(code); emit(0) emit(av[0]) @@ -136,9 +135,8 @@ def _compile(code, pattern, flags): if lo == 0: raise error, "nothing to repeat" if 0 and lo == hi == 1 and op is MAX_REPEAT: - # FIXME: <fl> need a better way to figure out when - # it's safe to use this one (in the parser, probably) - emit(OPCODES[MAX_REPEAT_ONE]) + # FIXME: <fl> fast and wrong (but we'll fix that) + emit(OPCODES[REPEAT_ONE]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) @@ -146,29 +144,24 @@ def _compile(code, pattern, flags): emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip else: - emit(OPCODES[op]) + emit(OPCODES[REPEAT]) skip = len(code); emit(0) emit(av[0]) emit(av[1]) - mark = MAXCODE - if av[2][0][0] == SUBPATTERN: - # repeated subpattern - gid, foo = av[2][0][1] - if gid: - mark = (gid-1)*2 - emit(mark) _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip + if op == MAX_REPEAT: + emit(OPCODES[MAX_UNTIL]) + else: + emit(OPCODES[MIN_UNTIL]) elif op is SUBPATTERN: - gid = av[0] - if gid: + if av[0]: emit(OPCODES[MARK]) - emit((gid-1)*2) + emit((av[0]-1)*2) _compile(code, av[1], flags) - if gid: + if av[0]: emit(OPCODES[MARK]) - emit((gid-1)*2+1) + emit((av[0]-1)*2+1) elif op in (SUCCESS, FAILURE): emit(OPCODES[op]) elif op in (ASSERT, ASSERT_NOT): @@ -197,11 +190,10 @@ def _compile(code, pattern, flags): else: emit(ATCODES[av]) elif op is BRANCH: + emit(OPCODES[op]) tail = [] for av in av[1]: - emit(OPCODES[op]) skip = len(code); emit(0) - emit(MAXCODE) # save mark _compile(code, av, flags) emit(OPCODES[JUMP]) tail.append(len(code)); emit(0) @@ -223,9 +215,6 @@ def _compile(code, pattern, flags): else: emit(OPCODES[op]) emit(av-1) - elif op in (MARK, INDEX): - emit(OPCODES[op]) - emit(av) 
else: raise ValueError, ("unsupported operand type", op) @@ -294,16 +283,7 @@ try: except NameError: pass -def compile(p, flags=0): - # internal: convert pattern list to internal format - - # compile, as necessary - if type(p) in STRING_TYPES: - import sre_parse - pattern = p - p = sre_parse.parse(p, flags) - else: - pattern = None +def _compile1(p, flags): flags = p.pattern.flags | flags code = [] @@ -316,6 +296,20 @@ def compile(p, flags=0): code.append(OPCODES[SUCCESS]) + return code + +def compile(p, flags=0): + # internal: convert pattern list to internal format + + if type(p) in STRING_TYPES: + import sre_parse + pattern = p + p = sre_parse.parse(p, flags) + else: + pattern = None + + code = _compile1(p, flags) + # print code # FIXME: <fl> get rid of this limitation! diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index ef32c32..e595915 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -6,9 +6,7 @@ # # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. # -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 2.0 integration and -# other compatibility work. +# See the sre.py file for information on usage and redistribution. # # should this really be here? 
@@ -33,15 +31,15 @@ GROUPREF = "groupref" GROUPREF_IGNORE = "groupref_ignore" IN = "in" IN_IGNORE = "in_ignore" -INDEX = "index" INFO = "info" JUMP = "jump" LITERAL = "literal" LITERAL_IGNORE = "literal_ignore" MARK = "mark" MAX_REPEAT = "max_repeat" -MAX_REPEAT_ONE = "max_repeat_one" +MAX_UNTIL = "max_until" MIN_REPEAT = "min_repeat" +MIN_UNTIL = "min_until" NEGATE = "negate" NOT_LITERAL = "not_literal" NOT_LITERAL_IGNORE = "not_literal_ignore" @@ -91,19 +89,19 @@ OPCODES = [ CATEGORY, CHARSET, GROUPREF, GROUPREF_IGNORE, - INDEX, IN, IN_IGNORE, INFO, JUMP, LITERAL, LITERAL_IGNORE, MARK, - MAX_REPEAT, - MAX_REPEAT_ONE, - MIN_REPEAT, + MAX_UNTIL, + MIN_UNTIL, NOT_LITERAL, NOT_LITERAL_IGNORE, NEGATE, RANGE, - REPEAT + REPEAT, + REPEAT_ONE, + SUBPATTERN ] diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 1b56352..299aa0e 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -5,9 +5,7 @@ # # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. # -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 2.0 integration and -# other compatibility work. +# See the sre.py file for information on usage and redistribution. # import string, sys @@ -536,8 +534,6 @@ def _parse(source, state): group = state.getgroup(name) p = _parse_sub(source, state) subpattern.append((SUBPATTERN, (group, p))) - if group is not None: - p.append((INDEX, group)) else: while 1: char = source.get() |