diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre.py | 51 | ||||
-rw-r--r-- | Lib/sre_compile.py | 6 | ||||
-rw-r--r-- | Lib/sre_constants.py | 11 | ||||
-rw-r--r-- | Lib/sre_parse.py | 18 | ||||
-rw-r--r-- | Lib/test/test_sre.py | 16 |
5 files changed, 64 insertions, 38 deletions
@@ -3,7 +3,7 @@ # # re-compatible interface for the sre matching engine # -# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # This version of the SRE library can be redistributed under CNRI's # Python 1.6 license. For any other use, please contact Secret Labs @@ -14,23 +14,22 @@ # other compatibility work. # -# FIXME: change all FIXME's to XXX ;-) - import sre_compile import sre_parse import string # flags -I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE -L = LOCALE = sre_compile.SRE_FLAG_LOCALE -M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE -S = DOTALL = sre_compile.SRE_FLAG_DOTALL -X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE +I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case +L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale +U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale +M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline +S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline +X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments -# sre extensions (may or may not be in 1.6/2.0 final) -T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE -U = UNICODE = sre_compile.SRE_FLAG_UNICODE +# sre extensions (experimental, don't rely on these) +T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking +DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation # sre exception error = sre_compile.error @@ -38,36 +37,60 @@ error = sre_compile.error # -------------------------------------------------------------------- # public interface -# FIXME: add docstrings - def match(pattern, string, flags=0): + """Try to apply the pattern at the start of the string, returning + a match object, or None if no match was found.""" return _compile(pattern, flags).match(string) def search(pattern, string, flags=0): + """Scan through string looking for a match to the pattern, returning + a match object, or None if no match was found.""" return _compile(pattern, flags).search(string) def sub(pattern, repl, string, count=0): + """Return the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in string by the + replacement repl""" return _compile(pattern, 0).sub(repl, string, count) def subn(pattern, repl, string, count=0): + """Return a 2-tuple containing (new_string, number). + new_string is the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in the source + string by the replacement repl. number is the number of + substitutions that were made.""" return _compile(pattern, 0).subn(repl, string, count) def split(pattern, string, maxsplit=0): + """Split the source string by the occurrences of the pattern, + returning a list containing the resulting substrings.""" return _compile(pattern, 0).split(string, maxsplit) def findall(pattern, string, maxsplit=0): + """Return a list of all non-overlapping matches in the string. + + If one or more groups are present in the pattern, return a + list of groups; this will be a list of tuples if the pattern + has more than one group. + + Empty matches are included in the result.""" return _compile(pattern, 0).findall(string, maxsplit) def compile(pattern, flags=0): + "Compile a regular expression pattern, returning a pattern object." return _compile(pattern, flags) def purge(): + "Clear the regular expression cache" _cache.clear() def template(pattern, flags=0): + "Compile a template pattern, returning a pattern object" + return _compile(pattern, flags|T) def escape(pattern): + "Escape all non-alphanumeric characters in pattern." s = list(pattern) for i in range(len(pattern)): c = pattern[i] @@ -204,7 +227,7 @@ class Scanner: break action = self.lexicon[m.lastindex][1] if callable(action): - self.match = match + self.match = m action = action(self, m.group()) if action is not None: append(action) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index dc508e5..adab767 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -3,7 +3,7 @@ # # convert template to internal format # -# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. +# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # @@ -176,7 +176,7 @@ def _optimize_charset(charset, fixup): for i in range(fixup(av[0]), fixup(av[1])+1): charmap[i] = 1 elif op is CATEGORY: - # FIXME: could append to charmap tail + # XXX: could append to charmap tail return charset # cannot compress except IndexError: # character set contains unicode characters @@ -364,7 +364,7 @@ def compile(p, flags=0): # print code - # FIXME: <fl> get rid of this limitation! + # XXX: <fl> get rid of this limitation! assert p.pattern.groups <= 100,\ "sorry, but this version only supports 100 named groups" diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index ea649c0..a5e4bb8 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -4,7 +4,7 @@ # various symbols used by the regular expression engine. # run this script to update the _sre include files! # -# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # @@ -54,10 +54,12 @@ SUBPATTERN = "subpattern" # positions AT_BEGINNING = "at_beginning" AT_BEGINNING_LINE = "at_beginning_line" +AT_BEGINNING_STRING = "at_beginning_string" AT_BOUNDARY = "at_boundary" AT_NON_BOUNDARY = "at_non_boundary" AT_END = "at_end" AT_END_LINE = "at_end_line" +AT_END_STRING = "at_end_string" # categories CATEGORY_DIGIT = "category_digit" @@ -109,8 +111,8 @@ OPCODES = [ ] ATCODES = [ - AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY, - AT_NON_BOUNDARY, AT_END, AT_END_LINE + AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, + AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING ] CHCODES = [ @@ -178,6 +180,7 @@ SRE_FLAG_MULTILINE = 8 # treat target as multiline string SRE_FLAG_DOTALL = 16 # treat target as a single string SRE_FLAG_UNICODE = 32 # use unicode locale SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments +SRE_FLAG_DEBUG = 128 # debugging # flags for INFO primitive SRE_INFO_PREFIX = 1 # has prefix @@ -201,7 +204,7 @@ if __name__ == "__main__": * NOTE: This file is generated by sre_constants.py. If you need * to change anything in here, edit sre_constants.py and run it. * - * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * * See the _sre.c file for information on usage and redistribution. */ diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 5334e06..a21fd61 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -3,7 +3,7 @@ # # convert re-style regular expression to sre pattern # -# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # @@ -34,7 +34,7 @@ ESCAPES = { } CATEGORIES = { - r"\A": (AT, AT_BEGINNING), # start of string + r"\A": (AT, AT_BEGINNING_STRING), # start of string r"\b": (AT, AT_BOUNDARY), r"\B": (AT, AT_NON_BOUNDARY), r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), @@ -43,7 +43,7 @@ CATEGORIES = { r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END), # end of string + r"\Z": (AT, AT_END_STRING), # end of string } FLAGS = { @@ -421,13 +421,13 @@ def _parse(source, state): code1 = code1[1][0] set.append(code1) - # FIXME: <fl> move set optimization to compiler! + # XXX: <fl> should move set optimization to compiler! if len(set)==1 and set[0][0] is LITERAL: subpattern.append(set[0]) # optimization elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: subpattern.append((NOT_LITERAL, set[1][1])) # optimization else: - # FIXME: <fl> add charmap optimization + # XXX: <fl> should add charmap optimization here subpattern.append((IN, set)) elif this and this[0] in REPEAT_CHARS: @@ -457,7 +457,7 @@ def _parse(source, state): min = int(lo) if hi: max = int(hi) - # FIXME: <fl> check that hi >= lo! + # XXX: <fl> check that hi >= lo ??? else: raise error, "not supported" # figure out which item to repeat @@ -601,7 +601,8 @@ def parse(str, flags=0, pattern=None): elif tail: raise error, "bogus characters at end of regular expression" - # p.dump() + if flags & SRE_FLAG_DEBUG: + p.dump() if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: # the VERBOSE flag was switched on inside the pattern. to be @@ -672,8 +673,7 @@ def parse_template(source, pattern): return p def expand_template(template, match): - # FIXME: <fl> this is sooooo slow. drop in the slicelist - # code instead + # XXX: <fl> this is sooooo slow. drop in the slicelist code instead p = [] a = p.append sep = match.string[:0] diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 9c01c66..b9692a1 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -47,12 +47,12 @@ if verbose: print 'Running tests on character literals' for i in [0, 8, 16, 32, 64, 127, 128, 255]: - test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1) - test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1) - test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1) - test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1) - test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1) - test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1) + test(r"""sre.match(r"\%03o" % i, chr(i)) != None""", 1) + test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") != None""", 1) + test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") != None""", 1) + test(r"""sre.match(r"\x%02x" % i, chr(i)) != None""", 1) + test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") != None""", 1) + test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") != None""", 1) test(r"""sre.match("\911", "")""", None, sre.error) # @@ -197,11 +197,11 @@ if verbose: p = "" for i in range(0, 256): p = p + chr(i) - test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1) + test(r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1) test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1)) pat = sre.compile(sre.escape(p)) -test(r"""pat.match(p) is not None""", 1) +test(r"""pat.match(p) != None""", 1) test(r"""pat.match(p).span()""", (0,256)) if verbose: |