summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/sre.py 51
-rw-r--r-- Lib/sre_compile.py 6
-rw-r--r-- Lib/sre_constants.py 11
-rw-r--r-- Lib/sre_parse.py 18
-rw-r--r-- Lib/test/test_sre.py 16
5 files changed, 64 insertions, 38 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 6dea5c4..8d03e92 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -3,7 +3,7 @@
#
# re-compatible interface for the sre matching engine
#
-# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# This version of the SRE library can be redistributed under CNRI's
# Python 1.6 license. For any other use, please contact Secret Labs
@@ -14,23 +14,22 @@
# other compatibility work.
#
-# FIXME: change all FIXME's to XXX ;-)
-
import sre_compile
import sre_parse
import string
# flags
-I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
-L = LOCALE = sre_compile.SRE_FLAG_LOCALE
-M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
-S = DOTALL = sre_compile.SRE_FLAG_DOTALL
-X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
+I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
+L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
+M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
+X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
-# sre extensions (may or may not be in 1.6/2.0 final)
-T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE
+# sre extensions (experimental, don't rely on these)
+T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
+DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
# sre exception
error = sre_compile.error
@@ -38,36 +37,60 @@ error = sre_compile.error
# --------------------------------------------------------------------
# public interface
-# FIXME: add docstrings
-
def match(pattern, string, flags=0):
+ """Try to apply the pattern at the start of the string, returning
+ a match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
def search(pattern, string, flags=0):
+ """Scan through string looking for a match to the pattern, returning
+ a match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
+ """Return the string obtained by replacing the leftmost
+ non-overlapping occurrences of the pattern in string by the
+ replacement repl"""
return _compile(pattern, 0).sub(repl, string, count)
def subn(pattern, repl, string, count=0):
+ """Return a 2-tuple containing (new_string, number).
+ new_string is the string obtained by replacing the leftmost
+ non-overlapping occurrences of the pattern in the source
+ string by the replacement repl. number is the number of
+ substitutions that were made."""
return _compile(pattern, 0).subn(repl, string, count)
def split(pattern, string, maxsplit=0):
+ """Split the source string by the occurrences of the pattern,
+ returning a list containing the resulting substrings."""
return _compile(pattern, 0).split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
+ """Return a list of all non-overlapping matches in the string.
+
+ If one or more groups are present in the pattern, return a
+ list of groups; this will be a list of tuples if the pattern
+ has more than one group.
+
+ Empty matches are included in the result."""
return _compile(pattern, 0).findall(string, maxsplit)
def compile(pattern, flags=0):
+ "Compile a regular expression pattern, returning a pattern object."
return _compile(pattern, flags)
def purge():
+ "Clear the regular expression cache"
_cache.clear()
def template(pattern, flags=0):
+ "Compile a template pattern, returning a pattern object"
+
return _compile(pattern, flags|T)
def escape(pattern):
+ "Escape all non-alphanumeric characters in pattern."
s = list(pattern)
for i in range(len(pattern)):
c = pattern[i]
@@ -204,7 +227,7 @@ class Scanner:
break
action = self.lexicon[m.lastindex][1]
if callable(action):
- self.match = match
+ self.match = m
action = action(self, m.group())
if action is not None:
append(action)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index dc508e5..adab767 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -3,7 +3,7 @@
#
# convert template to internal format
#
-# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
@@ -176,7 +176,7 @@ def _optimize_charset(charset, fixup):
for i in range(fixup(av[0]), fixup(av[1])+1):
charmap[i] = 1
elif op is CATEGORY:
- # FIXME: could append to charmap tail
+ # XXX: could append to charmap tail
return charset # cannot compress
except IndexError:
# character set contains unicode characters
@@ -364,7 +364,7 @@ def compile(p, flags=0):
# print code
- # FIXME: <fl> get rid of this limitation!
+ # XXX: <fl> get rid of this limitation!
assert p.pattern.groups <= 100,\
"sorry, but this version only supports 100 named groups"
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index ea649c0..a5e4bb8 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -4,7 +4,7 @@
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
-# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
@@ -54,10 +54,12 @@ SUBPATTERN = "subpattern"
# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
+AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
+AT_END_STRING = "at_end_string"
# categories
CATEGORY_DIGIT = "category_digit"
@@ -109,8 +111,8 @@ OPCODES = [
]
ATCODES = [
- AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
- AT_NON_BOUNDARY, AT_END, AT_END_LINE
+ AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
+ AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
]
CHCODES = [
@@ -178,6 +180,7 @@ SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+SRE_FLAG_DEBUG = 128 # debugging
# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
@@ -201,7 +204,7 @@ if __name__ == "__main__":
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
- * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 5334e06..a21fd61 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -3,7 +3,7 @@
#
# convert re-style regular expression to sre pattern
#
-# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
@@ -34,7 +34,7 @@ ESCAPES = {
}
CATEGORIES = {
- r"\A": (AT, AT_BEGINNING), # start of string
+ r"\A": (AT, AT_BEGINNING_STRING), # start of string
r"\b": (AT, AT_BOUNDARY),
r"\B": (AT, AT_NON_BOUNDARY),
r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
@@ -43,7 +43,7 @@ CATEGORIES = {
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
- r"\Z": (AT, AT_END), # end of string
+ r"\Z": (AT, AT_END_STRING), # end of string
}
FLAGS = {
@@ -421,13 +421,13 @@ def _parse(source, state):
code1 = code1[1][0]
set.append(code1)
- # FIXME: <fl> move set optimization to compiler!
+ # XXX: <fl> should move set optimization to compiler!
if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else:
- # FIXME: <fl> add charmap optimization
+ # XXX: <fl> should add charmap optimization here
subpattern.append((IN, set))
elif this and this[0] in REPEAT_CHARS:
@@ -457,7 +457,7 @@ def _parse(source, state):
min = int(lo)
if hi:
max = int(hi)
- # FIXME: <fl> check that hi >= lo!
+ # XXX: <fl> check that hi >= lo ???
else:
raise error, "not supported"
# figure out which item to repeat
@@ -601,7 +601,8 @@ def parse(str, flags=0, pattern=None):
elif tail:
raise error, "bogus characters at end of regular expression"
- # p.dump()
+ if flags & SRE_FLAG_DEBUG:
+ p.dump()
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
# the VERBOSE flag was switched on inside the pattern. to be
@@ -672,8 +673,7 @@ def parse_template(source, pattern):
return p
def expand_template(template, match):
- # FIXME: <fl> this is sooooo slow. drop in the slicelist
- # code instead
+ # XXX: <fl> this is sooooo slow. drop in the slicelist code instead
p = []
a = p.append
sep = match.string[:0]
diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py
index 9c01c66..b9692a1 100644
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
@@ -47,12 +47,12 @@ if verbose:
print 'Running tests on character literals'
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
- test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
- test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
- test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
- test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
- test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
- test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
+ test(r"""sre.match(r"\%03o" % i, chr(i)) != None""", 1)
+ test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") != None""", 1)
+ test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") != None""", 1)
+ test(r"""sre.match(r"\x%02x" % i, chr(i)) != None""", 1)
+ test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") != None""", 1)
+ test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") != None""", 1)
test(r"""sre.match("\911", "")""", None, sre.error)
#
@@ -197,11 +197,11 @@ if verbose:
p = ""
for i in range(0, 256):
p = p + chr(i)
- test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
+ test(r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1)
test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
pat = sre.compile(sre.escape(p))
-test(r"""pat.match(p) is not None""", 1)
+test(r"""pat.match(p) != None""", 1)
test(r"""pat.match(p).span()""", (0,256))
if verbose: