diff options
-rw-r--r--  Lib/sre.py            | 14
-rw-r--r--  Lib/sre_compile.py    |  2
-rw-r--r--  Lib/sre_constants.py  |  3
-rw-r--r--  Lib/sre_parse.py      | 49
-rw-r--r--  Lib/test/test_sre.py  |  2
5 files changed, 39 insertions, 31 deletions
@@ -17,9 +17,14 @@ import sre_compile import sre_parse -__all__ = ["match","search","sub","subn","split","findall","compile", - "purge","template","escape","I","L","M","S","X","U","IGNORECASE", - "LOCALE","MULTILINE","DOTALL","VERBOSE","UNICODE","error"] +# public symbols +__all__ = [ "match", "search", "sub", "subn", "split", "findall", + "compile", "purge", "template", "escape", "I", "L", "M", "S", "X", + "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", + "UNICODE", "error" ] + +# this module works under 1.5.2 and later. don't use string methods +import string # flags I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case @@ -88,7 +93,6 @@ def purge(): def template(pattern, flags=0): "Compile a template pattern, returning a pattern object" - return _compile(pattern, flags|T) def escape(pattern): @@ -111,7 +115,7 @@ _MAXCACHE = 100 def _join(seq, sep): # internal: join into string having the same type as sep - return sep[:0].join(seq) + return string.join(seq, sep[:0]) def _compile(*key): # internal: compile pattern diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 18dee88..ab2a2cc 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -12,8 +12,6 @@ import _sre from sre_constants import * -__all__ = ["compile"] - assert _sre.MAGIC == MAGIC, "SRE module mismatch" MAXCODE = 65535 diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index 7aedab1..b429a33 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -195,11 +195,12 @@ SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) SRE_INFO_CHARSET = 4 # pattern starts with character from given set if __name__ == "__main__": + import string def dump(f, d, prefix): items = d.items() items.sort(lambda a, b: cmp(a[1], b[1])) for k, v in items: - f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) + f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v)) f = open("sre_constants.h", "w") f.write("""\ /* diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py 
index fc3c492..36036b6 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -10,13 +10,11 @@ # XXX: show string offset and offending character for all errors -import sys +# this module works under 1.5.2 and later. don't use string methods +import string, sys from sre_constants import * -__all__ = ["Pattern","SubPattern","Tokenizer","parse","parse_template", - "expand_template"] - SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" @@ -28,13 +26,13 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF") WHITESPACE = tuple(" \t\n\r\v\f") ESCAPES = { - r"\a": (LITERAL, 7), - r"\b": (LITERAL, 8), - r"\f": (LITERAL, 12), - r"\n": (LITERAL, 10), - r"\r": (LITERAL, 13), - r"\t": (LITERAL, 9), - r"\v": (LITERAL, 11), + r"\a": (LITERAL, ord("\a")), + r"\b": (LITERAL, ord("\b")), + r"\f": (LITERAL, ord("\f")), + r"\n": (LITERAL, ord("\n")), + r"\r": (LITERAL, ord("\r")), + r"\t": (LITERAL, ord("\t")), + r"\v": (LITERAL, ord("\v")), r"\\": (LITERAL, ord("\\")) } @@ -63,6 +61,13 @@ FLAGS = { "u": SRE_FLAG_UNICODE, } +# figure out best way to convert hex/octal numbers to integers +try: + int("10", 8) + atoi = int # 2.0 and later +except TypeError: + atoi = string.atoi # 1.5.2 + class Pattern: # master pattern object. 
keeps track of global attributes def __init__(self): @@ -219,7 +224,7 @@ def isname(name): def _group(escape, groups): # check if the escape string represents a valid group try: - gid = int(escape[1:]) + gid = atoi(escape[1:]) if gid and gid < groups: return gid except ValueError: @@ -242,13 +247,13 @@ def _class_escape(source, escape): escape = escape[2:] if len(escape) != 2: raise error, "bogus escape: %s" % repr("\\" + escape) - return LITERAL, int(escape, 16) & 0xff + return LITERAL, atoi(escape, 16) & 0xff elif str(escape[1:2]) in OCTDIGITS: # octal escape (up to three digits) while source.next in OCTDIGITS and len(escape) < 5: escape = escape + source.get() escape = escape[1:] - return LITERAL, int(escape, 8) & 0xff + return LITERAL, atoi(escape, 8) & 0xff if len(escape) == 2: return LITERAL, ord(escape[1]) except ValueError: @@ -270,12 +275,12 @@ def _escape(source, escape, state): escape = escape + source.get() if len(escape) != 4: raise ValueError - return LITERAL, int(escape[2:], 16) & 0xff + return LITERAL, atoi(escape[2:], 16) & 0xff elif escape[1:2] == "0": # octal escape while source.next in OCTDIGITS and len(escape) < 4: escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff + return LITERAL, atoi(escape[1:], 8) & 0xff elif escape[1:2] in DIGITS: # octal escape *or* decimal group reference (sigh) here = source.tell() @@ -285,7 +290,7 @@ def _escape(source, escape, state): source.next in OCTDIGITS): # got three octal digits; this is an octal escape escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff + return LITERAL, atoi(escape[1:], 8) & 0xff # got at least one decimal digit; this is a group reference group = _group(escape, state.groups) if group: @@ -459,9 +464,9 @@ def _parse(source, state): source.seek(here) continue if lo: - min = int(lo) + min = atoi(lo) if hi: - max = int(hi) + max = atoi(hi) if max < min: raise error, "bad repeat interval" else: @@ -649,7 +654,7 @@ def parse_template(source, pattern): 
if not name: raise error, "bad group name" try: - index = int(name) + index = atoi(name) except ValueError: if not isname(name): raise error, "bad character in group name" @@ -673,7 +678,7 @@ def parse_template(source, pattern): break if not code: this = this[1:] - code = LITERAL, int(this[-6:], 8) & 0xff + code = LITERAL, atoi(this[-6:], 8) & 0xff a(code) else: try: @@ -702,4 +707,4 @@ def expand_template(template, match): if s is None: raise error, "empty group" a(s) - return sep.join(p) + return string.join(p, sep) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 89364d5..88c0d62 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -8,7 +8,7 @@ sys.path=['.']+sys.path from test_support import verbose, TestFailed import sre -import sys, os, traceback +import sys, os, string, traceback # # test support |