diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre.py | 13 | ||||
-rw-r--r-- | Lib/sre_compile.py | 8 | ||||
-rw-r--r-- | Lib/sre_parse.py | 66 | ||||
-rw-r--r-- | Lib/test/output/test_sre | 3 |
4 files changed, 49 insertions, 41 deletions
@@ -89,6 +89,10 @@ def _compile(pattern, flags=0): _cache[key] = p return p +def purge(): + # clear pattern cache + _cache.clear() + def _sub(pattern, template, string, count=0): # internal: pattern.sub implementation hook return _subn(pattern, template, string, count)[0] @@ -142,3 +146,12 @@ def _split(pattern, string, maxsplit=0): n = n + 1 append(string[i:]) return s + +# register myself for pickling + +import copy_reg + +def _pickle(p): + return _compile, (p.pattern, p.flags) + +copy_reg.pickle(type(_compile("")), _pickle, _compile) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 0829c00..e48a7eb 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -31,15 +31,15 @@ def _compile(code, pattern, flags): emit(OPCODES[OP_IGNORE[op]]) else: emit(OPCODES[op]) - emit(ord(av)) + emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) def fixup(literal, flags=flags): - return _sre.getlower(ord(literal), flags) + return _sre.getlower(literal, flags) else: emit(OPCODES[op]) - fixup = ord + fixup = lambda x: x skip = len(code); emit(0) for op, av in av: emit(OPCODES[op]) @@ -165,7 +165,7 @@ def _compile_info(code, pattern, flags): if not (flags & SRE_FLAG_IGNORECASE): for op, av in pattern.data: if op is LITERAL: - prefix.append(ord(av)) + prefix.append(av) else: break # add an info block diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index d3dbe00..fb954e9 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -19,6 +19,9 @@ from sre_constants import * # FIXME: should be 65535, but the arraymodule is still broken MAXREPEAT = 32767 +# FIXME: same here +CHARMASK = 0x7fff + SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" @@ -30,14 +33,14 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF") WHITESPACE = string.whitespace ESCAPES = { - r"\a": (LITERAL, chr(7)), - r"\b": (LITERAL, chr(8)), - r"\f": (LITERAL, chr(12)), - r"\n": (LITERAL, chr(10)), - r"\r": (LITERAL, chr(13)), - r"\t": (LITERAL, chr(9)), - r"\v": (LITERAL, chr(11)), - r"\\": (LITERAL, "\\") + r"\a": (LITERAL, 7), + r"\b": (LITERAL, 8), + r"\f": (LITERAL, 12), + r"\n": (LITERAL, 10), + r"\r": (LITERAL, 13), + r"\t": (LITERAL, 9), + r"\v": (LITERAL, 11), + r"\\": (LITERAL, ord("\\")) } CATEGORIES = { @@ -176,9 +179,6 @@ def isdigit(char): def isname(name): # check that group name is a valid string - # FIXME: <fl> this code is really lame. should use a regular - # expression instead, but I seem to have certain bootstrapping - # problems here ;-) if not isident(name[0]): return 0 for char in name: @@ -209,16 +209,14 @@ def _class_escape(source, escape): while source.next in HEXDIGITS: escape = escape + source.get() escape = escape[2:] - # FIXME: support unicode characters! - return LITERAL, chr(int(escape[-4:], 16) & 0xff) + return LITERAL, int(escape[-4:], 16) & CHARMASK elif str(escape[1:2]) in OCTDIGITS: while source.next in OCTDIGITS: escape = escape + source.get() escape = escape[1:] - # FIXME: support unicode characters! - return LITERAL, chr(int(escape[-6:], 8) & 0xff) + return LITERAL, int(escape[-6:], 8) & CHARMASK if len(escape) == 2: - return LITERAL, escape[1] + return LITERAL, ord(escape[1]) except ValueError: pass raise error, "bogus escape: %s" % repr(escape) @@ -236,8 +234,7 @@ def _escape(source, escape, state): while source.next in HEXDIGITS: escape = escape + source.get() escape = escape[2:] - # FIXME: support unicode characters! - return LITERAL, chr(int(escape[-4:], 16) & 0xff) + return LITERAL, int(escape[-4:], 16) & CHARMASK elif escape[1:2] in DIGITS: while 1: group = _group(escape, state.groups) @@ -251,17 +248,14 @@ def _escape(source, escape, state): else: break escape = escape[1:] - # FIXME: support unicode characters! - return LITERAL, chr(int(escape[-6:], 8) & 0xff) + return LITERAL, int(escape[-6:], 8) & CHARMASK if len(escape) == 2: - return LITERAL, escape[1] + return LITERAL, ord(escape[1]) except ValueError: pass raise error, "bogus escape: %s" % repr(escape) - def _branch(pattern, items): - # form a branch operator from a set of items subpattern = SubPattern(pattern) @@ -327,7 +321,7 @@ def _parse(source, state, flags=0): continue if this and this[0] not in SPECIAL_CHARS: - subpattern.append((LITERAL, this)) + subpattern.append((LITERAL, ord(this))) elif this == "[": # character set @@ -345,7 +339,7 @@ def _parse(source, state, flags=0): elif this and this[0] == "\\": code1 = _class_escape(source, this) elif this: - code1 = LITERAL, this + code1 = LITERAL, ord(this) else: raise error, "unexpected end of regular expression" if source.match("-"): @@ -353,17 +347,15 @@ def _parse(source, state, flags=0): this = source.get() if this == "]": set.append(code1) - set.append((LITERAL, "-")) + set.append((LITERAL, ord("-"))) break else: if this[0] == "\\": code2 = _class_escape(source, this) else: - code2 = LITERAL, this + code2 = LITERAL, ord(this) if code1[0] != LITERAL or code2[0] != LITERAL: raise error, "illegal range" - if len(code1[1]) != 1 or len(code2[1]) != 1: - raise error, "illegal range" set.append((RANGE, (code1[1], code2[1]))) else: if code1[0] is IN: @@ -605,17 +597,16 @@ def parse_template(source, pattern): break if not code: this = this[1:] - # FIXME: support unicode characters! - code = LITERAL, chr(int(this[-6:], 8) & 0xff) + code = LITERAL, int(this[-6:], 8) & CHARMASK a(code) else: try: a(ESCAPES[this]) except KeyError: for c in this: - a((LITERAL, c)) + a((LITERAL, ord(c))) else: - a((LITERAL, this)) + a((LITERAL, ord(this))) return p def expand_template(template, match): @@ -623,12 +614,17 @@ def expand_template(template, match): # code instead p = [] a = p.append + sep = match.string[:0] + if type(sep) is type(""): + char = chr + else: + char = unichr for c, s in template: if c is LITERAL: - a(s) + a(char(s)) elif c is MARK: s = match.group(s) if s is None: raise error, "empty group" a(s) - return match.string[:0].join(p) + return sep.join(p) diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre index d3732b5..10de93d 100644 --- a/Lib/test/output/test_sre +++ b/Lib/test/output/test_sre @@ -1,6 +1,5 @@ test_sre -test_support -- test failed re module pickle -test_support -- test failed re module cPickle +=== Failed incorrectly ('\\x00ffffffffffffff', '\377', 0, 'found', '\377') === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') === Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') === grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/' |