diff options
author | Guido van Rossum <guido@python.org> | 1997-08-13 22:34:14 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-08-13 22:34:14 (GMT) |
commit | 95e8053a9ff42a544197e562fdf4c462fc34e8b4 (patch) | |
tree | 06a4d3c599317f90f7a73027b1648bece2a059b2 /Lib | |
parent | a74ef66ac82edd5a587606daef57ff6c26279280 (diff) | |
download | cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.zip cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.gz cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.bz2 |
1.5a3 prerelease 1 from AMK
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/re.py | 35 | ||||
-rwxr-xr-x | Lib/test/re_tests.py | 15 | ||||
-rw-r--r-- | Lib/test/regex_tests.py | 8 | ||||
-rw-r--r-- | Lib/test/test_re.py | 13 |
4 files changed, 57 insertions, 14 deletions
@@ -317,10 +317,19 @@ class Eol(Instruction): class Set(Instruction): name = 'set' - def __init__(self, set): + def __init__(self, set, flags=0): self.set = set - Instruction.__init__(self, chr(3), 33) + if flags & IGNORECASE: self.set=map(string.lower, self.set) + if len(set)==1: + # If only one element, use the "exact" opcode (it'll be faster) + Instruction.__init__(self, chr(4), 2) + else: + # Use the "set" opcode + Instruction.__init__(self, chr(3), 33) def assemble(self, position, labels): + if len(self.set)==1: + # If only one character in set, generate an "exact" opcode + return self.opcode + self.set[0] result = self.opcode temp = 0 for i, c in map(lambda x: (x, chr(x)), range(256)): @@ -333,14 +342,16 @@ class Set(Instruction): def __repr__(self): result = '%-15s' % (self.name) self.set.sort() + # XXX this should print more intelligently for char in self.set: result = result + char return result class Exact(Instruction): name = 'exact' - def __init__(self, char): + def __init__(self, char, flags): self.char = char + if flags & IGNORECASE: self.char=string.lower(self.char) Instruction.__init__(self, chr(4), 2) def assemble(self, position, labels): return self.opcode + self.char @@ -881,7 +892,7 @@ def compile(pattern, flags=0): escape_type, value, index = expand_escape(pattern, index) if escape_type == CHAR: - stack.append([Exact(value)]) + stack.append([Exact(value, flags)]) lastop = '\\' + value elif escape_type == MEMORY_REFERENCE: @@ -1306,7 +1317,7 @@ def compile(pattern, flags=0): elif char == '.': if flags & DOTALL: - stack.append([Set(map(chr, range(256)))]) + stack.append([Set(map(chr, range(256)), flags)]) else: stack.append([AnyChar()]) lastop = '.' @@ -1336,12 +1347,12 @@ def compile(pattern, flags=0): index = end + 1 # do not change lastop else: - stack.append([Exact(char)]) + stack.append([Exact(char, flags)]) lastop = '#' elif char in string.whitespace: if not (flags & VERBOSE): - stack.append([Exact(char)]) + stack.append([Exact(char, flags)]) lastop = char elif char == '[': @@ -1449,22 +1460,25 @@ def compile(pattern, flags=0): index = index + 1 if negate: + # If case is being ignored, then both upper- and lowercase + # versions of the letters must be excluded. + if flags & IGNORECASE: set=set+map(string.upper, set) notset = [] for char in map(chr, range(256)): if char not in set: notset.append(char) if len(notset) == 0: raise error, 'empty negated set' - stack.append([Set(notset)]) + stack.append([Set(notset, flags)]) else: if len(set) == 0: raise error, 'empty set' - stack.append([Set(set)]) + stack.append([Set(set, flags)]) lastop = '[]' else: - stack.append([Exact(char)]) + stack.append([Exact(char, flags)]) lastop = char code = [] @@ -1485,6 +1499,7 @@ def compile(pattern, flags=0): code.append(Label(label)) label = label + 1 code.append(End()) +# print code return RegexObject(pattern, flags, code, register, groupindex) # Replace expand_escape and _expand functions with their C equivalents. diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index eb50558..9143938 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -318,6 +318,7 @@ tests = [ # ('((((((((((a))))))))))\\41', 'aa', FAIL), # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'), ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR), + ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR), ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'), ('multiple words of text', 'uh-uh', FAIL), ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'), @@ -448,7 +449,6 @@ tests = [ ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'), #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL), #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'), - ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR), ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'), @@ -506,10 +506,21 @@ xyzabc ('a.b', 'a\nb', FAIL), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), - # test \w, etc. + # test \w, etc. both inside and outside character classes ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'), + ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'), ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'), + ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'), ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'), ('[\\d-x]', '-', SYNTAX_ERROR), + (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), + (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), + + (r'\xff', '\377', SUCCEED, 'found', chr(255)), + (r'\x00ff', '\377', SUCCEED, 'found', chr(255)), + (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), + ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), + (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)), + (r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'), ] diff --git a/Lib/test/regex_tests.py b/Lib/test/regex_tests.py index 70ecdab..dcb980a 100644 --- a/Lib/test/regex_tests.py +++ b/Lib/test/regex_tests.py @@ -278,6 +278,12 @@ tests = [ ('\\([xyz]*\\)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'), ('\\(a\\)+b\\|aac', 'aac', SUCCEED, - 'found+"-"+g1', 'aac-None') + 'found+"-"+g1', 'aac-None'), +('\<a', 'a', SUCCEED, 'found', 'a'), +('\<a', '!', FAIL), +('a\<b', 'ab', FAIL), +('a\>', 'ab', FAIL), +('a\>', 'a!', SUCCEED, 'found', 'a'), +('a\>', 'a', SUCCEED, 'found', 'a'), ] diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 1581856..c4b21cf 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -31,6 +31,10 @@ try: assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx' + assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD' + assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a' + assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)) + except AssertionError: raise TestFailed, "re.sub" @@ -120,7 +124,6 @@ if verbose: print 'Running re_tests test suite' for t in tests: - print t sys.stdout.flush() pattern=s=outcome=repl=expected=None if len(t)==5: @@ -136,6 +139,7 @@ for t in tests: if outcome==SYNTAX_ERROR: pass # Expected a syntax error else: print '=== Syntax error:', t + except KeyboardInterrupt: raise KeyboardInterrupt except: print '*** Unexpected error ***' if verbose: @@ -182,3 +186,10 @@ for t in tests: print repr(repl)+' should be '+repr(expected) else: print '=== Failed incorrectly', t + + # Try the match with IGNORECASE enabled, and check that it + # still succeeds. + obj=re.compile(pattern, re.IGNORECASE) + result=obj.search(s) + if result==None: + print '=== Fails on case-insensitive match', t |