1.5a3 prerelease 1 from AMK

author: Guido van Rossum <guido@python.org> 1997-08-13 22:34:14 (GMT)
committer: Guido van Rossum <guido@python.org> 1997-08-13 22:34:14 (GMT)
commit: 95e8053a9ff42a544197e562fdf4c462fc34e8b4 (patch)
tree: 06a4d3c599317f90f7a73027b1648bece2a059b2 /Lib
parent: a74ef66ac82edd5a587606daef57ff6c26279280 (diff)
download: cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.zip cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.gz cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.bz2
4 files changed, 57 insertions, 14 deletions
diff --git a/Lib/re.py b/Lib/re.py
index fd7a02c..d1df766 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -317,10 +317,19 @@ class Eol(Instruction):
 
 class Set(Instruction):
     name = 'set'
-    def __init__(self, set):
+    def __init__(self, set, flags=0):
 	self.set = set
-	Instruction.__init__(self, chr(3), 33)
+	if flags & IGNORECASE: self.set=map(string.lower, self.set)
+	if len(set)==1: 
+	    # If only one element, use the "exact" opcode (it'll be faster)
+	    Instruction.__init__(self, chr(4), 2)
+	else:
+	    # Use the "set" opcode
+	    Instruction.__init__(self, chr(3), 33)
     def assemble(self, position, labels):
+	if len(self.set)==1:
+	    # If only one character in set, generate an "exact" opcode
+	    return self.opcode + self.set[0]
 	result = self.opcode
 	temp = 0
 	for i, c in map(lambda x: (x, chr(x)), range(256)):
@@ -333,14 +342,16 @@ class Set(Instruction):
     def __repr__(self):
 	result = '%-15s' % (self.name)
 	self.set.sort()
+	# XXX this should print more intelligently
 	for char in self.set:
 	    result = result + char
 	return result
     
 class Exact(Instruction):
     name = 'exact'
-    def __init__(self, char):
+    def __init__(self, char, flags):
 	self.char = char
+	if flags & IGNORECASE: self.char=string.lower(self.char)
 	Instruction.__init__(self, chr(4), 2)
     def assemble(self, position, labels):
 	return self.opcode + self.char
@@ -881,7 +892,7 @@ def compile(pattern, flags=0):
 	    escape_type, value, index = expand_escape(pattern, index)
 
 	    if escape_type == CHAR:
-		stack.append([Exact(value)])
+		stack.append([Exact(value, flags)])
 		lastop = '\\' + value
 		
 	    elif escape_type == MEMORY_REFERENCE:
@@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
 
 	elif char == '.':
 	    if flags & DOTALL:
-		stack.append([Set(map(chr, range(256)))])
+		stack.append([Set(map(chr, range(256)), flags)])
 	    else:
 		stack.append([AnyChar()])
 	    lastop = '.'
@@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
 		    index = end + 1
 		# do not change lastop
 	    else:
-		stack.append([Exact(char)])
+		stack.append([Exact(char, flags)])
 		lastop = '#'
 
 	elif char in string.whitespace:
 	    if not (flags & VERBOSE):
-		stack.append([Exact(char)])
+		stack.append([Exact(char, flags)])
 		lastop = char
 
 	elif char == '[':
@@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
 	    index = index + 1
 
 	    if negate:
+		# If case is being ignored, then both upper- and lowercase
+		# versions of the letters must be excluded.
+		if flags & IGNORECASE: set=set+map(string.upper, set)
 		notset = []
 		for char in map(chr, range(256)):
 		    if char not in set:
 			notset.append(char)
 		if len(notset) == 0:
 		    raise error, 'empty negated set'
-		stack.append([Set(notset)])
+		stack.append([Set(notset, flags)])
 	    else:
 		if len(set) == 0:
 		    raise error, 'empty set'
-		stack.append([Set(set)])
+		stack.append([Set(set, flags)])
 
 	    lastop = '[]'
 
 	else:
-	    stack.append([Exact(char)])
+	    stack.append([Exact(char, flags)])
 	    lastop = char
 
     code = []
@@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
 	code.append(Label(label))
 	label = label + 1
     code.append(End())
+#    print code
     return RegexObject(pattern, flags, code, register, groupindex)
 
 # Replace expand_escape and _expand functions with their C equivalents.
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index eb50558..9143938 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -318,6 +318,7 @@ tests = [
 #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
 #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
     ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
     ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
     ('multiple words of text', 'uh-uh', FAIL),
     ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
@@ -448,7 +449,6 @@ tests = [
     ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
     #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
     #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
-    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
     ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
     ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
     ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
@@ -506,10 +506,21 @@ xyzabc
     ('a.b', 'a\nb', FAIL),
     ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
 
-    # test \w, etc.
+    # test \w, etc. both inside and outside character classes
 
     ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
     ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
+    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
     ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
     ('[\\d-x]', '-', SYNTAX_ERROR),
+    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+
+    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+    (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
+    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
+    (r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'),
 ]
diff --git a/Lib/test/regex_tests.py b/Lib/test/regex_tests.py
index 70ecdab..dcb980a 100644
--- a/Lib/test/regex_tests.py
+++ b/Lib/test/regex_tests.py
@@ -278,6 +278,12 @@ tests = [
 ('\\([xyz]*\\)x', 'abcx', SUCCEED,
  'found+"-"+g1', 'x-'),
 ('\\(a\\)+b\\|aac', 'aac', SUCCEED,
- 'found+"-"+g1', 'aac-None')
+ 'found+"-"+g1', 'aac-None'),
+('\<a', 'a', SUCCEED, 'found', 'a'),
+('\<a', '!', FAIL),
+('a\<b', 'ab', FAIL),
+('a\>', 'ab', FAIL),
+('a\>', 'a!', SUCCEED, 'found', 'a'),
+('a\>', 'a', SUCCEED, 'found', 'a'),
 ]
 
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 1581856..c4b21cf 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -31,6 +31,10 @@ try:
 
     assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
 
+    assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
+    assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
+    assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
+
 except AssertionError:
     raise TestFailed, "re.sub"
 
@@ -120,7 +124,6 @@ if verbose:
     print 'Running re_tests test suite'
 
 for t in tests:
-    print t
     sys.stdout.flush()
     pattern=s=outcome=repl=expected=None
     if len(t)==5:
@@ -136,6 +139,7 @@ for t in tests:
 	if outcome==SYNTAX_ERROR: pass	# Expected a syntax error
 	else: 
 	    print '=== Syntax error:', t
+    except KeyboardInterrupt: raise KeyboardInterrupt
     except:
 	print '*** Unexpected error ***'
 	if verbose:
@@ -182,3 +186,10 @@ for t in tests:
 		    print repr(repl)+' should be '+repr(expected)
 	    else:
 		print '=== Failed incorrectly', t
+
+            # Try the match with IGNORECASE enabled, and check that it
+	    # still succeeds.
+            obj=re.compile(pattern, re.IGNORECASE)
+            result=obj.search(s)
+            if result==None:
+                print '=== Fails on case-insensitive match', t
author	Guido van Rossum <guido@python.org>	1997-08-13 22:34:14 (GMT)
committer	Guido van Rossum <guido@python.org>	1997-08-13 22:34:14 (GMT)
commit	95e8053a9ff42a544197e562fdf4c462fc34e8b4 (patch)
tree	06a4d3c599317f90f7a73027b1648bece2a059b2 /Lib
parent	a74ef66ac82edd5a587606daef57ff6c26279280 (diff)
download	cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.zip cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.gz cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.bz2