summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1997-08-13 22:34:14 (GMT)
committer Guido van Rossum <guido@python.org> 1997-08-13 22:34:14 (GMT)
commit 95e8053a9ff42a544197e562fdf4c462fc34e8b4 (patch)
tree 06a4d3c599317f90f7a73027b1648bece2a059b2 /Lib
parent a74ef66ac82edd5a587606daef57ff6c26279280 (diff)
download cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.zip
cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.gz
cpython-95e8053a9ff42a544197e562fdf4c462fc34e8b4.tar.bz2
1.5a3 prerelease 1 from AMK
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/re.py 35
-rwxr-xr-x Lib/test/re_tests.py 15
-rw-r--r-- Lib/test/regex_tests.py 8
-rw-r--r-- Lib/test/test_re.py 13
4 files changed, 57 insertions, 14 deletions
diff --git a/Lib/re.py b/Lib/re.py
index fd7a02c..d1df766 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -317,10 +317,19 @@ class Eol(Instruction):
class Set(Instruction):
name = 'set'
- def __init__(self, set):
+ def __init__(self, set, flags=0):
self.set = set
- Instruction.__init__(self, chr(3), 33)
+ if flags & IGNORECASE: self.set=map(string.lower, self.set)
+ if len(set)==1:
+ # If only one element, use the "exact" opcode (it'll be faster)
+ Instruction.__init__(self, chr(4), 2)
+ else:
+ # Use the "set" opcode
+ Instruction.__init__(self, chr(3), 33)
def assemble(self, position, labels):
+ if len(self.set)==1:
+ # If only one character in set, generate an "exact" opcode
+ return self.opcode + self.set[0]
result = self.opcode
temp = 0
for i, c in map(lambda x: (x, chr(x)), range(256)):
@@ -333,14 +342,16 @@ class Set(Instruction):
def __repr__(self):
result = '%-15s' % (self.name)
self.set.sort()
+ # XXX this should print more intelligently
for char in self.set:
result = result + char
return result
class Exact(Instruction):
name = 'exact'
- def __init__(self, char):
+ def __init__(self, char, flags):
self.char = char
+ if flags & IGNORECASE: self.char=string.lower(self.char)
Instruction.__init__(self, chr(4), 2)
def assemble(self, position, labels):
return self.opcode + self.char
@@ -881,7 +892,7 @@ def compile(pattern, flags=0):
escape_type, value, index = expand_escape(pattern, index)
if escape_type == CHAR:
- stack.append([Exact(value)])
+ stack.append([Exact(value, flags)])
lastop = '\\' + value
elif escape_type == MEMORY_REFERENCE:
@@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
elif char == '.':
if flags & DOTALL:
- stack.append([Set(map(chr, range(256)))])
+ stack.append([Set(map(chr, range(256)), flags)])
else:
stack.append([AnyChar()])
lastop = '.'
@@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
index = end + 1
# do not change lastop
else:
- stack.append([Exact(char)])
+ stack.append([Exact(char, flags)])
lastop = '#'
elif char in string.whitespace:
if not (flags & VERBOSE):
- stack.append([Exact(char)])
+ stack.append([Exact(char, flags)])
lastop = char
elif char == '[':
@@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
index = index + 1
if negate:
+ # If case is being ignored, then both upper- and lowercase
+ # versions of the letters must be excluded.
+ if flags & IGNORECASE: set=set+map(string.upper, set)
notset = []
for char in map(chr, range(256)):
if char not in set:
notset.append(char)
if len(notset) == 0:
raise error, 'empty negated set'
- stack.append([Set(notset)])
+ stack.append([Set(notset, flags)])
else:
if len(set) == 0:
raise error, 'empty set'
- stack.append([Set(set)])
+ stack.append([Set(set, flags)])
lastop = '[]'
else:
- stack.append([Exact(char)])
+ stack.append([Exact(char, flags)])
lastop = char
code = []
@@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
code.append(Label(label))
label = label + 1
code.append(End())
+# print code
return RegexObject(pattern, flags, code, register, groupindex)
# Replace expand_escape and _expand functions with their C equivalents.
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index eb50558..9143938 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -318,6 +318,7 @@ tests = [
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+ ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
('multiple words of text', 'uh-uh', FAIL),
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
@@ -448,7 +449,6 @@ tests = [
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
- ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
@@ -506,10 +506,21 @@ xyzabc
('a.b', 'a\nb', FAIL),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
- # test \w, etc.
+ # test \w, etc. both inside and outside character classes
('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+ ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
+ ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
('[\\d-x]', '-', SYNTAX_ERROR),
+ (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+ (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+
+ (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+ (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
+ (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+ ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+ (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
+ (r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'),
]
diff --git a/Lib/test/regex_tests.py b/Lib/test/regex_tests.py
index 70ecdab..dcb980a 100644
--- a/Lib/test/regex_tests.py
+++ b/Lib/test/regex_tests.py
@@ -278,6 +278,12 @@ tests = [
('\\([xyz]*\\)x', 'abcx', SUCCEED,
'found+"-"+g1', 'x-'),
('\\(a\\)+b\\|aac', 'aac', SUCCEED,
- 'found+"-"+g1', 'aac-None')
+ 'found+"-"+g1', 'aac-None'),
+('\<a', 'a', SUCCEED, 'found', 'a'),
+('\<a', '!', FAIL),
+('a\<b', 'ab', FAIL),
+('a\>', 'ab', FAIL),
+('a\>', 'a!', SUCCEED, 'found', 'a'),
+('a\>', 'a', SUCCEED, 'found', 'a'),
]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 1581856..c4b21cf 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -31,6 +31,10 @@ try:
assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
+ assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
+ assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
+ assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
+
except AssertionError:
raise TestFailed, "re.sub"
@@ -120,7 +124,6 @@ if verbose:
print 'Running re_tests test suite'
for t in tests:
- print t
sys.stdout.flush()
pattern=s=outcome=repl=expected=None
if len(t)==5:
@@ -136,6 +139,7 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
print '=== Syntax error:', t
+ except KeyboardInterrupt: raise KeyboardInterrupt
except:
print '*** Unexpected error ***'
if verbose:
@@ -182,3 +186,10 @@ for t in tests:
print repr(repl)+' should be '+repr(expected)
else:
print '=== Failed incorrectly', t
+
+ # Try the match with IGNORECASE enabled, and check that it
+ # still succeeds.
+ obj=re.compile(pattern, re.IGNORECASE)
+ result=obj.search(s)
+ if result==None:
+ print '=== Fails on case-insensitive match', t