summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1998-04-03 21:47:12 (GMT)
committer Guido van Rossum <guido@python.org> 1998-04-03 21:47:12 (GMT)
commit 8430c583da5eb966d1aecf882b6f7e6e31fcc26d (patch)
tree 410c14d6b9615d533ea7b8fd8d20557eba26b8e4 /Lib
parent 07bcd99873c6a481180ce2e6ccc8aff154f5383c (diff)
download cpython-8430c583da5eb966d1aecf882b6f7e6e31fcc26d.zip
cpython-8430c583da5eb966d1aecf882b6f7e6e31fcc26d.tar.gz
cpython-8430c583da5eb966d1aecf882b6f7e6e31fcc26d.tar.bz2
AMK's latest
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/re.py 12
-rwxr-xr-x Lib/test/re_tests.py 20
-rw-r--r-- Lib/test/test_re.py 105
3 files changed, 106 insertions, 31 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 4198773..f6bac08 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -66,8 +66,8 @@ def escape(pattern):
alphanum=string.letters+'_'+string.digits
for char in pattern:
if char not in alphanum:
- if char == '\000': result.append(r'\000')
- else: result.append('\\' + char)
+ if char=='\000': result.append('\\000')
+ else: result.append('\\'+char)
else: result.append(char)
return string.join(result, '')
@@ -132,9 +132,9 @@ class RegexObject:
def subn(self, repl, source, count=0):
"""Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost
- non-overlapping occurrences of the pattern in string by the
- replacement repl. number is the number of substitutions that
- were made."""
+ non-overlapping occurrences of the pattern in the source
+ string by the replacement repl. number is the number of
+ substitutions that were made."""
if count < 0:
raise error, "negative substitution count"
@@ -174,7 +174,7 @@ class RegexObject:
return (string.join(results, ''), n)
def split(self, source, maxsplit=0):
- """Split \var{string} by the occurrences of the pattern,
+ """Split the \var{source} string by the occurrences of the pattern,
returning a list containing the resulting substrings."""
if maxsplit < 0:
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index 9bbfa54..a42857a 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -2,7 +2,7 @@
# -*- mode: python -*-
# $Id$
-# Re test suite and benchmark suite v1.5b2
+# Re test suite and benchmark suite v1.5
# The 3 possible outcomes for each pattern
[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
@@ -62,23 +62,20 @@ tests = [
('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
-
+
# Test octal escapes
- ('\\1', 'a', SYNTAX_ERROR),
+ ('\\1', 'a', SYNTAX_ERROR), # Backreference
+ ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character
('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
('\\141', 'a', SUCCEED, 'found', 'a'),
('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
- # Test that a literal \0 is handled everywhere
- ('\0', '\0', SUCCEED, 'found', '\0'),
+ # Test \0 is handled everywhere
(r'\0', '\0', SUCCEED, 'found', '\0'),
- ('[\0a]', '\0', SUCCEED, 'found', '\0'),
- ('[a\0]', '\0', SUCCEED, 'found', '\0'),
- ('[^a\0]', '\0', FAIL),
(r'[\0a]', '\0', SUCCEED, 'found', '\0'),
(r'[a\0]', '\0', SUCCEED, 'found', '\0'),
(r'[^a\0]', '\0', FAIL),
-
+
# Test various letter escapes
(r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
(r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
@@ -103,6 +100,8 @@ tests = [
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+ (')', '', SYNTAX_ERROR), # Unmatched right bracket
+ ('', '', SUCCEED, 'found', ''), # Empty pattern
('abc', 'abc', SUCCEED, 'found', 'abc'),
('abc', 'xbc', FAIL),
('abc', 'axc', FAIL),
@@ -393,9 +392,6 @@ tests = [
('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
('[k]', 'ab', FAIL),
-# XXX
-# ('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'),
-# ('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'),
('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index ffb5c66..d5b16c6 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2,6 +2,9 @@
# -*- mode: python -*-
# $Id$
+import sys
+sys.path=['.']+sys.path
+
from test_support import verbose, TestFailed
import re
import sys, os, string, traceback
@@ -9,6 +12,27 @@ import sys, os, string, traceback
# Misc tests from Tim Peters' re.doc
if verbose:
+ print 'Running tests on re.search and re.match'
+
+try:
+ assert re.search('x*', 'axx').span(0) == (0, 0)
+ assert re.search('x*', 'axx').span() == (0, 0)
+ assert re.search('x+', 'axx').span(0) == (1, 3)
+ assert re.search('x+', 'axx').span() == (1, 3)
+ assert re.search('x', 'aaa') == None
+except:
+ raise TestFailed, "re.search"
+
+try:
+ assert re.match('a*', 'xxx').span(0) == (0, 0)
+ assert re.match('a*', 'xxx').span() == (0, 0)
+ assert re.match('x*', 'xxxa').span(0) == (0, 3)
+ assert re.match('x*', 'xxxa').span() == (0, 3)
+ assert re.match('a+', 'xxx') == None
+except:
+ raise TestFailed, "re.search"
+
+if verbose:
print 'Running tests on re.sub'
try:
@@ -19,25 +43,30 @@ try:
return str(int_value + 1)
assert re.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y'
+ assert re.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y'
assert re.sub('.', lambda m: r"\n", 'x') == '\\n'
assert re.sub('.', r"\n", 'x') == '\n'
s = r"\1\1"
assert re.sub('(.)', s, 'x') == 'xx'
- assert re.sub('(.)', re.escape(s), 'x') == s
+ assert re.sub('(.)', re.escape(s), 'x') == s
assert re.sub('(.)', lambda m: s, 'x') == s
assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
+ assert re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx') == 'xxxx'
assert re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx') == 'xxxx'
+ assert re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx') == 'xxxx'
- assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
+ assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'
assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
+ assert re.sub('^\s*', 'X', 'test') == 'Xtest'
except AssertionError:
raise TestFailed, "re.sub"
+
try:
assert re.sub('a', 'b', 'aaaaa') == 'bbbbb'
assert re.sub('a', 'b', 'aaaaa', 1) == 'baaaa'
@@ -76,6 +105,13 @@ else:
raise TestFailed, "symbolic reference"
try:
+ re.sub('(?P<a>x)', '\g<1a1>', 'xx')
+except re.error, reason:
+ pass
+else:
+ raise TestFailed, "symbolic reference"
+
+try:
re.sub('(?P<a>x)', '\g<ab>', 'xx')
except IndexError, reason:
pass
@@ -104,9 +140,13 @@ try:
assert re.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1)
assert re.subn("b+", "x", "xyz") == ('xyz', 0)
assert re.subn("b*", "x", "xyz") == ('xxxyxzx', 4)
+ assert re.subn("b*", "x", "xyz", 2) == ('xxxyz', 2)
except AssertionError:
raise TestFailed, "re.subn"
+if verbose:
+ print 'Running tests on re.split'
+
try:
assert re.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c']
assert re.split(":*", ":a:b::c") == ['', 'a', 'b', 'c']
@@ -117,7 +157,6 @@ try:
assert re.split("(b)|(:+)", ":a:b::c") == \
['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c']
assert re.split("(?:b)|(?::+)", ":a:b::c") == ['', 'a', '', '', 'c']
-
except AssertionError:
raise TestFailed, "re.split"
@@ -130,16 +169,55 @@ try:
except AssertionError:
raise TestFailed, "qualified re.split"
+try:
+ # No groups at all
+ m = re.match('a', 'a') ; assert m.groups() == ()
+ # A single group
+ m = re.match('(a)', 'a') ; assert m.groups() == ('a',)
+
+ pat = re.compile('((a)|(b))(c)?')
+ assert pat.match('a').groups() == ('a', 'a', None, None)
+ assert pat.match('b').groups() == ('b', None, 'b', None)
+ assert pat.match('ac').groups() == ('a', 'a', None, 'c')
+ assert pat.match('bc').groups() == ('b', None, 'b', 'c')
+except AssertionError:
+ raise TestFailed, "match .groups() method"
+
+try:
+ # A single group
+ m = re.match('(a)', 'a')
+ assert m.group(0) == 'a' ; assert m.group(0) == 'a'
+ assert m.group(1) == 'a' ; assert m.group(1, 1) == ('a', 'a')
+
+ pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
+ assert pat.match('a').group(1, 2, 3) == ('a', None, None)
+ assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None)
+ assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c')
+except AssertionError:
+ raise TestFailed, "match .group() method"
+
+try:
+ p=""
+ for i in range(0, 256):
+ p = p + chr(i)
+ assert re.match(re.escape(chr(i)), chr(i)) != None
+ assert re.match(re.escape(chr(i)), chr(i)).span() == (0,1)
+
+ pat=re.compile( re.escape(p) )
+ assert pat.match(p) != None
+ assert pat.match(p).span() == (0,256)
+except AssertionError:
+ raise TestFailed, "re.escape"
+
+
if verbose:
print 'Pickling a RegexObject instance'
- import pickle
- pat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
- s = pickle.dumps(pat)
- pat = pickle.loads(s)
-if verbose:
- print 'Running tests on re.split'
-
+import pickle
+pat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
+s = pickle.dumps(pat)
+pat = pickle.loads(s)
+
try:
assert re.I == re.IGNORECASE
assert re.L == re.LOCALE
@@ -156,11 +234,13 @@ for flags in [re.I, re.M, re.X, re.S, re.L]:
print 'Exception raised on flag', flags
from re_tests import *
+
if verbose:
print 'Running re_tests test suite'
else:
# To save time, only run the first and last 10 tests
- pass #tests = tests[:10] + tests[-10:]
+ #tests = tests[:10] + tests[-10:]
+ pass
for t in tests:
sys.stdout.flush()
@@ -180,7 +260,7 @@ for t in tests:
print '=== Syntax error:', t
except KeyboardInterrupt: raise KeyboardInterrupt
except:
- print '*** Unexpected error ***'
+ print '*** Unexpected error ***', t
if verbose:
traceback.print_exc(file=sys.stdout)
else:
@@ -250,4 +330,3 @@ for t in tests:
result=obj.search(s)
if result==None:
print '=== Fails on locale-sensitive match', t
-