summary refs log tree commit diff stats
path: root/Lib/test/re_tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/re_tests.py')
-rwxr-xr-x Lib/test/re_tests.py 140
1 files changed, 125 insertions, 15 deletions
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index 85b0267..0cf9241 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
# -*- mode: python -*-
# Re test suite and benchmark suite v1.5
@@ -86,8 +86,9 @@ tests = [
(r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
(r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
# NOTE: not an error under PCRE/PRE:
- (r'\u', '', SYNTAX_ERROR), # A Perl escape
- # (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
+ # (r'\u', '', SYNTAX_ERROR), # A Perl escape
+ (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
+ (r'\xff', '\377', SUCCEED, 'found', chr(255)),
# new \x semantics
(r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
(r'\x00f', '\017', FAIL, 'found', chr(15)),
@@ -105,10 +106,10 @@ tests = [
('a.*b', 'acc\nccb', FAIL),
('a.{4,5}b', 'acc\nccb', FAIL),
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
- ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
- ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+ ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
+ ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
- ('(?s)a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
+ ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
(')', '', SYNTAX_ERROR), # Unmatched right bracket
('', '', SUCCEED, 'found', ''), # Empty pattern
@@ -157,7 +158,7 @@ tests = [
('(abc', '-', SYNTAX_ERROR),
('a]', 'a]', SUCCEED, 'found', 'a]'),
('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
- ('a[\\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+ ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
('a[^bc]d', 'abd', FAIL),
('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
@@ -272,32 +273,124 @@ tests = [
# Test octal escapes/memory references
('\\1', 'a', SYNTAX_ERROR),
+ ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
+ ('\\141', 'a', SUCCEED, 'found', 'a'),
+ ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
# All tests from Perl
+ ('abc', 'abc', SUCCEED, 'found', 'abc'),
+ ('abc', 'xbc', FAIL),
+ ('abc', 'axc', FAIL),
+ ('abc', 'abx', FAIL),
+ ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
+ ('abc', 'ababc', SUCCEED, 'found', 'abc'),
+ ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
+ ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
+ ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
+ ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+ ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
+ ('ab+bc', 'abc', FAIL),
+ ('ab+bc', 'abq', FAIL),
('ab{1,}bc', 'abq', FAIL),
+ ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
('ab{4,5}bc', 'abbbbc', FAIL),
+ ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
+ ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
+ ('ab?bc', 'abbbbc', FAIL),
+ ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
+ ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
+ ('^abc$', 'abcc', FAIL),
+ ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
+ ('^abc$', 'aabc', FAIL),
+ ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
('^', 'abc', SUCCEED, 'found', ''),
('$', 'abc', SUCCEED, 'found', ''),
+ ('a.c', 'abc', SUCCEED, 'found', 'abc'),
+ ('a.c', 'axc', SUCCEED, 'found', 'axc'),
+ ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
+ ('a.*c', 'axyzd', FAIL),
+ ('a[bc]d', 'abc', FAIL),
+ ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
+ ('a[b-d]e', 'abd', FAIL),
+ ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
+ ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
+ ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
('a[b-a]', '-', SYNTAX_ERROR),
+ ('a[]b', '-', SYNTAX_ERROR),
+ ('a[', '-', SYNTAX_ERROR),
+ ('a]', 'a]', SUCCEED, 'found', 'a]'),
+ ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+ ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
+ ('a[^bc]d', 'abd', FAIL),
+ ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
+ ('a[^-b]c', 'a-c', FAIL),
+ ('a[^]b]c', 'a]c', FAIL),
+ ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
+ ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
+ ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
+ ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
('*a', '-', SYNTAX_ERROR),
('(*)b', '-', SYNTAX_ERROR),
+ ('$b', 'b', FAIL),
+ ('a\\', '-', SYNTAX_ERROR),
+ ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
+ ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
+ ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
+ ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
+ ('abc)', '-', SYNTAX_ERROR),
+ ('(abc', '-', SYNTAX_ERROR),
+ ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
+ ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
+ ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
('a**', '-', SYNTAX_ERROR),
('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
+ ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+ ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+ ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+ (')(', '-', SYNTAX_ERROR),
+ ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
+ ('abc', '', FAIL),
+ ('a*', '', SUCCEED, 'found', ''),
('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
+ ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
+ ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
+ ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
+ ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
+ ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
+ ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
+ ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
('^(ab|cd)e', 'abcde', FAIL),
+ ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
+ ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
+ ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
+ ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
+ ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+ ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+ ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
+ ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
+ ('a[bcd]+dcdcde', 'adcdcde', FAIL),
+ ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
+ ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
+ ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
+ ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
# Python does not have the same rules for \\41 so this is a syntax error
@@ -305,6 +398,15 @@ tests = [
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+ ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
+ ('multiple words of text', 'uh-uh', FAIL),
+ ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
+ ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
+ ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
+ ('[k]', 'ab', FAIL),
+ ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
+ ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+ ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
('(?i)abc', 'XBC', FAIL),
('(?i)abc', 'AXC', FAIL),
@@ -444,11 +546,12 @@ tests = [
('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
+ ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
# lookbehind: split by : but not if it is escaped by -.
('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
# escaping with \ as we know it
- ('(?<!\\\\):(.*?)(?<!\\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
+ ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
# terminating with ' and escaping with ? as in edifact
("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
@@ -460,7 +563,7 @@ tests = [
# Check odd placement of embedded pattern modifiers
# not an error under PCRE/PRE:
- ('(?i)w', 'W', SUCCEED, 'found', 'W'),
+ ('w(?i)', 'W', SUCCEED, 'found', 'W'),
# ('w(?i)', 'W', SYNTAX_ERROR),
# Comments using the x embedded pattern modifier
@@ -483,7 +586,10 @@ xyz""", SUCCEED, 'found', 'abc'),
xyzabc
123""", SUCCEED, 'found', 'abc'),
+ # using the s embedded pattern modifier
+ ('a.b', 'a\nb', FAIL),
+ ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
# test \w, etc. both inside and outside character classes
@@ -501,8 +607,8 @@ xyzabc
# new \x semantics
(r'\x00ff', '\377', FAIL),
# (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
- (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
- ('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
+ (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+ ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
(r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
@@ -521,7 +627,7 @@ xyzabc
# bug 114033: nothing to repeat
(r'(x?)?', 'x', SUCCEED, 'found', 'x'),
# bug 115040: rescan if flags are modified inside pattern
- (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
+ (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
# bug 115618: negative lookahead
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
# bug 116251: character class bug
@@ -555,10 +661,14 @@ xyzabc
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]
-u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
-tests.extend([
+try:
+ u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
+except (SyntaxError, ValueError):
+ pass
+else:
+ tests.extend([
# bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
(r'(?u)\w', u, SUCCEED, 'found', u),
-])
+ ])