From 04a1d74229058d204ce570e3727f438c31c1a176 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Jul 1997 14:38:13 +0000 Subject: Jeffrey's newest --- Lib/re.py | 423 +++++++++++++++++++++++++++++++----------------- Lib/test/output/test_re | 59 +------ Lib/test/re_tests.py | 2 +- Modules/reopmodule.c | 18 +-- 4 files changed, 287 insertions(+), 215 deletions(-) diff --git a/Lib/re.py b/Lib/re.py index 2d24da5..b701bb6 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -153,9 +153,7 @@ class MatchObject: g = self.re.groupindex[g] except (KeyError, TypeError): raise IndexError, ('group "' + g + '" is undefined') - if g >= len(self.regs): - result.append(None) - elif (self.regs[g][0] == -1) or (self.regs[g][1] == -1): + if (self.regs[g][0] == -1) or (self.regs[g][1] == -1): result.append(None) else: result.append(self.string[self.regs[g][0]:self.regs[g][1]]) @@ -525,6 +523,186 @@ def build_fastmap(code, pos=0): # # +[NORMAL, CHARCLASS, REPLACEMENT] = range(3) +[CHAR, MEMORY_REFERENCE, SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY, + BEGINNING_OF_BUFFER, END_OF_BUFFER] = range(8) + +def expand_escape(pattern, index, context=NORMAL): + if index >= len(pattern): + raise error, 'escape ends too soon' + + elif pattern[index] == 't': + return CHAR, chr(9), index + 1 + + elif pattern[index] == 'n': + return CHAR, chr(10), index + 1 + + elif pattern[index] == 'r': + return CHAR, chr(13), index + 1 + + elif pattern[index] == 'f': + return CHAR, chr(12), index + 1 + + elif pattern[index] == 'a': + return CHAR, chr(7), index + 1 + + elif pattern[index] == 'e': + return CHAR, chr(27), index + 1 + + elif pattern[index] == 'c': + if index + 1 >= len(pattern): + raise error, '\\c must be followed by another character' + elif pattern[index + 1] in 'abcdefghijklmnopqrstuvwxyz': + return CHAR, chr(ord(pattern[index + 1]) - ord('a') + 1), index + 2 + else: + return CHAR, chr(ord(pattern[index + 1]) ^ 64), index + 2 + + elif pattern[index] == 'x': + # CAUTION: this is the Python rule, not the Perl rule! + end = index + while (end < len(pattern)) and (pattern[end] in string.hexdigits): + end = end + 1 + if end == index: + raise error, "\\x must be followed by hex digit(s)" + # let Python evaluate it, so we don't incorrectly 2nd-guess + # what it's doing (and Python in turn passes it on to sscanf, + # so that *it* doesn't incorrectly 2nd-guess what C does!) + char = eval ('"' + pattern[index-2:end] + '"') + assert len(char) == 1 + return CHAR, char, end + + elif pattern[index] == 'b': + if context != NORMAL: + return CHAR, chr(8), index + 1 + else: + return WORD_BOUNDARY, '', index + 1 + + elif pattern[index] == 'B': + if context != NORMAL: + return CHAR, 'B', index + 1 + else: + return NOT_WORD_BOUNDARY, '', index + 1 + + elif pattern[index] == 'A': + if context != NORMAL: + return CHAR, 'A', index + 1 + else: + return BEGINNING_OF_BUFFER, '', index + 1 + + elif pattern[index] == 'Z': + if context != NORMAL: + return 'Z', index + 1 + else: + return END_OF_BUFFER, '', index + 1 + + elif pattern[index] in 'GluLUQE': + raise error, ('\\' + ch + ' is not allowed') + + elif pattern[index] == 'w': + if context == NORMAL: + return SYNTAX, 'word', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'word' in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 'w', index + 1 + + elif pattern[index] == 'W': + if context == NORMAL: + return NOT_SYNTAX, 'word', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'word' not in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 'W', index + 1 + + elif pattern[index] == 's': + if context == NORMAL: + return SYNTAX, 'whitespace', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'whitespace' in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 's', index + 1 + + elif pattern[index] == 'S': + if context == NORMAL: + return NOT_SYNTAX, 'whitespace', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'whitespace' not in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 'S', index + 1 + + elif pattern[index] == 'd': + if context == NORMAL: + return SYNTAX, 'digit', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'digit' in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 'd', index + 1 + + elif pattern[index] == 'D': + if context == NORMAL: + return NOT_SYNTAX, 'digit', index + 1 + elif context == CHARCLASS: + set = [] + for char in syntax_table.keys(): + if 'digit' not in syntax_table[char]: + set.append(char) + return SET, set, index + 1 + else: + return CHAR, 'D', index + 1 + + elif pattern[index] in '0123456789': + end = index + while (end < len(pattern)) and (pattern[end] in string.digits): + end = end + 1 + value = pattern[index:end] + + if (len(value) == 3) or ((len(value) == 2) and (value[0] == '0')): + # octal character value + value = string.atoi(value, 8) + if value > 255: + raise error, 'octal char out of range' + return CHAR, chr(value), end + + elif value == '0': + return CHAR, chr(0), end + + elif len(value) > 3: + raise error, ('\\' + value + ' has too many digits') + + else: + # \1-\99 - reference a register + if context == CHARCLASS: + raise error, ('cannot reference a register from ' + 'inside a character class') + value = string.atoi(value) + if value == 0: + raise error, ('register 0 cannot be used ' + 'during match') + return MEMORY_REFERENCE, value, end + + else: + return CHAR, pattern[index], index + 1 + def compile(pattern, flags=0): stack = [] index = 0 @@ -536,118 +714,50 @@ def compile(pattern, flags=0): char = pattern[index] index = index + 1 if char == '\\': - if index < len(pattern): - next = pattern[index] - index = index + 1 - if next == 't': - stack.append([Exact(chr(9))]) - - elif next == 'n': - stack.append([Exact(chr(10))]) - - elif next == 'r': - stack.append([Exact(chr(13))]) - - elif next == 'f': - stack.append([Exact(chr(12))]) - - elif next == 'a': - stack.append([Exact(chr(7))]) - - elif next == 'e': - stack.append([Exact(chr(27))]) + escape_type, value, index = expand_escape(pattern, index) - elif next in '0123456789': - value = next - while (index < len(pattern)) and \ - (pattern[index] in string.digits): - value = value + pattern[index] - index = index + 1 - if (len(value) == 3) or \ - ((len(value) == 2) and (value[0] == '0')): - value = string.atoi(value, 8) - if value > 255: - raise error, 'octal char out of range' - stack.append([Exact(chr(value))]) - elif value == '0': - stack.append([Exact(chr(0))]) - elif len(value) > 3: - raise error, 'too many digits' - else: - value = string.atoi(value) - if value >= register: - raise error, ('cannot reference a register ' - 'not yet used') - elif value == 0: - raise error, ('register 0 cannot be used ' - 'during match') - stack.append([MatchMemory(value)]) - - elif next == 'x': - value = '' - while (index < len(pattern)) and \ - (pattern[index] in string.hexdigits): - value = value + pattern[index] - index = index + 1 - value = string.atoi(value, 16) - if value > 255: - raise error, 'hex char out of range' - stack.append([Exact(chr(value))]) - - elif next == 'c': - if index >= len(pattern): - raise error, '\\c at end of re' - elif pattern[index] in 'abcdefghijklmnopqrstuvwxyz': - stack.append(Exact(chr(ord(pattern[index]) - - ord('a') + 1))) - else: - stack.append(Exact(chr(ord(pattern[index]) ^ 64))) - index = index + 1 - - elif next == 'A': - stack.append([BegBuf()]) - - elif next == 'Z': - stack.append([EndBuf()]) - - elif next == 'b': - stack.append([WordBound()]) - - elif next == 'B': - stack.append([NotWordBound()]) - - elif next == 'w': - stack.append([SyntaxSpec('word')]) - - elif next == 'W': - stack.append([NotSyntaxSpec('word')]) - - elif next == 's': - stack.append([SyntaxSpec('whitespace')]) - - elif next == 'S': - stack.append([NotSyntaxSpec('whitespace')]) - - elif next == 'd': - stack.append([SyntaxSpec('digit')]) - - elif next == 'D': - stack.append([NotSyntaxSpec('digit')]) - - elif next in 'GluLUQE': - # some perl-isms that we don't support - raise error, '\\' + next + ' not supported' + if escape_type == CHAR: + stack.append([Exact(value)]) - else: - stack.append([Exact(pattern[index])]) - + elif escape_type == MEMORY_REFERENCE: + if value >= register: + raise error, ('cannot reference a register ' + 'not yet used') + stack.append([MatchMemory(value)]) + + elif escape_type == BEGINNING_OF_BUFFER: + stack.append([BegBuf()]) + + elif escape_type == END_OF_BUFFER: + stack.append([EndBuf()]) + + elif escape_type == WORD_BOUNDARY: + stack.append([WordBound()]) + + elif escape_type == NOT_WORD_BOUNDARY: + stack.append([NotWordBound()]) + + elif escape_type == SYNTAX: + stack.append([SyntaxSpec(value)]) + + elif escape_type == NOT_SYNTAX: + stack.append([NotSyntaxSpec(value)]) + + elif escape_type == SET: + raise error, 'cannot use set escape type here' + else: - raise error, 'backslash at the end of a string' + raise error, 'unknown escape type' elif char == '|': if len(stack) == 0: - raise error, 'nothing to alternate' + raise error, 'alternate with nothing on the left' + if stack[-1][0].name == '(': + raise error, 'alternate with nothing on the left in the group' + if stack[-1][0].name == '|': + raise error, 'alternates with nothing inbetween them' expr = [] + while (len(stack) != 0) and \ (stack[-1][0].name != '(') and \ (stack[-1][0].name != '|'): @@ -775,12 +885,13 @@ def compile(pattern, flags=0): if len(stack) == 0: raise error, 'too many close parens' + if len(expr) == 0: raise error, 'nothing inside parens' # check to see if alternation used correctly if (expr[-1].name == '|'): - raise error, 'alternation with nothing on the right' + raise error, 'alternate with nothing on the right' # remove markers left by alternation expr = filter(lambda x: x.name != '|', expr) @@ -789,7 +900,7 @@ def compile(pattern, flags=0): need_label = 0 for i in range(len(expr)): if (expr[i].name == 'jump') and (expr[i].label == -1): - expr[i] = JumpOpcode(label) + expr[i] = Jump(label) need_label = 1 if need_label: expr.append(Label(label)) @@ -1033,7 +1144,7 @@ def compile(pattern, flags=0): stack.append([Exact(char)]) elif char in string.whitespace: - if flags & VERBOSE: + if not (flags & VERBOSE): stack.append([Exact(char)]) elif char == '[': @@ -1042,28 +1153,44 @@ def compile(pattern, flags=0): negate = 0 last = '' set = [] + if pattern[index] == '^': negate = 1 index = index + 1 if index >= len(pattern): raise error, 'incomplete set' - if pattern[index] in ']-': - set.append(pattern[index]) - last = pattern[index] - index = index + 1 + while (index < len(pattern)) and (pattern[index] != ']'): next = pattern[index] index = index + 1 if next == '-': + if last == '': + raise error, 'improper use of range in character set' + + start = last + if (index >= len(pattern)) or (pattern[index] == ']'): raise error, 'incomplete range in set' - if last > pattern[index]: + + if pattern[index] == '\\': + escape_type, value, index = expand_escape(pattern, + index + 1, + CHARCLASS) + + if escape_type == CHAR: + end = value + else: + raise error, ('illegal escape in character ' + 'class range') + else: + end = pattern[index] + + if start > end: raise error, 'range arguments out of order in set' - for next in map(chr, \ - range(ord(last), \ - ord(pattern[index]) + 1)): - if next not in set: - set.append(next) + for char in map(chr, range(ord(start), ord(end) + 1)): + if char not in set: + set.append(char) + last = '' index = index + 1 @@ -1071,42 +1198,30 @@ def compile(pattern, flags=0): # expand syntax meta-characters and add to set if index >= len(pattern): raise error, 'incomplete set' - elif (pattern[index] == ']'): - raise error, 'backslash at the end of a set' - elif pattern[index] == 'w': - for next in syntax_table.keys(): - if 'word' in syntax_table[next]: - set.append(next) - elif pattern[index] == 'W': - for next in syntax_table.keys(): - if 'word' not in syntax_table[next]: - set.append(next) - elif pattern[index] == 'd': - for next in syntax_table.keys(): - if 'digit' in syntax_table[next]: - set.append(next) - elif pattern[index] == 'D': - for next in syntax_table.keys(): - if 'digit' not in syntax_table[next]: - set.append(next) - elif pattern[index] == 's': - for next in syntax_table.keys(): - if 'whitespace' in syntax_table[next]: - set.append(next) - elif pattern[index] == 'S': - for next in syntax_table.keys(): - if 'whitespace' not in syntax_table[next]: - set.append(next) + + escape_type, value, index = expand_escape(pattern, + index, + CHARCLASS) + + if escape_type == CHAR: + set.append(value) + last = value + + elif escape_type == SET: + for char in value: + if char not in set: + set.append(char) + last = '' + else: - raise error, 'unknown meta in set' - last = '' - index = index + 1 + raise error, 'illegal escape type in character class' else: if next not in set: set.append(next) last = next - if pattern[index] != ']': + + if (index >= len(pattern)) or ( pattern[index] != ']'): raise error, 'incomplete set' index = index + 1 @@ -1116,8 +1231,12 @@ def compile(pattern, flags=0): for char in map(chr, range(256)): if char not in set: notset.append(char) + if len(notset) == 0: + raise error, 'empty negated set' stack.append([Set(notset)]) else: + if len(set) == 0: + raise error, 'empty set' stack.append([Set(set)]) else: @@ -1132,7 +1251,7 @@ def compile(pattern, flags=0): if len(code) == 0: raise error, 'no code generated' if (code[-1].name == '|'): - raise error, 'alternation with nothing on the right' + raise error, 'alternate with nothing on the right' code = filter(lambda x: x.name != '|', code) need_label = 0 for i in range(len(code)): diff --git a/Lib/test/output/test_re b/Lib/test/output/test_re index 56a225c..7ba8cfa 100644 --- a/Lib/test/output/test_re +++ b/Lib/test/output/test_re @@ -34,21 +34,25 @@ test_re ('a[b-d]e', 'ace', 0, 'found', 'ace') ('a[b-d]', 'aac', 0, 'found', 'ac') ('a[-b]', 'a-', 0, 'found', 'a-') +=== Syntax error: ('a[-b]', 'a-', 0, 'found', 'a-') ('a[b-]', 'a-', 2) ('a[]b', '-', 2) -*** Unexpected error *** ('a[', '-', 2) ('a\\', '-', 2) ('abc)', '-', 2) ('(abc', '-', 2) ('a]', 'a]', 0, 'found', 'a]') ('a[]]b', 'a]b', 0, 'found', 'a]b') +=== Syntax error: ('a[]]b', 'a]b', 0, 'found', 'a]b') ('a[^bc]d', 'aed', 0, 'found', 'aed') ('a[^bc]d', 'abd', 1) ('a[^-b]c', 'adc', 0, 'found', 'adc') +=== Syntax error: ('a[^-b]c', 'adc', 0, 'found', 'adc') ('a[^-b]c', 'a-c', 1) +=== Syntax error: ('a[^-b]c', 'a-c', 1) ('a[^]b]c', 'a]c', 1) ('a[^]b]c', 'adc', 0, 'found', 'adc') +=== Failed incorrectly ('a[^]b]c', 'adc', 0, 'found', 'adc') ('\\ba\\b', 'a-', 0, '"-"', '-') ('\\ba\\b', '-a', 0, '"-"', '-') ('\\ba\\b', '-a-', 0, '"-"', '-') @@ -64,125 +68,76 @@ test_re === Syntax error: ('a(b', 'a(b', 0, 'found+"-"+g1', 'a(b-None') ('a\\(*b', 'ab', 0, 'found', 'ab') ('a\\(*b', 'a((b', 0, 'found', 'a((b') -=== Failed incorrectly ('a\\(*b', 'a((b', 0, 'found', 'a((b') ('a\\\\b', 'a\\b', 0, 'found', 'a\\b') -=== Failed incorrectly ('a\\\\b', 'a\\b', 0, 'found', 'a\\b') ('((a))', 'abc', 0, 'found+"-"+g1+"-"+g2', 'a-a-a') -=== grouping error ('((a))', 'abc', 0, 'found+"-"+g1+"-"+g2', 'a-a-a') 'a-None-None' should be 'a-a-a' ('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c') -=== grouping error ('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c') 'abc-None-None' should be 'abc-a-c' ('a+b+c', 'aabbabc', 0, 'found', 'abc') ('(a+|b)*', 'ab', 0, 'found+"-"+g1', 'ab-b') -*** Unexpected error *** ('(a+|b)+', 'ab', 0, 'found+"-"+g1', 'ab-b') -*** Unexpected error *** ('(a+|b)?', 'ab', 0, 'found+"-"+g1', 'a-a') -*** Unexpected error *** (')(', '-', 2) ('[^ab]*', 'cde', 0, 'found', 'cde') ('abc', '', 1) ('a*', '', 0, 'found', '') ('a|b|c|d|e', 'e', 0, 'found', 'e') ('(a|b|c|d|e)f', 'ef', 0, 'found+"-"+g1', 'ef-e') -*** Unexpected error *** ('abcd*efg', 'abcdefg', 0, 'found', 'abcdefg') ('ab*', 'xabyabbbz', 0, 'found', 'ab') ('ab*', 'xayabbbz', 0, 'found', 'a') ('(ab|cd)e', 'abcde', 0, 'found+"-"+g1', 'cde-cd') -*** Unexpected error *** ('[abhgefdc]ij', 'hij', 0, 'found', 'hij') ('^(ab|cd)e', 'abcde', 1, 'xg1y', 'xy') -*** Unexpected error *** ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') === Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') ('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b') -*** Unexpected error *** ('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a') -*** Unexpected error *** ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc') -=== grouping error ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc') 'abc-None' should be 'abc-bc' ('a([bc]*)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') -=== grouping error ('a([bc]*)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') 'abcd-None-None' should be 'abcd-bc-d' ('a([bc]+)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') -=== grouping error ('a([bc]+)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') 'abcd-None-None' should be 'abcd-bc-d' ('a([bc]*)(c+d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-b-cd') -=== grouping error ('a([bc]*)(c+d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-b-cd') 'abcd-None-None' should be 'abcd-b-cd' ('a[bcd]*dcdcde', 'adcdcde', 0, 'found', 'adcdcde') ('a[bcd]+dcdcde', 'adcdcde', 1) ('(ab|a)b*c', 'abc', 0, 'found+"-"+g1', 'abc-ab') -*** Unexpected error *** ('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d') -=== grouping error ('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d') 'None-None-None-None' should be 'abc-a-b-d' ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', 0, 'found', 'alpha') ('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-None') -*** Unexpected error *** ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None') -*** Unexpected error *** ('(bc+d$|ef*g.|h?i(j|k))', 'ij', 0, 'found+"-"+g1+"-"+g2', 'ij-ij-j') -*** Unexpected error *** ('(bc+d$|ef*g.|h?i(j|k))', 'effg', 1) -*** Unexpected error *** ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', 1) -*** Unexpected error *** ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None') -*** Unexpected error *** ('(((((((((a)))))))))', 'a', 0, 'found', 'a') ('multiple words of text', 'uh-uh', 1) ('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words') -=== Failed incorrectly ('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words') ('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de') -=== grouping error ('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de') 'abcde-None-None' should be 'abcde-ab-de' -('((.*), (.*))', '(a, b)', 0, 'g2+"-"+g1', 'b-a') -=== grouping error ('((.*), (.*))', '(a, b)', 0, 'g2+"-"+g1', 'b-a') 'None-None' should be 'b-a' +('\\((.*), (.*)\\)', '(a, b)', 0, 'g2+"-"+g1', 'b-a') ('[k]', 'ab', 1) ('a[-]?c', 'ac', 0, 'found', 'ac') +=== Syntax error: ('a[-]?c', 'ac', 0, 'found', 'ac') ('(abc)\\1', 'abcabc', 0, 'g1', 'abc') -=== grouping error ('(abc)\\1', 'abcabc', 0, 'g1', 'abc') 'None' should be 'abc' ('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc') -=== grouping error ('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc') 'None' should be 'abc' ('^(.+)?B', 'AB', 0, 'g1', 'A') -=== grouping error ('^(.+)?B', 'AB', 0, 'g1', 'A') 'None' should be 'A' ('(a+).\\1$', 'aaaaa', 0, 'found+"-"+g1', 'aaaaa-aa') -=== grouping error ('(a+).\\1$', 'aaaaa', 0, 'found+"-"+g1', 'aaaaa-aa') 'aaaaa-None' should be 'aaaaa-aa' ('^(a+).\\1$', 'aaaa', 1) ('(abc)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') -=== grouping error ('(abc)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc' ('([a-c]+)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') -=== grouping error ('([a-c]+)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc' ('(a)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') -=== grouping error ('(a)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a' ('(a+)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') -=== grouping error ('(a+)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a' ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') -=== grouping error ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a' ('(a).+\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') -=== grouping error ('(a).+\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') 'aba-None' should be 'aba-a' ('(a)ba*\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') -=== grouping error ('(a)ba*\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') 'aba-None' should be 'aba-a' ('(aa|a)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a') -*** Unexpected error *** ('(a|aa)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a') -*** Unexpected error *** ('(a+)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a') -=== grouping error ('(a+)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a') 'aaa-None' should be 'aaa-a' ('([abc]*)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') -=== grouping error ('([abc]*)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc' ('(a)(b)c|ab', 'ab', 0, 'found+"-"+g1+"-"+g2', 'ab-None-None') ('(a)+x', 'aaax', 0, 'found+"-"+g1', 'aaax-a') -=== grouping error ('(a)+x', 'aaax', 0, 'found+"-"+g1', 'aaax-a') 'aaax-None' should be 'aaax-a' ('([ac])+x', 'aacx', 0, 'found+"-"+g1', 'aacx-c') -=== grouping error ('([ac])+x', 'aacx', 0, 'found+"-"+g1', 'aacx-c') 'aacx-None' should be 'aacx-c' ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') -=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-None' should be 'd:msgs/tdir/sub1/-tdir/' ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', 0, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah') -=== Failed incorrectly ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', 0, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah') ('([^N]*N)+', 'abNNxyzN', 0, 'found+"-"+g1', 'abNNxyzN-xyzN') -=== grouping error ('([^N]*N)+', 'abNNxyzN', 0, 'found+"-"+g1', 'abNNxyzN-xyzN') 'abNNxyzN-None' should be 'abNNxyzN-xyzN' ('([^N]*N)+', 'abNNxyz', 0, 'found+"-"+g1', 'abNN-N') -=== grouping error ('([^N]*N)+', 'abNNxyz', 0, 'found+"-"+g1', 'abNN-N') 'abNN-None' should be 'abNN-N' ('([abc]*)x', 'abcx', 0, 'found+"-"+g1', 'abcx-abc') -=== grouping error ('([abc]*)x', 'abcx', 0, 'found+"-"+g1', 'abcx-abc') 'abcx-None' should be 'abcx-abc' ('([abc]*)x', 'abc', 1) ('([xyz]*)x', 'abcx', 0, 'found+"-"+g1', 'x-') -=== grouping error ('([xyz]*)x', 'abcx', 0, 'found+"-"+g1', 'x-') 'x-None' should be 'x-' ('(a)+b|aac', 'aac', 0, 'found+"-"+g1', 'aac-None') diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index fc1fd57..a43b4ac 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -218,7 +218,7 @@ tests = [ 'found', 'multiple words'), ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'), -('((.*), (.*))', '(a, b)', SUCCEED, +('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'), ('[k]', 'ab', FAIL), ('a[-]?c', 'ac', SUCCEED, diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c index 2ac467f..9b928f5 100644 --- a/Modules/reopmodule.c +++ b/Modules/reopmodule.c @@ -87,13 +87,13 @@ reop_match(self, args) char *string; int fastmaplen, stringlen; int can_be_null, anchor, i; - int num_regs, flags, pos, result; + int flags, pos, result; struct re_pattern_buffer bufp; struct re_registers re_regs; if (!PyArg_Parse(args, "(s#iiis#is#i)", &(bufp.buffer), &(bufp.allocated), - &num_regs, &flags, &can_be_null, + &(bufp.num_registers), &flags, &can_be_null, &(bufp.fastmap), &fastmaplen, &anchor, &string, &stringlen, @@ -106,10 +106,9 @@ reop_match(self, args) bufp.fastmap_accurate=1; bufp.can_be_null=can_be_null; bufp.uses_registers=1; - bufp.num_registers=num_regs; bufp.anchor=anchor; - for(i=0; i