diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-07-22 17:53:22 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-07-22 17:53:22 (GMT) |
commit | 22628c4d6a79e38371e383cc40702bf0935e355e (patch) | |
tree | 05a58350d5a9340d4370e2ab5e6649a664544732 /Lib | |
parent | 943f33912c243b0769023082691475012428da5a (diff) | |
download | cpython-22628c4d6a79e38371e383cc40702bf0935e355e.zip cpython-22628c4d6a79e38371e383cc40702bf0935e355e.tar.gz cpython-22628c4d6a79e38371e383cc40702bf0935e355e.tar.bz2 |
#3231: re.compile fails with some bytes patterns
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_parse.py | 2 | ||||
-rwxr-xr-x | Lib/test/re_tests.py | 8 | ||||
-rw-r--r-- | Lib/test/test_re.py | 43 |
3 files changed, 26 insertions, 27 deletions
```diff
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 6e70024..ffa8902 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -200,7 +200,7 @@ class Tokenizer:
             except IndexError:
                 raise error("bogus escape (end of line)")
             if isinstance(self.string, bytes):
-                char = chr(c)
+                c = chr(c)
             char = char + c
         self.index = self.index + len(char)
         self.next = char
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index 25b1229..220301a 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -661,12 +661,8 @@ xyzabc
     ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
 ]
 
-try:
-    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
-    pass
-else:
-    tests.extend([
+u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
+tests.extend([
     # bug 410271: \b broken under locales
     (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
     (r'(?u)\b.\b', u, SUCCEED, 'found', u),
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index fd41b6e..60b816e 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -732,23 +732,25 @@ def run_re_tests():
                 else:
                     print('=== Failed incorrectly', t)
 
-                # Try the match on a unicode string, and check that it
-                # still succeeds.
+                # Try the match with both pattern and string converted to
+                # bytes, and check that it still succeeds.
                 try:
-                    result = obj.search(str(s, "latin-1"))
-                    if result is None:
-                        print('=== Fails on unicode match', t)
-                except NameError:
-                    continue # 1.5.2
-                except TypeError:
-                    continue # unicode test case
-
-                # Try the match on a unicode pattern, and check that it
-                # still succeeds.
-                obj=re.compile(str(pattern, "latin-1"))
-                result = obj.search(s)
-                if result is None:
-                    print('=== Fails on unicode pattern match', t)
+                    bpat = bytes(pattern, "ascii")
+                    bs = bytes(s, "ascii")
+                except UnicodeEncodeError:
+                    # skip non-ascii tests
+                    pass
+                else:
+                    try:
+                        bpat = re.compile(bpat)
+                    except Exception:
+                        print('=== Fails on bytes pattern compile', t)
+                        if verbose:
+                            traceback.print_exc(file=sys.stdout)
+                    else:
+                        bytes_result = bpat.search(bs)
+                        if bytes_result is None:
+                            print('=== Fails on bytes pattern match', t)
 
                 # Try the match with the search area limited to the extent
                 # of the match and see if it still succeeds. \B will
@@ -771,10 +773,11 @@ def run_re_tests():
 
                 # Try the match with LOCALE enabled, and check that it
                 # still succeeds.
-                obj = re.compile(pattern, re.LOCALE)
-                result = obj.search(s)
-                if result is None:
-                    print('=== Fails on locale-sensitive match', t)
+                if '(?u)' not in pattern:
+                    obj = re.compile(pattern, re.LOCALE)
+                    result = obj.search(s)
+                    if result is None:
+                        print('=== Fails on locale-sensitive match', t)
 
                 # Try the match with UNICODE locale enabled, and check
                 # that it still succeeds.
```