From ae04c3356ed2aec0e9e2c39096a3ccd05722575a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 3 Jan 2008 19:12:44 +0000 Subject: Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh: Regular Expression inline flags not handled correctly for some unicode characters. (Forward port from 2.5.2.) --- Lib/sre_compile.py | 2 +- Lib/test/test_re.py | 30 ++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 7109599..22ab2fd 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -525,7 +525,7 @@ def compile(p, flags=0): indexgroup[i] = k return _sre.compile( - pattern, flags, code, + pattern, flags | p.pattern.flags, code, p.pattern.groups-1, groupindex, indexgroup ) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f1fdfba..3056ef3 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -642,6 +642,36 @@ class ReTests(unittest.TestCase): self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ()) + def test_inline_flags(self): + # Bug #1700 + upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below + lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below + + p = re.compile(upper_char, re.I | re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile(lower_char, re.I | re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + upper_char, re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + lower_char, re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + upper_char) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + lower_char) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: diff 
--git a/Misc/NEWS b/Misc/NEWS index 50fecb7..3e3f074 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -348,6 +348,9 @@ Core and builtins Library ------- +- Issue #1700: Regular expression inline flags incorrectly handle certain + unicode characters. + - Issue #1689: PEP 3141, numeric abstract base classes. - Tk issue #1851526: Return results from Python callbacks to Tcl as -- cgit v0.12