summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2008-01-03 19:12:44 (GMT)
committerGuido van Rossum <guido@python.org>2008-01-03 19:12:44 (GMT)
commitae04c3356ed2aec0e9e2c39096a3ccd05722575a (patch)
tree0983292ad7e3485d6fa962cfd5ce861852438fad
parent1beea3be3e507a85b0570e82e8f100594d861f6b (diff)
downloadcpython-ae04c3356ed2aec0e9e2c39096a3ccd05722575a.zip
cpython-ae04c3356ed2aec0e9e2c39096a3ccd05722575a.tar.gz
cpython-ae04c3356ed2aec0e9e2c39096a3ccd05722575a.tar.bz2
Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh:
Regular expression inline flags were not handled correctly for some Unicode characters. (Forward port from 2.5.2.)
-rw-r--r--Lib/sre_compile.py2
-rw-r--r--Lib/test/test_re.py30
-rw-r--r--Misc/NEWS3
3 files changed, 34 insertions, 1 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 7109599..22ab2fd 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -525,7 +525,7 @@ def compile(p, flags=0):
indexgroup[i] = k
return _sre.compile(
- pattern, flags, code,
+ pattern, flags | p.pattern.flags, code,
p.pattern.groups-1,
groupindex, indexgroup
)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f1fdfba..3056ef3 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -642,6 +642,36 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.compile("bla").match(a), None)
self.assertEqual(re.compile("").match(a).groups(), ())
+ def test_inline_flags(self):
+ # Bug #1700
+ upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
+ lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below
+
+ p = re.compile(upper_char, re.I | re.U)
+ q = p.match(lower_char)
+ self.assertNotEqual(q, None)
+
+ p = re.compile(lower_char, re.I | re.U)
+ q = p.match(upper_char)
+ self.assertNotEqual(q, None)
+
+ p = re.compile('(?i)' + upper_char, re.U)
+ q = p.match(lower_char)
+ self.assertNotEqual(q, None)
+
+ p = re.compile('(?i)' + lower_char, re.U)
+ q = p.match(upper_char)
+ self.assertNotEqual(q, None)
+
+ p = re.compile('(?iu)' + upper_char)
+ q = p.match(lower_char)
+ self.assertNotEqual(q, None)
+
+ p = re.compile('(?iu)' + lower_char)
+ q = p.match(upper_char)
+ self.assertNotEqual(q, None)
+
+
def run_re_tests():
from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
diff --git a/Misc/NEWS b/Misc/NEWS
index 50fecb7..3e3f074 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -348,6 +348,9 @@ Core and builtins
Library
-------
+- Issue #1700: Regular expression inline flags are now handled correctly
+ for certain Unicode characters.
+
- Issue #1689: PEP 3141, numeric abstract base classes.
- Tk issue #1851526: Return results from Python callbacks to Tcl as