diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-05-05 05:53:40 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-05 05:53:40 (GMT) |
commit | 898ff03e1e7925ecde3da66327d3cdc7e07625ba (patch) | |
tree | 977fc4b98c0e85816348cebd3b12026407c368b6 /Lib/test/test_re.py | |
parent | 647c3d381e67490e82cdbbe6c96e46d5e1628ce2 (diff) | |
download | cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.zip cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.tar.gz cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.tar.bz2 |
bpo-30215: Make re.compile() locale agnostic. (#1361)
Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time. Only the locale at matching
time affects the result of matching.
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- | Lib/test/test_re.py | 32 |
1 file changed, 32 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index da5c953..7601dc8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1730,6 +1730,38 @@ SUBPATTERN None 0 0
         self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
         self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
 
+    def test_locale_compiled(self):
+        oldlocale = locale.setlocale(locale.LC_CTYPE)
+        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+        for loc in 'en_US.iso88591', 'en_US.utf8':
+            try:
+                locale.setlocale(locale.LC_CTYPE, loc)
+            except locale.Error:
+                # Unsupported locale on this system
+                self.skipTest('test needs %s locale' % loc)
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+        p1 = re.compile(b'\xc5\xe5', re.L|re.I)
+        p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)
+        p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)
+        p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertTrue(p.match(b'\xe5\xe5'))
+            self.assertTrue(p.match(b'\xc5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertIsNone(p.match(b'\xe5\xe5'))
+            self.assertIsNone(p.match(b'\xc5\xc5'))
+        self.assertTrue(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
     def test_error(self):
         with self.assertRaises(re.error) as cm:
             re.compile('(\u20ac))')
```