From 4659cc075667f6a38f3f69c9838585c71ec44d53 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 31 Oct 2014 00:53:49 +0200 Subject: Issue #22410: Module level functions in the re module now cache compiled locale-dependent regular expressions taking into account the locale. --- Lib/re.py | 11 +++++++++-- Lib/test/test_re.py | 37 +++++++++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Lib/re.py b/Lib/re.py index 2e4d87c..46cea2b 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -122,6 +122,7 @@ This module also defines an exception 'error'. import sys import sre_compile import sre_parse +import _locale # public symbols __all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall", @@ -275,7 +276,9 @@ def _compile(pattern, flags): bypass_cache = flags & DEBUG if not bypass_cache: try: - return _cache[type(pattern), pattern, flags] + p, loc = _cache[type(pattern), pattern, flags] + if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): + return p except KeyError: pass if isinstance(pattern, _pattern_type): @@ -289,7 +292,11 @@ def _compile(pattern, flags): if not bypass_cache: if len(_cache) >= _MAXCACHE: _cache.clear() - _cache[type(pattern), pattern, flags] = p + if p.flags & LOCALE: + loc = _locale.setlocale(_locale.LC_CTYPE) + else: + loc = None + _cache[type(pattern), pattern, flags] = p, loc return p def _compile_repl(repl, pattern): diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0584f19..fb57305 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,6 +1,7 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \ cpython_only, captured_stdout import io +import locale import re from re import Scanner import sre_compile @@ -1254,6 +1255,42 @@ subpattern None # with ignore case. self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) + def test_locale_caching(self): + # Issue #22410 + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + for loc in 'en_US.iso88591', 'en_US.utf8': + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs %s locale' % loc) + + re.purge() + self.check_en_US_iso88591() + self.check_en_US_utf8() + re.purge() + self.check_en_US_utf8() + self.check_en_US_iso88591() + + def check_en_US_iso88591(self): + locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') + self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) + self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I)) + self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I)) + self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) + self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5')) + self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5')) + + def check_en_US_utf8(self): + locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') + self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) + self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I)) + self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I)) + self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) + self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) + self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) + class PatternReprTests(unittest.TestCase): def check(self, pattern, expected): diff --git a/Misc/NEWS b/Misc/NEWS index 6245bd0..52bbcf1 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -33,6 +33,9 @@ Core and Builtins Library ------- +- Issue #22410: Module level functions in the re module now cache compiled + locale-dependent regular expressions taking into account the locale. + - Issue #8876: distutils now falls back to copying files when hard linking doesn't work. This allows use with special filesystems such as VirtualBox shared folders. -- cgit v0.12