diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-10-30 22:53:19 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-10-30 22:53:19 (GMT) |
commit | d4c7290368a82a265c9905dc5c1e95591fb96333 (patch) | |
tree | 1a360de9ff63adcb7d7fcd26dd62841fceb71ba0 | |
parent | 65566984b08ab8aebb15ed1ead5969791921f6ca (diff) | |
download | cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.zip cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.tar.gz cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.tar.bz2 |
Issue #22410: Module-level functions in the re module now cache compiled
locale-dependent regular expressions taking the locale into account.
-rw-r--r-- | Lib/re.py | 16 | ||||
-rw-r--r-- | Lib/test/test_re.py | 37 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 52 insertions, 4 deletions
@@ -104,6 +104,7 @@ This module also defines an exception 'error'. import sys import sre_compile import sre_parse +import _locale # public symbols __all__ = [ "match", "search", "sub", "subn", "split", "findall", @@ -229,9 +230,12 @@ def _compile(*key): bypass_cache = flags & DEBUG if not bypass_cache: cachekey = (type(key[0]),) + key - p = _cache.get(cachekey) - if p is not None: - return p + try: + p, loc = _cache[cachekey] + if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): + return p + except KeyError: + pass if isinstance(pattern, _pattern_type): if flags: raise ValueError('Cannot process flags argument with a compiled pattern') @@ -245,7 +249,11 @@ def _compile(*key): if not bypass_cache: if len(_cache) >= _MAXCACHE: _cache.clear() - _cache[cachekey] = p + if p.flags & LOCALE: + loc = _locale.setlocale(_locale.LC_CTYPE) + else: + loc = None + _cache[cachekey] = p, loc return p def _compile_repl(*key): diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 3c779486..7921c4a 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,6 +1,7 @@ from test.test_support import verbose, run_unittest, import_module from test.test_support import precisionbigmemtest, _2G, cpython_only from test.test_support import captured_stdout, have_unicode, requires_unicode, u +import locale import re from re import Scanner import sre_constants @@ -975,6 +976,42 @@ subpattern None self.assertEqual(re.match("(foo)", "foo").group(1L), "foo") self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1) + def test_locale_caching(self): + # Issue #22410 + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + for loc in 'en_US.iso88591', 'en_US.utf8': + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs %s locale' % loc) + + re.purge() + self.check_en_US_iso88591() + self.check_en_US_utf8() + re.purge() + 
self.check_en_US_utf8() + self.check_en_US_iso88591() + + def check_en_US_iso88591(self): + locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') + self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) + self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I)) + self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I)) + self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) + self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5')) + self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5')) + + def check_en_US_utf8(self): + locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') + self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) + self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I)) + self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I)) + self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) + self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) + self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) + def run_re_tests(): from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR @@ -37,6 +37,9 @@ Core and Builtins Library ------- +- Issue #22410: Module level functions in the re module now cache compiled + locale-dependent regular expressions taking into account the locale. + - Issue #8876: distutils now falls back to copying files when hard linking doesn't work. This allows use with special filesystems such as VirtualBox shared folders. |