summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-10-30 22:53:19 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-10-30 22:53:19 (GMT)
commit: d4c7290368a82a265c9905dc5c1e95591fb96333 (patch)
tree: 1a360de9ff63adcb7d7fcd26dd62841fceb71ba0
parent: 65566984b08ab8aebb15ed1ead5969791921f6ca (diff)
download: cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.zip
          cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.tar.gz
          cpython-d4c7290368a82a265c9905dc5c1e95591fb96333.tar.bz2
Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
-rw-r--r--  Lib/re.py            16
-rw-r--r--  Lib/test/test_re.py  37
-rw-r--r--  Misc/NEWS             3
3 files changed, 52 insertions, 4 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 9b01f3e..a82a446 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -104,6 +104,7 @@ This module also defines an exception 'error'.
import sys
import sre_compile
import sre_parse
+import _locale
# public symbols
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
@@ -229,9 +230,12 @@ def _compile(*key):
bypass_cache = flags & DEBUG
if not bypass_cache:
cachekey = (type(key[0]),) + key
- p = _cache.get(cachekey)
- if p is not None:
- return p
+ try:
+ p, loc = _cache[cachekey]
+ if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
+ return p
+ except KeyError:
+ pass
if isinstance(pattern, _pattern_type):
if flags:
raise ValueError('Cannot process flags argument with a compiled pattern')
@@ -245,7 +249,11 @@ def _compile(*key):
if not bypass_cache:
if len(_cache) >= _MAXCACHE:
_cache.clear()
- _cache[cachekey] = p
+ if p.flags & LOCALE:
+ loc = _locale.setlocale(_locale.LC_CTYPE)
+ else:
+ loc = None
+ _cache[cachekey] = p, loc
return p
def _compile_repl(*key):
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 3c779486..7921c4a 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,7 @@
from test.test_support import verbose, run_unittest, import_module
from test.test_support import precisionbigmemtest, _2G, cpython_only
from test.test_support import captured_stdout, have_unicode, requires_unicode, u
+import locale
import re
from re import Scanner
import sre_constants
@@ -975,6 +976,42 @@ subpattern None
self.assertEqual(re.match("(foo)", "foo").group(1L), "foo")
self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1)
+ def test_locale_caching(self):
+ # Issue #22410
+ oldlocale = locale.setlocale(locale.LC_CTYPE)
+ self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+ for loc in 'en_US.iso88591', 'en_US.utf8':
+ try:
+ locale.setlocale(locale.LC_CTYPE, loc)
+ except locale.Error:
+ # Unsupported locale on this system
+ self.skipTest('test needs %s locale' % loc)
+
+ re.purge()
+ self.check_en_US_iso88591()
+ self.check_en_US_utf8()
+ re.purge()
+ self.check_en_US_utf8()
+ self.check_en_US_iso88591()
+
+ def check_en_US_iso88591(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
+
+ def check_en_US_utf8(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
diff --git a/Misc/NEWS b/Misc/NEWS
index 86adaeb..725c0ab 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -37,6 +37,9 @@ Core and Builtins
Library
-------
+- Issue #22410: Module level functions in the re module now cache compiled
+ locale-dependent regular expressions taking into account the locale.
+
- Issue #8876: distutils now falls back to copying files when hard linking
doesn't work. This allows use with special filesystems such as VirtualBox
shared folders.