summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2014-10-30 22:53:49 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com>, 2014-10-30 22:53:49 (GMT)
commit: 4659cc075667f6a38f3f69c9838585c71ec44d53 (patch)
tree: a8c87ba44a24a3e66f87d55f596b8b2962948f59 /Lib
parent: 35903c809f85de1532c2df4f82550e079a3711a7 (diff)
download: cpython-4659cc075667f6a38f3f69c9838585c71ec44d53.zip
cpython-4659cc075667f6a38f3f69c9838585c71ec44d53.tar.gz
cpython-4659cc075667f6a38f3f69c9838585c71ec44d53.tar.bz2
Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/re.py 11
-rw-r--r-- Lib/test/test_re.py 37
2 files changed, 46 insertions, 2 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 2e4d87c..46cea2b 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -122,6 +122,7 @@ This module also defines an exception 'error'.
import sys
import sre_compile
import sre_parse
+import _locale
# public symbols
__all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall",
@@ -275,7 +276,9 @@ def _compile(pattern, flags):
bypass_cache = flags & DEBUG
if not bypass_cache:
try:
- return _cache[type(pattern), pattern, flags]
+ p, loc = _cache[type(pattern), pattern, flags]
+ if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
+ return p
except KeyError:
pass
if isinstance(pattern, _pattern_type):
@@ -289,7 +292,11 @@ def _compile(pattern, flags):
if not bypass_cache:
if len(_cache) >= _MAXCACHE:
_cache.clear()
- _cache[type(pattern), pattern, flags] = p
+ if p.flags & LOCALE:
+ loc = _locale.setlocale(_locale.LC_CTYPE)
+ else:
+ loc = None
+ _cache[type(pattern), pattern, flags] = p, loc
return p
def _compile_repl(repl, pattern):
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 0584f19..fb57305 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,7 @@
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
cpython_only, captured_stdout
import io
+import locale
import re
from re import Scanner
import sre_compile
@@ -1254,6 +1255,42 @@ subpattern None
# with ignore case.
self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
+ def test_locale_caching(self):
+ # Issue #22410
+ oldlocale = locale.setlocale(locale.LC_CTYPE)
+ self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+ for loc in 'en_US.iso88591', 'en_US.utf8':
+ try:
+ locale.setlocale(locale.LC_CTYPE, loc)
+ except locale.Error:
+ # Unsupported locale on this system
+ self.skipTest('test needs %s locale' % loc)
+
+ re.purge()
+ self.check_en_US_iso88591()
+ self.check_en_US_utf8()
+ re.purge()
+ self.check_en_US_utf8()
+ self.check_en_US_iso88591()
+
+ def check_en_US_iso88591(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
+
+ def check_en_US_utf8(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):