summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: INADA Naoki <methane@users.noreply.github.com> 2017-10-14 05:21:59 (GMT)
committer: GitHub <noreply@github.com> 2017-10-14 05:21:59 (GMT)
commit: 7060380d577690a40ebc201c0725076349e977cd (patch)
tree: 792fe8b9f5a18ff8680f13d03ae27b0544e0fce1 /Lib
parent: 6234e9068332f61f935cf13fa5b1a924a99c28b2 (diff)
download: cpython-7060380d577690a40ebc201c0725076349e977cd.zip
cpython-7060380d577690a40ebc201c0725076349e977cd.tar.gz
cpython-7060380d577690a40ebc201c0725076349e977cd.tar.bz2
bpo-31672: Fix string.Template accidentally matched non-ASCII identifiers (GH-3872)
The pattern `[a-z]` with the `IGNORECASE` flag can match some non-ASCII characters. The straightforward solution is to use the `IGNORECASE | ASCII` flags. However, users may subclass `Template` and override only `idpattern`, so we want to avoid changing `Template.flags`. Instead, this commit uses the local flag `-i` for `idpattern` and changes `[a-z]` to `[a-zA-Z]`. (cherry picked from commit b22273ec5d1992b0cbe078b887427ae9977dfb78)
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/string.py 6
-rw-r--r-- Lib/test/test_string.py 6
2 files changed, 11 insertions, 1 deletions
diff --git a/Lib/string.py b/Lib/string.py
index c902007..670c195 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -78,7 +78,11 @@ class Template(metaclass=_TemplateMetaclass):
"""A string class for supporting $-substitutions."""
delimiter = '$'
- idpattern = r'[_a-z][_a-z0-9]*'
+ # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE,
+ # but without ASCII flag. We can't add re.ASCII to flags because of
+ # backward compatibility. So we use local -i flag and [a-zA-Z] pattern.
+ # See https://bugs.python.org/issue31672
+ idpattern = r'(?-i:[_a-zA-Z][_a-zA-Z0-9]*)'
flags = _re.IGNORECASE
def __init__(self, template):
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py
index 70439f8..8db23e7 100644
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -271,6 +271,12 @@ class TestTemplate(unittest.TestCase):
raises(ValueError, s.substitute, dict(who='tim'))
s = Template('$who likes $100')
raises(ValueError, s.substitute, dict(who='tim'))
+ # Template.idpattern should match to only ASCII characters.
+ # https://bugs.python.org/issue31672
+ s = Template("$who likes $\u0131") # (DOTLESS I)
+ raises(ValueError, s.substitute, dict(who='tim'))
+ s = Template("$who likes $\u0130") # (LATIN CAPITAL LETTER I WITH DOT ABOVE)
+ raises(ValueError, s.substitute, dict(who='tim'))
def test_idpattern_override(self):
class PathPattern(Template):