summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2013-04-14 09:40:00 (GMT)
committer: Georg Brandl <georg@python.org> 2013-04-14 09:40:00 (GMT)
commit: 1d472b74cb21388c78e5e56c19f50b3140370863 (patch)
tree: 07f4d90c71d9704f6fccdb5efba460b329d8eba6 /Lib
parent: 991fc5736e73c9c197cbaf758127c1057d16bacd (diff)
download: cpython-1d472b74cb21388c78e5e56c19f50b3140370863.zip
cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.gz
cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.bz2
Closes #14462: allow any valid Python identifier in sre group names, as documented.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/sre_parse.py 20
-rw-r--r-- Lib/test/test_re.py 8
2 files changed, 24 insertions, 4 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index b195fd0..2ebce89 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -225,13 +225,25 @@ class Tokenizer:
def seek(self, index):
self.index, self.next = index
+# The following three functions are not used in this module anymore, but we keep
+# them here (with DeprecationWarnings) for backwards compatibility.
+
def isident(char):
+ import warnings
+ warnings.warn('sre_parse.isident() will be removed in 3.5',
+ DeprecationWarning, stacklevel=2)
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
def isdigit(char):
+ import warnings
+ warnings.warn('sre_parse.isdigit() will be removed in 3.5',
+ DeprecationWarning, stacklevel=2)
return "0" <= char <= "9"
def isname(name):
+ import warnings
+ warnings.warn('sre_parse.isname() will be removed in 3.5',
+ DeprecationWarning, stacklevel=2)
# check that group name is a valid string
if not isident(name[0]):
return False
@@ -587,7 +599,7 @@ def _parse(source, state):
group = 1
if not name:
raise error("missing group name")
- if not isname(name):
+ if not name.isidentifier():
raise error("bad character in group name")
elif sourcematch("="):
# named backreference
@@ -601,7 +613,7 @@ def _parse(source, state):
name = name + char
if not name:
raise error("missing group name")
- if not isname(name):
+ if not name.isidentifier():
raise error("bad character in group name")
gid = state.groupdict.get(name)
if gid is None:
@@ -655,7 +667,7 @@ def _parse(source, state):
group = 2
if not condname:
raise error("missing group name")
- if isname(condname):
+ if condname.isidentifier():
condgroup = state.groupdict.get(condname)
if condgroup is None:
raise error("unknown group name")
@@ -792,7 +804,7 @@ def parse_template(source, pattern):
if index < 0:
raise error("negative group number")
except ValueError:
- if not isname(name):
+ if not name.isidentifier():
raise error("bad character in group name")
try:
index = pattern.groupindex[name]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index ef19164..e90c770 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.compile, '(?(a))')
self.assertRaises(re.error, re.compile, '(?(1a))')
self.assertRaises(re.error, re.compile, '(?(a.))')
+ # New valid/invalid identifiers in Python 3
+ re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+ re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+ self.assertRaises(re.error, re.compile, '(?P<©>x)')
def test_symbolic_refs(self):
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
+ # New valid/invalid identifiers in Python 3
+ self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+ self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
+ self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))