Closes #14462: allow any valid Python identifier in sre group names, as documented.

author: Georg Brandl <georg@python.org> 2013-04-14 09:40:00 (GMT)
committer: Georg Brandl <georg@python.org> 2013-04-14 09:40:00 (GMT)
commit: 1d472b74cb21388c78e5e56c19f50b3140370863 (patch)
tree: 07f4d90c71d9704f6fccdb5efba460b329d8eba6 /Lib
parent: 991fc5736e73c9c197cbaf758127c1057d16bacd (diff)
download: cpython-1d472b74cb21388c78e5e56c19f50b3140370863.zip
cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.gz
cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.bz2
2 files changed, 24 insertions, 4 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index b195fd0..2ebce89 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -225,13 +225,25 @@ class Tokenizer:
     def seek(self, index):
         self.index, self.next = index
 
+# The following three functions are not used in this module anymore, but we keep
+# them here (with DeprecationWarnings) for backwards compatibility.
+
 def isident(char):
+    import warnings
+    warnings.warn('sre_parse.isident() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
 
 def isdigit(char):
+    import warnings
+    warnings.warn('sre_parse.isdigit() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     return "0" <= char <= "9"
 
 def isname(name):
+    import warnings
+    warnings.warn('sre_parse.isname() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     # check that group name is a valid string
     if not isident(name[0]):
         return False
@@ -587,7 +599,7 @@ def _parse(source, state):
                         group = 1
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                     elif sourcematch("="):
                         # named backreference
@@ -601,7 +613,7 @@ def _parse(source, state):
                             name = name + char
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                         gid = state.groupdict.get(name)
                         if gid is None:
@@ -655,7 +667,7 @@ def _parse(source, state):
                     group = 2
                     if not condname:
                         raise error("missing group name")
-                    if isname(condname):
+                    if condname.isidentifier():
                         condgroup = state.groupdict.get(condname)
                         if condgroup is None:
                             raise error("unknown group name")
@@ -792,7 +804,7 @@ def parse_template(source, pattern):
                     if index < 0:
                         raise error("negative group number")
                 except ValueError:
-                    if not isname(name):
+                    if not name.isidentifier():
                         raise error("bad character in group name")
                     try:
                         index = pattern.groupindex[name]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index ef19164..e90c770 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
         self.assertRaises(re.error, re.compile, '(?(a))')
         self.assertRaises(re.error, re.compile, '(?(1a))')
         self.assertRaises(re.error, re.compile, '(?(a.))')
+        # New valid/invalid identifiers in Python 3
+        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+        self.assertRaises(re.error, re.compile, '(?P<©>x)')
 
     def test_symbolic_refs(self):
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
+        # New valid/invalid identifiers in Python 3
+        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
 
     def test_re_subn(self):
         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
author	Georg Brandl <georg@python.org>	2013-04-14 09:40:00 (GMT)
committer	Georg Brandl <georg@python.org>	2013-04-14 09:40:00 (GMT)
commit	1d472b74cb21388c78e5e56c19f50b3140370863 (patch)
tree	07f4d90c71d9704f6fccdb5efba460b329d8eba6 /Lib
parent	991fc5736e73c9c197cbaf758127c1057d16bacd (diff)
download	cpython-1d472b74cb21388c78e5e56c19f50b3140370863.zip cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.gz cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.bz2