diff options
author | Georg Brandl <georg@python.org> | 2013-04-14 09:40:00 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2013-04-14 09:40:00 (GMT) |
commit | 1d472b74cb21388c78e5e56c19f50b3140370863 (patch) | |
tree | 07f4d90c71d9704f6fccdb5efba460b329d8eba6 /Lib | |
parent | 991fc5736e73c9c197cbaf758127c1057d16bacd (diff) | |
download | cpython-1d472b74cb21388c78e5e56c19f50b3140370863.zip cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.gz cpython-1d472b74cb21388c78e5e56c19f50b3140370863.tar.bz2 |
Closes #14462: allow any valid Python identifier in sre group names, as documented.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_parse.py | 20 | ||||
-rw-r--r-- | Lib/test/test_re.py | 8 |
2 files changed, 24 insertions, 4 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b195fd0..2ebce89 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -225,13 +225,25 @@ class Tokenizer: def seek(self, index): self.index, self.next = index +# The following three functions are not used in this module anymore, but we keep +# them here (with DeprecationWarnings) for backwards compatibility. + def isident(char): + import warnings + warnings.warn('sre_parse.isident() will be removed in 3.5', + DeprecationWarning, stacklevel=2) return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" def isdigit(char): + import warnings + warnings.warn('sre_parse.isdigit() will be removed in 3.5', + DeprecationWarning, stacklevel=2) return "0" <= char <= "9" def isname(name): + import warnings + warnings.warn('sre_parse.isname() will be removed in 3.5', + DeprecationWarning, stacklevel=2) # check that group name is a valid string if not isident(name[0]): return False @@ -587,7 +599,7 @@ def _parse(source, state): group = 1 if not name: raise error("missing group name") - if not isname(name): + if not name.isidentifier(): raise error("bad character in group name") elif sourcematch("="): # named backreference @@ -601,7 +613,7 @@ def _parse(source, state): name = name + char if not name: raise error("missing group name") - if not isname(name): + if not name.isidentifier(): raise error("bad character in group name") gid = state.groupdict.get(name) if gid is None: @@ -655,7 +667,7 @@ def _parse(source, state): group = 2 if not condname: raise error("missing group name") - if isname(condname): + if condname.isidentifier(): condgroup = state.groupdict.get(condname) if condgroup is None: raise error("unknown group name") @@ -792,7 +804,7 @@ def parse_template(source, pattern): if index < 0: raise error("negative group number") except ValueError: - if not isname(name): + if not name.isidentifier(): raise error("bad character in group name") try: index = pattern.groupindex[name] diff --git a/Lib/test/test_re.py 
b/Lib/test/test_re.py index ef19164..e90c770 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -180,6 +180,10 @@ class ReTests(unittest.TestCase): self.assertRaises(re.error, re.compile, '(?(a))') self.assertRaises(re.error, re.compile, '(?(1a))') self.assertRaises(re.error, re.compile, '(?(a.))') + # New valid/invalid identifiers in Python 3 + re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') + re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') + self.assertRaises(re.error, re.compile, '(?P<©>x)') def test_symbolic_refs(self): self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') @@ -192,6 +196,10 @@ class ReTests(unittest.TestCase): self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') + # New valid/invalid identifiers in Python 3 + self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') + self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') + self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx') def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) |