bpo-30688: Support \N{name} escapes in re patterns. (GH-5588)

Co-authored-by: Jonathan Eunice <jonathan.eunice@gmail.com>
author: Serhiy Storchaka <storchaka@gmail.com> 2018-02-09 22:08:17 (GMT)
committer: GitHub <noreply@github.com> 2018-02-09 22:08:17 (GMT)
commit: a445feb72902e4a3c5ae712f0c289309e1580d52 (patch)
tree: 5a4bbd53ad0fa579f9672370d469f6da000647ff /Lib/test/test_re.py
parent: 2411292ba8155327125d8a1da8a4c9fa003d5909 (diff)
download: cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.zip
cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.gz
cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.bz2
1 file changed, 36 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 9fed4be..ab1d985 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -694,6 +694,42 @@ class ReTests(unittest.TestCase):
             with self.subTest(c):
                 self.assertRaises(re.error, re.compile, '[\\%c]' % c)
 
+    def test_named_unicode_escapes(self):
+        # valid \N{name} escapes, incl. lowercase names and set ranges
+        self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<'))
+        self.assertTrue(re.match(r'\N{less-than sign}', '<'))
+        self.assertIsNone(re.match(r'\N{LESS-THAN SIGN}', '>'))
+        self.assertTrue(re.match(r'\N{SNAKE}', '\U0001f40d'))
+        self.assertTrue(re.match(r'\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH '
+                                 r'HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}',
+                                 '\ufbf9'))
+        self.assertTrue(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
+                                 '='))
+        self.assertIsNone(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
+                                   ';'))
+
+        # malformed or unknown \N{name}, and \N in bytes patterns, are errors
+        self.checkPatternError(r'\N', 'missing {', 2)
+        self.checkPatternError(r'[\N]', 'missing {', 3)
+        self.checkPatternError(r'\N{', 'missing character name', 3)
+        self.checkPatternError(r'[\N{', 'missing character name', 4)
+        self.checkPatternError(r'\N{}', 'missing character name', 3)
+        self.checkPatternError(r'[\N{}]', 'missing character name', 4)
+        self.checkPatternError(r'\NSNAKE}', 'missing {', 2)
+        self.checkPatternError(r'[\NSNAKE}]', 'missing {', 3)
+        self.checkPatternError(r'\N{SNAKE',
+                               'missing }, unterminated name', 3)
+        self.checkPatternError(r'[\N{SNAKE]',
+                               'missing }, unterminated name', 4)
+        self.checkPatternError(r'[\N{SNAKE]}',
+                               "undefined character name 'SNAKE]'", 1)
+        self.checkPatternError(r'\N{SPAM}',
+                               "undefined character name 'SPAM'", 0)
+        self.checkPatternError(r'[\N{SPAM}]',
+                               "undefined character name 'SPAM'", 1)
+        self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
+        self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
+
     def test_string_boundaries(self):
         # See http://bugs.python.org/issue10713
         self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
author	Serhiy Storchaka <storchaka@gmail.com>	2018-02-09 22:08:17 (GMT)
committer	GitHub <noreply@github.com>	2018-02-09 22:08:17 (GMT)
commit	a445feb72902e4a3c5ae712f0c289309e1580d52 (patch)
tree	5a4bbd53ad0fa579f9672370d469f6da000647ff /Lib/test/test_re.py
parent	2411292ba8155327125d8a1da8a4c9fa003d5909 (diff)
download	cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.zip cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.gz cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.bz2