diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2018-02-09 22:08:17 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-09 22:08:17 (GMT) |
commit | a445feb72902e4a3c5ae712f0c289309e1580d52 (patch) | |
tree | 5a4bbd53ad0fa579f9672370d469f6da000647ff /Lib/test/test_re.py | |
parent | 2411292ba8155327125d8a1da8a4c9fa003d5909 (diff) | |
download | cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.zip cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.gz cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.bz2 |
bpo-30688: Support \N{name} escapes in re patterns. (GH-5588)
Co-authored-by: Jonathan Eunice <jonathan.eunice@gmail.com>
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- | Lib/test/test_re.py | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 9fed4be..ab1d985 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -694,6 +694,42 @@ class ReTests(unittest.TestCase): with self.subTest(c): self.assertRaises(re.error, re.compile, '[\\%c]' % c) + def test_named_unicode_escapes(self): + # test individual Unicode named escapes + self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<')) + self.assertTrue(re.match(r'\N{less-than sign}', '<')) + self.assertIsNone(re.match(r'\N{LESS-THAN SIGN}', '>')) + self.assertTrue(re.match(r'\N{SNAKE}', '\U0001f40d')) + self.assertTrue(re.match(r'\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH ' + r'HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}', + '\ufbf9')) + self.assertTrue(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]', + '=')) + self.assertIsNone(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]', + ';')) + + # test errors in \N{name} handling - only valid names should pass + self.checkPatternError(r'\N', 'missing {', 2) + self.checkPatternError(r'[\N]', 'missing {', 3) + self.checkPatternError(r'\N{', 'missing character name', 3) + self.checkPatternError(r'[\N{', 'missing character name', 4) + self.checkPatternError(r'\N{}', 'missing character name', 3) + self.checkPatternError(r'[\N{}]', 'missing character name', 4) + self.checkPatternError(r'\NSNAKE}', 'missing {', 2) + self.checkPatternError(r'[\NSNAKE}]', 'missing {', 3) + self.checkPatternError(r'\N{SNAKE', + 'missing }, unterminated name', 3) + self.checkPatternError(r'[\N{SNAKE]', + 'missing }, unterminated name', 4) + self.checkPatternError(r'[\N{SNAKE]}', + "undefined character name 'SNAKE]'", 1) + self.checkPatternError(r'\N{SPAM}', + "undefined character name 'SPAM'", 0) + self.checkPatternError(r'[\N{SPAM}]', + "undefined character name 'SPAM'", 1) + self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) + self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) + def test_string_boundaries(self): # See http://bugs.python.org/issue10713 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), |