summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2018-02-09 22:08:17 (GMT)
committer: GitHub <noreply@github.com>, 2018-02-09 22:08:17 (GMT)
commit: a445feb72902e4a3c5ae712f0c289309e1580d52 (patch)
tree: 5a4bbd53ad0fa579f9672370d469f6da000647ff /Lib/test/test_re.py
parent: 2411292ba8155327125d8a1da8a4c9fa003d5909 (diff)
download: cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.zip
cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.gz
cpython-a445feb72902e4a3c5ae712f0c289309e1580d52.tar.bz2
bpo-30688: Support \N{name} escapes in re patterns. (GH-5588)
Co-authored-by: Jonathan Eunice <jonathan.eunice@gmail.com>
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- Lib/test/test_re.py 36
1 files changed, 36 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 9fed4be..ab1d985 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -694,6 +694,42 @@ class ReTests(unittest.TestCase):
with self.subTest(c):
self.assertRaises(re.error, re.compile, '[\\%c]' % c)
+ def test_named_unicode_escapes(self):
+ # Each \N{name} escape should match exactly the character that name denotes.
+ self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<'))
+ self.assertTrue(re.match(r'\N{less-than sign}', '<'))  # name given in lower case
+ self.assertIsNone(re.match(r'\N{LESS-THAN SIGN}', '>'))  # '>' is a different character
+ self.assertTrue(re.match(r'\N{SNAKE}', '\U0001f40d'))  # code point above U+FFFF
+ self.assertTrue(re.match(r'\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH '
+ r'HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}',
+ '\ufbf9'))  # long name split across two literals
+ self.assertTrue(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
+ '='))  # named escapes as range endpoints; '=' lies between '<' and '>'
+ self.assertIsNone(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
+ ';'))  # ';' falls just below '<', outside the range
+
+ # Malformed or unknown \N{...} escapes; the last argument is presumably the expected error offset (checkPatternError is defined elsewhere - confirm).
+ self.checkPatternError(r'\N', 'missing {', 2)  # nothing after \N
+ self.checkPatternError(r'[\N]', 'missing {', 3)
+ self.checkPatternError(r'\N{', 'missing character name', 3)  # brace opened, nothing follows
+ self.checkPatternError(r'[\N{', 'missing character name', 4)
+ self.checkPatternError(r'\N{}', 'missing character name', 3)  # empty braces
+ self.checkPatternError(r'[\N{}]', 'missing character name', 4)
+ self.checkPatternError(r'\NSNAKE}', 'missing {', 2)  # name without an opening brace
+ self.checkPatternError(r'[\NSNAKE}]', 'missing {', 3)
+ self.checkPatternError(r'\N{SNAKE',
+ 'missing }, unterminated name', 3)  # closing brace never appears
+ self.checkPatternError(r'[\N{SNAKE]',
+ 'missing }, unterminated name', 4)
+ self.checkPatternError(r'[\N{SNAKE]}',
+ "undefined character name 'SNAKE]'", 1)  # the ']' is taken as part of the name
+ self.checkPatternError(r'\N{SPAM}',
+ "undefined character name 'SPAM'", 0)  # well-formed escape, unknown name
+ self.checkPatternError(r'[\N{SPAM}]',
+ "undefined character name 'SPAM'", 1)
+ self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)  # bytes pattern: \N is a bad escape
+ self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
+
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),