diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-06-23 11:29:19 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-06-23 11:29:19 (GMT) |
commit | 463badf06ce33d9de88dfea645c1253f44588aad (patch) | |
tree | fe3bf02bff59bfe3622237ede250fd74a06ee9ff /Lib/test | |
parent | c9aa8425c40e44362dd6abf0b60eb18b68539605 (diff) | |
download | cpython-463badf06ce33d9de88dfea645c1253f44588aad.zip cpython-463badf06ce33d9de88dfea645c1253f44588aad.tar.gz cpython-463badf06ce33d9de88dfea645c1253f44588aad.tar.bz2 |
Issue #3665: \u and \U escapes are now supported in unicode regular expressions.
Patch by Serhiy Storchaka.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_re.py | 98 |
1 file changed, 83 insertions, 15 deletions
```diff
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 74a7b71..9b0aa75 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -526,24 +526,92 @@ class ReTests(unittest.TestCase):
             self.assertNotEqual(re.compile('^pattern$', flag), None)
 
     def test_sre_character_literals(self):
-        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
-            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
-            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
-            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
-            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
-        self.assertRaises(re.error, re.match, "\911", "")
+        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
+            if i < 256:
+                self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
+                self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
+                self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
+                self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
+            if i < 0x10000:
+                self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
+                self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
+            self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
+            self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
+            self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
+        self.assertIsNotNone(re.match(r"\0", "\000"))
+        self.assertIsNotNone(re.match(r"\08", "\0008"))
+        self.assertIsNotNone(re.match(r"\01", "\001"))
+        self.assertIsNotNone(re.match(r"\018", "\0018"))
+        self.assertIsNotNone(re.match(r"\567", chr(0o167)))
+        self.assertRaises(re.error, re.match, r"\911", "")
+        self.assertRaises(re.error, re.match, r"\x1", "")
+        self.assertRaises(re.error, re.match, r"\x1z", "")
+        self.assertRaises(re.error, re.match, r"\u123", "")
+        self.assertRaises(re.error, re.match, r"\u123z", "")
+        self.assertRaises(re.error, re.match, r"\U0001234", "")
+        self.assertRaises(re.error, re.match, r"\U0001234z", "")
+        self.assertRaises(re.error, re.match, r"\U00110000", "")
 
     def test_sre_character_class_literals(self):
+        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
+            if i < 256:
+                self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
+            if i < 0x10000:
+                self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
+                self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
+            self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
+            self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
+            self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
+        self.assertRaises(re.error, re.match, r"[\911]", "")
+        self.assertRaises(re.error, re.match, r"[\x1z]", "")
+        self.assertRaises(re.error, re.match, r"[\u123z]", "")
+        self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
+        self.assertRaises(re.error, re.match, r"[\U00110000]", "")
+
+    def test_sre_byte_literals(self):
+        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
+            self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
+            self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
+            self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
+            self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
+        self.assertIsNotNone(re.match(br"\u", b'u'))
+        self.assertIsNotNone(re.match(br"\U", b'U'))
+        self.assertIsNotNone(re.match(br"\0", b"\000"))
+        self.assertIsNotNone(re.match(br"\08", b"\0008"))
+        self.assertIsNotNone(re.match(br"\01", b"\001"))
+        self.assertIsNotNone(re.match(br"\018", b"\0018"))
+        self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
+        self.assertRaises(re.error, re.match, br"\911", b"")
+        self.assertRaises(re.error, re.match, br"\x1", b"")
+        self.assertRaises(re.error, re.match, br"\x1z", b"")
+
+    def test_sre_byte_class_literals(self):
         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
-            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
-        self.assertRaises(re.error, re.match, "[\911]", "")
+            self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
+            self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
+        self.assertIsNotNone(re.match(br"[\u]", b'u'))
+        self.assertIsNotNone(re.match(br"[\U]", b'U'))
+        self.assertRaises(re.error, re.match, br"[\911]", "")
+        self.assertRaises(re.error, re.match, br"[\x1z]", "")
 
     def test_bug_113254(self):
         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
```