From c563caf3a21115fa1e2eb6c7b455ec58ca85e8e4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 23 Sep 2014 23:22:41 +0300 Subject: Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in regular expressions. --- Lib/sre_parse.py | 20 ++++++++++++++++---- Lib/test/test_re.py | 12 +++++++----- Misc/NEWS | 3 +++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b56d437..7fd145b 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -295,7 +295,11 @@ def _class_escape(source, escape): elif c in OCTDIGITS: # octal escape (up to three digits) escape += source.getwhile(2, OCTDIGITS) - return LITERAL, int(escape[1:], 8) & 0xff + c = int(escape[1:], 8) + if c > 0o377: + raise error('octal escape value %r outside of ' + 'range 0-0o377' % escape) + return LITERAL, c elif c in DIGITS: raise ValueError if len(escape) == 2: @@ -337,7 +341,7 @@ def _escape(source, escape, state): elif c == "0": # octal escape escape += source.getwhile(2, OCTDIGITS) - return LITERAL, int(escape[1:], 8) & 0xff + return LITERAL, int(escape[1:], 8) elif c in DIGITS: # octal escape *or* decimal group reference (sigh) if source.next in DIGITS: @@ -346,7 +350,11 @@ def _escape(source, escape, state): source.next in OCTDIGITS): # got three octal digits; this is an octal escape escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff + c = int(escape[1:], 8) + if c > 0o377: + raise error('octal escape value %r outside of ' + 'range 0-0o377' % escape) + return LITERAL, c # not an octal escape, so this is a group reference group = int(escape[1:]) if group < state.groups: @@ -837,7 +845,11 @@ def parse_template(source, pattern): s.next in OCTDIGITS): this += sget() isoctal = True - lappend(chr(int(this[1:], 8) & 0xff)) + c = int(this[1:], 8) + if c > 0o377: + raise error('octal escape value %r outside of ' + 'range 0-0o377' % this) + lappend(chr(c)) if not isoctal: addgroup(int(this[1:])) else: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0584f19..d85b767 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -154,8 +154,8 @@ class ReTests(unittest.TestCase): self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') - self.assertEqual(re.sub('x', r'\400', 'x'), '\0') - self.assertEqual(re.sub('x', r'\777', 'x'), '\377') + self.assertRaises(re.error, re.sub, 'x', r'\400', 'x') + self.assertRaises(re.error, re.sub, 'x', r'\777', 'x') self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') @@ -700,7 +700,7 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match(r"\08", "\0008")) self.assertTrue(re.match(r"\01", "\001")) self.assertTrue(re.match(r"\018", "\0018")) - self.assertTrue(re.match(r"\567", chr(0o167))) + self.assertRaises(re.error, re.match, r"\567", "") self.assertRaises(re.error, re.match, r"\911", "") self.assertRaises(re.error, re.match, r"\x1", "") self.assertRaises(re.error, re.match, r"\x1z", "") @@ -728,12 +728,13 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match(r"[\U%08x]" % i, chr(i))) self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0")) self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z")) - self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e")) + self.assertRaises(re.error, re.match, r"[\567]", "") self.assertRaises(re.error, re.match, r"[\911]", "") self.assertRaises(re.error, re.match, r"[\x1z]", "") self.assertRaises(re.error, re.match, r"[\u123z]", "") self.assertRaises(re.error, re.match, r"[\U0001234z]", "") self.assertRaises(re.error, re.match, r"[\U00110000]", "") + self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e")) def test_sre_byte_literals(self): for i in [0, 8, 16, 32, 64, 127, 128, 255]: @@ -749,7 +750,7 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match(br"\08", b"\0008")) self.assertTrue(re.match(br"\01", b"\001")) self.assertTrue(re.match(br"\018", b"\0018")) - self.assertTrue(re.match(br"\567", bytes([0o167]))) + self.assertRaises(re.error, re.match, br"\567", b"") self.assertRaises(re.error, re.match, br"\911", b"") self.assertRaises(re.error, re.match, br"\x1", b"") self.assertRaises(re.error, re.match, br"\x1z", b"") @@ -766,6 +767,7 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i]))) self.assertTrue(re.match(br"[\u]", b'u')) self.assertTrue(re.match(br"[\U]", b'U')) + self.assertRaises(re.error, re.match, br"[\567]", b"") self.assertRaises(re.error, re.match, br"[\911]", b"") self.assertRaises(re.error, re.match, br"[\x1z]", b"") diff --git a/Misc/NEWS b/Misc/NEWS index 3a9d086..9c6f7db 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -137,6 +137,9 @@ Core and Builtins Library ------- +- Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in + regular expressions. + - Issue #20912: Now directories added to ZIP file have correct Unix and MS-DOS directory attributes. -- cgit v0.12