summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2013-01-25 21:31:43 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com>, 2013-01-25 21:31:43 (GMT)
commit: ace3ad3bf79a8a3b0ea9d1de97c8f8b59d1ac7b2 (patch)
tree: cdf8d1abca62783de7eafefcc9e924b34f04be2d
parent: 697e56d0f592165209cbeb87583c75dc231c6338 (diff)
download: cpython-ace3ad3bf79a8a3b0ea9d1de97c8f8b59d1ac7b2.zip
cpython-ace3ad3bf79a8a3b0ea9d1de97c8f8b59d1ac7b2.tar.gz
cpython-ace3ad3bf79a8a3b0ea9d1de97c8f8b59d1ac7b2.tar.bz2
Issue #16975: Fix error handling bug in the escape-decode bytes decoder.
-rw-r--r-- Lib/test/test_codecs.py 44
-rw-r--r-- Misc/NEWS 2
-rw-r--r-- Objects/bytesobject.c 4
3 files changed, 50 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1fa9ee0..2d1b249 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -805,6 +805,50 @@ class EscapeDecodeTest(unittest.TestCase):
def test_empty(self):
self.assertEqual(codecs.escape_decode(""), (b"", 0))
+ def test_raw(self):
+ for b in range(256):
+ if b != b'\\'[0]:
+ self.assertEqual(codecs.escape_decode(bytes([b]) + b'0'),
+ (bytes([b]) + b'0', 2))
+
+ def test_escape(self):
+ self.assertEqual(codecs.escape_decode(b"[\\\n]"), (b"[]", 4))
+ self.assertEqual(codecs.escape_decode(br'[\"]'), (b'["]', 4))
+ self.assertEqual(codecs.escape_decode(br"[\']"), (b"[']", 4))
+ self.assertEqual(codecs.escape_decode(br"[\\]"), (br"[\]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\a]"), (b"[\x07]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\b]"), (b"[\x08]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\t]"), (b"[\x09]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\n]"), (b"[\x0a]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\v]"), (b"[\x0b]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\f]"), (b"[\x0c]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\r]"), (b"[\x0d]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\7]"), (b"[\x07]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\8]"), (br"[\8]", 4))
+ self.assertEqual(codecs.escape_decode(br"[\78]"), (b"[\x078]", 5))
+ self.assertEqual(codecs.escape_decode(br"[\41]"), (b"[!]", 5))
+ self.assertEqual(codecs.escape_decode(br"[\418]"), (b"[!8]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\101]"), (b"[A]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\1010]"), (b"[A0]", 7))
+ self.assertEqual(codecs.escape_decode(br"[\501]"), (b"[A]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\x41]"), (b"[A]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\X41]"), (br"[\X41]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\x410]"), (b"[A0]", 7))
+ for b in range(256):
+ if b not in b'\n"\'\\abtnvfr01234567x':
+ self.assertEqual(codecs.escape_decode(b'\\' + bytes([b])),
+ (b'\\' + bytes([b]), 2))
+
+ def test_errors(self):
+ self.assertRaises(ValueError, codecs.escape_decode, br"\x")
+ self.assertRaises(ValueError, codecs.escape_decode, br"[\x]")
+ self.assertEqual(codecs.escape_decode(br"[\x]\x", "ignore"), (b"[]", 6))
+ self.assertEqual(codecs.escape_decode(br"[\x]\x", "replace"), (b"[?]?", 6))
+ self.assertRaises(ValueError, codecs.escape_decode, br"\x0")
+ self.assertRaises(ValueError, codecs.escape_decode, br"[\x0]")
+ self.assertEqual(codecs.escape_decode(br"[\x0]\x0", "ignore"), (b"[]", 8))
+ self.assertEqual(codecs.escape_decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
+
class RecodingTest(unittest.TestCase):
def test_recoding(self):
f = io.BytesIO()
diff --git a/Misc/NEWS b/Misc/NEWS
index e19c069..4dd981b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.2.4
Core and Builtins
-----------------
+- Issue #16975: Fix error handling bug in the escape-decode bytes decoder.
+
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
in any mapping, not only in a string.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index ef3a5a1..c0f5aff 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -484,6 +484,10 @@ PyObject *PyBytes_DecodeEscape(const char *s,
errors);
goto failed;
}
+ /* skip \x */
+ if (s < end && Py_ISXDIGIT(s[0]))
+ s++; /* and a hexdigit */
+ break;
default:
*p++ = '\\';
s--;