diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2021-10-14 10:17:00 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-14 10:17:00 (GMT) |
commit | c96d1546b11b4c282a7e21737cb1f5d16349656d (patch) | |
tree | 5e6e49378a4207591316f08e6b7fd42d4e3ff40c /Lib | |
parent | e71662c1ae817e728233ce93882c5b20f4c31ebc (diff) | |
download | cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.zip cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.tar.gz cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.tar.bz2 |
bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939)
They support now splitting escape sequences between input chunks.
Add the third parameter "final" in codecs.unicode_escape_decode().
It is True by default to match the former behavior.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/unicode_escape.py | 9 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 50 |
2 files changed, 51 insertions, 8 deletions
diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py index 817f932..9b1ce99b 100644 --- a/Lib/encodings/unicode_escape.py +++ b/Lib/encodings/unicode_escape.py @@ -21,15 +21,16 @@ class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.unicode_escape_encode(input, self.errors)[0] -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_escape_decode(input, self.errors)[0] +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, input, errors, final): + return codecs.unicode_escape_decode(input, errors, final) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): - pass + def decode(self, input, errors='strict'): + return codecs.unicode_escape_decode(input, errors, False) ### encodings module API diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index f1a149f..288a300 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -114,7 +114,7 @@ class ReadTest(MixInCheckStateHandling): q = Queue(b"") r = codecs.getreader(self.encoding)(q) result = "" - for (c, partialresult) in zip(input.encode(self.encoding), partialresults): + for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True): q.write(bytes([c])) result += r.read() self.assertEqual(result, partialresult) @@ -125,7 +125,7 @@ class ReadTest(MixInCheckStateHandling): # do the check again, this time using an incremental decoder d = codecs.getincrementaldecoder(self.encoding)() result = "" - for (c, partialresult) in zip(input.encode(self.encoding), partialresults): + for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True): result += d.decode(bytes([c])) self.assertEqual(result, partialresult) # check that there's nothing left in the buffers @@ -135,7 +135,7 @@ class ReadTest(MixInCheckStateHandling): # Check whether the reset method works properly d.reset() result = "" - for (c, partialresult) in zip(input.encode(self.encoding), partialresults): + for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True): result += d.decode(bytes([c])) self.assertEqual(result, partialresult) # check that there's nothing left in the buffers @@ -2353,7 +2353,11 @@ class TypesTest(unittest.TestCase): (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10)) -class UnicodeEscapeTest(unittest.TestCase): +class UnicodeEscapeTest(ReadTest, unittest.TestCase): + encoding = "unicode-escape" + + test_lone_surrogates = None + def test_empty(self): self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0)) self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0)) @@ -2440,6 +2444,44 @@ class UnicodeEscapeTest(unittest.TestCase): self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10)) self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10)) + def test_partial(self): + self.check_partial( + "\x00\t\n\r\\\xff\uffff\U00010000", + [ + '', + '', + '', + '\x00', + '\x00', + '\x00\t', + '\x00\t', + '\x00\t\n', + '\x00\t\n', + '\x00\t\n\r', + '\x00\t\n\r', + '\x00\t\n\r\\', + '\x00\t\n\r\\', + '\x00\t\n\r\\', + '\x00\t\n\r\\', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff', + '\x00\t\n\r\\\xff\uffff\U00010000', + ] + ) class RawUnicodeEscapeTest(unittest.TestCase): def test_empty(self): |