diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2021-10-14 17:04:19 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-14 17:04:19 (GMT) |
commit | 39aa98346d5dd8ac591a7cafb467af21c53f1e5d (patch) | |
tree | d363b62f299171467fce0cd3e1fe155c2ca41a09 /Lib | |
parent | d413c503636cde2a6ab0ada25dccb0134633a8e6 (diff) | |
download | cpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.zip cpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.tar.gz cpython-39aa98346d5dd8ac591a7cafb467af21c53f1e5d.tar.bz2 |
bpo-45467: Fix IncrementalDecoder and StreamReader in the "raw-unicode-escape" codec (GH-28944)
They now support splitting escape sequences between input chunks.
Add a third parameter, "final", to codecs.raw_unicode_escape_decode().
It is True by default to match the former behavior.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/encodings/raw_unicode_escape.py | 9 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 35 |
2 files changed, 39 insertions, 5 deletions
```diff
diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py
index 2b919b4..46c8e07 100644
--- a/Lib/encodings/raw_unicode_escape.py
+++ b/Lib/encodings/raw_unicode_escape.py
@@ -21,15 +21,16 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
     def encode(self, input, final=False):
         return codecs.raw_unicode_escape_encode(input, self.errors)[0]
 
-class IncrementalDecoder(codecs.IncrementalDecoder):
-    def decode(self, input, final=False):
-        return codecs.raw_unicode_escape_decode(input, self.errors)[0]
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    def _buffer_decode(self, input, errors, final):
+        return codecs.raw_unicode_escape_decode(input, errors, final)
 
 class StreamWriter(Codec,codecs.StreamWriter):
     pass
 
 class StreamReader(Codec,codecs.StreamReader):
-    pass
+    def decode(self, input, errors='strict'):
+        return codecs.raw_unicode_escape_decode(input, errors, False)
 
 ### encodings module API
 
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 288a300..506b51c 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2483,7 +2483,11 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase):
             ]
         )
 
-class RawUnicodeEscapeTest(unittest.TestCase):
+class RawUnicodeEscapeTest(ReadTest, unittest.TestCase):
+    encoding = "raw-unicode-escape"
+
+    test_lone_surrogates = None
+
     def test_empty(self):
         self.assertEqual(codecs.raw_unicode_escape_encode(""), (b"", 0))
         self.assertEqual(codecs.raw_unicode_escape_decode(b""), ("", 0))
@@ -2532,6 +2536,35 @@ class RawUnicodeEscapeTest(unittest.TestCase):
         self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
         self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
 
+    def test_partial(self):
+        self.check_partial(
+            "\x00\t\n\r\\\xff\uffff\U00010000",
+            [
+                '\x00',
+                '\x00\t',
+                '\x00\t\n',
+                '\x00\t\n\r',
+                '\x00\t\n\r',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff\U00010000',
+            ]
+        )
+
 
 class EscapeEncodeTest(unittest.TestCase):
 
```