summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2021-10-14 10:17:00 (GMT)
committer: GitHub <noreply@github.com> 2021-10-14 10:17:00 (GMT)
commit: c96d1546b11b4c282a7e21737cb1f5d16349656d (patch)
tree: 5e6e49378a4207591316f08e6b7fd42d4e3ff40c /Lib
parent: e71662c1ae817e728233ce93882c5b20f4c31ebc (diff)
download: cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.zip
cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.tar.gz
cpython-c96d1546b11b4c282a7e21737cb1f5d16349656d.tar.bz2
bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939)
They now support splitting escape sequences between input chunks. Add a third parameter, "final", to codecs.unicode_escape_decode(). It is True by default to match the former behavior.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/unicode_escape.py | 9
-rw-r--r-- Lib/test/test_codecs.py | 50
2 files changed, 51 insertions, 8 deletions
diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py
index 817f932..9b1ce99b 100644
--- a/Lib/encodings/unicode_escape.py
+++ b/Lib/encodings/unicode_escape.py
@@ -21,15 +21,16 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.unicode_escape_encode(input, self.errors)[0]
-class IncrementalDecoder(codecs.IncrementalDecoder):
- def decode(self, input, final=False):
- return codecs.unicode_escape_decode(input, self.errors)[0]
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ def _buffer_decode(self, input, errors, final):
+ return codecs.unicode_escape_decode(input, errors, final)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
- pass
+ def decode(self, input, errors='strict'):
+ return codecs.unicode_escape_decode(input, errors, False)
### encodings module API
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index f1a149f..288a300 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -114,7 +114,7 @@ class ReadTest(MixInCheckStateHandling):
q = Queue(b"")
r = codecs.getreader(self.encoding)(q)
result = ""
- for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+ for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
q.write(bytes([c]))
result += r.read()
self.assertEqual(result, partialresult)
@@ -125,7 +125,7 @@ class ReadTest(MixInCheckStateHandling):
# do the check again, this time using an incremental decoder
d = codecs.getincrementaldecoder(self.encoding)()
result = ""
- for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+ for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
result += d.decode(bytes([c]))
self.assertEqual(result, partialresult)
# check that there's nothing left in the buffers
@@ -135,7 +135,7 @@ class ReadTest(MixInCheckStateHandling):
# Check whether the reset method works properly
d.reset()
result = ""
- for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+ for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
result += d.decode(bytes([c]))
self.assertEqual(result, partialresult)
# check that there's nothing left in the buffers
@@ -2353,7 +2353,11 @@ class TypesTest(unittest.TestCase):
(r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
-class UnicodeEscapeTest(unittest.TestCase):
+class UnicodeEscapeTest(ReadTest, unittest.TestCase):
+ encoding = "unicode-escape"
+
+ test_lone_surrogates = None
+
def test_empty(self):
self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
@@ -2440,6 +2444,44 @@ class UnicodeEscapeTest(unittest.TestCase):
self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
+ def test_partial(self):
+ self.check_partial(
+ "\x00\t\n\r\\\xff\uffff\U00010000",
+ [
+ '',
+ '',
+ '',
+ '\x00',
+ '\x00',
+ '\x00\t',
+ '\x00\t',
+ '\x00\t\n',
+ '\x00\t\n',
+ '\x00\t\n\r',
+ '\x00\t\n\r',
+ '\x00\t\n\r\\',
+ '\x00\t\n\r\\',
+ '\x00\t\n\r\\',
+ '\x00\t\n\r\\',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff',
+ '\x00\t\n\r\\\xff\uffff\U00010000',
+ ]
+ )
class RawUnicodeEscapeTest(unittest.TestCase):
def test_empty(self):