diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-26 17:27:56 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-26 17:27:56 (GMT) |
commit | dbe0982bc515cb1a881d4bf7728d265e58803bf0 (patch) | |
tree | b2d664e02a4fe763adc7b4d808ed18169551db69 /Lib | |
parent | 0742cae3357fcd7c41498b21060050e7cca788b1 (diff) | |
download | cpython-dbe0982bc515cb1a881d4bf7728d265e58803bf0.zip cpython-dbe0982bc515cb1a881d4bf7728d265e58803bf0.tar.gz cpython-dbe0982bc515cb1a881d4bf7728d265e58803bf0.tar.bz2 |
Issue #8260: The read(), readline() and readlines() methods of
codecs.StreamReader returned incomplete data when they were called after
readline() or read(size). Based on a patch by Amaury Forgeot d'Arc.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/codecs.py | 13 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 36 |
2 files changed, 40 insertions, 9 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index 2e2e755..c2065da 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -475,15 +475,12 @@ class StreamReader(Codec): # read until we get the required number of characters (if available) while True: # can the request be satisfied from the character buffer? - if chars < 0: - if size < 0: - if self.charbuffer: - break - elif len(self.charbuffer) >= size: - break - else: + if chars >= 0: if len(self.charbuffer) >= chars: break + elif size >= 0: + if len(self.charbuffer) >= size: + break # we need more data if size < 0: newdata = self.stream.read() @@ -491,6 +488,8 @@ class StreamReader(Codec): newdata = self.stream.read(size) # decode bytes (those remaining from the last call included) data = self.bytebuffer + newdata + if not data: + break try: newchars, decodedbytes = self.decode(data, self.errors) except UnicodeDecodeError as exc: diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a32ce76..3950c3b 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -175,6 +175,40 @@ class ReadTest(MixInCheckStateHandling): size*"a", ) + def test_mixed_readline_and_read(self): + lines = ["Humpty Dumpty sat on a wall,\n", + "Humpty Dumpty had a great fall.\r\n", + "All the king's horses and all the king's men\r", + "Couldn't put Humpty together again."] + data = ''.join(lines) + def getreader(): + stream = io.BytesIO(data.encode(self.encoding)) + return codecs.getreader(self.encoding)(stream) + + # Issue #8260: Test readline() followed by read() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.read(), ''.join(lines[1:])) + self.assertEqual(f.read(), '') + + # Issue #16636: Test readline() followed by readlines() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.readlines(), lines[1:]) + self.assertEqual(f.read(), '') + + # Test read() followed by read() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + 
self.assertEqual(f.read(), data[5:]) + self.assertEqual(f.read(), '') + + # Issue #12446: Test read() followed by readlines() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:]) + self.assertEqual(f.read(), '') + def test_bug1175396(self): s = [ '<%!--===================================================\r\n', @@ -2370,8 +2404,6 @@ class TransformCodecTest(unittest.TestCase): def test_readline(self): for encoding in bytes_transform_encodings: - if encoding in ['uu_codec', 'zlib_codec']: - continue with self.subTest(encoding=encoding): sin = codecs.encode(b"\x80", encoding) reader = codecs.getreader(encoding)(io.BytesIO(sin)) |