From 8003850e22788c9e02b8dea7076858e01aa24f65 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 26 Jan 2014 19:21:00 +0200 Subject: Issue #8260: The read(), readline() and readlines() methods of codecs.StreamReader returned incomplete data when they were called after readline() or read(size). Based on patch by Amaury Forgeot d'Arc. --- Lib/codecs.py | 13 ++++++------- Lib/test/test_codecs.py | 36 ++++++++++++++++++++++++++++++++++-- Misc/NEWS | 4 ++++ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index 6a6eb90..01ae0f3 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -463,15 +463,12 @@ class StreamReader(Codec): # read until we get the required number of characters (if available) while True: # can the request be satisfied from the character buffer? - if chars < 0: - if size < 0: - if self.charbuffer: - break - elif len(self.charbuffer) >= size: - break - else: + if chars >= 0: if len(self.charbuffer) >= chars: break + elif size >= 0: + if len(self.charbuffer) >= size: + break # we need more data if size < 0: newdata = self.stream.read() @@ -479,6 +476,8 @@ class StreamReader(Codec): newdata = self.stream.read(size) # decode bytes (those remaining from the last call included) data = self.bytebuffer + newdata + if not data: + break try: newchars, decodedbytes = self.decode(data, self.errors) except UnicodeDecodeError as exc: diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 3517057..5c51ef5 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -173,6 +173,40 @@ class ReadTest(MixInCheckStateHandling): size*"a", ) + def test_mixed_readline_and_read(self): + lines = ["Humpty Dumpty sat on a wall,\n", + "Humpty Dumpty had a great fall.\r\n", + "All the king's horses and all the king's men\r", + "Couldn't put Humpty together again."] + data = ''.join(lines) + def getreader(): + stream = io.BytesIO(data.encode(self.encoding)) + return codecs.getreader(self.encoding)(stream) + + # Issue
#8260: Test readline() followed by read() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.read(), ''.join(lines[1:])) + self.assertEqual(f.read(), '') + + # Issue #16636: Test readline() followed by readlines() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.readlines(), lines[1:]) + self.assertEqual(f.read(), '') + + # Test read() followed by read() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + self.assertEqual(f.read(), data[5:]) + self.assertEqual(f.read(), '') + + # Issue #12446: Test read() followed by readlines() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:]) + self.assertEqual(f.read(), '') + def test_bug1175396(self): s = [ '<%!--===================================================\r\n', @@ -2307,8 +2341,6 @@ class TransformCodecTest(unittest.TestCase): def test_readline(self): for encoding in bytes_transform_encodings: - if encoding in ['uu_codec', 'zlib_codec']: - continue sin = codecs.encode(b"\x80", encoding) reader = codecs.getreader(encoding)(io.BytesIO(sin)) sout = reader.readline() diff --git a/Misc/NEWS b/Misc/NEWS index ea6f1d7..89610c9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -50,6 +50,10 @@ Core and Builtins Library ------- +- Issue #8260: The read(), readline() and readlines() methods of + codecs.StreamReader returned incomplete data when they were called after + readline() or read(size). Based on patch by Amaury Forgeot d'Arc. + - Issue #20317: ExitStack.__exit__ could create a self-referential loop if an exception raised by a cleanup operation already had its context set correctly (for example, by the @contextmanager decorator). The infinite -- cgit v0.12