From 37d3ff14871a25429fb93167aeace0589be45426 Mon Sep 17 00:00:00 2001 From: Nadeem Vawda Date: Sun, 5 Aug 2012 02:19:09 +0200 Subject: #15546: Fix {GzipFile,LZMAFile}.read1()'s handling of pathological input data. --- Lib/gzip.py | 5 ++++- Lib/lzma.py | 51 ++++++++++++++++++++++++++++----------------------- Misc/NEWS | 3 +++ 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 8b89426..5bcfe61 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -385,7 +385,10 @@ class GzipFile(io.BufferedIOBase): return b'' try: - self._read() + # For certain input data, a single call to _read() may not return + # any data. In this case, retry until we get some data or reach EOF. + while self.extrasize <= 0: + self._read() except EOFError: pass if size < 0 or size > self.extrasize: diff --git a/Lib/lzma.py b/Lib/lzma.py index ffcf812..1a1b065 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -204,29 +204,31 @@ class LZMAFile(io.BufferedIOBase): # Fill the readahead buffer if it is empty. Returns False on EOF. def _fill_buffer(self): - if self._buffer: - return True - - if self._decompressor.unused_data: - rawblock = self._decompressor.unused_data - else: - rawblock = self._fp.read(_BUFFER_SIZE) - - if not rawblock: - if self._decompressor.eof: - self._mode = _MODE_READ_EOF - self._size = self._pos - return False + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._buffer: + return True + + if self._decompressor.unused_data: + rawblock = self._decompressor.unused_data else: - raise EOFError("Compressed file ended before the " - "end-of-stream marker was reached") - - # Continue to next stream. - if self._decompressor.eof: - self._decompressor = LZMADecompressor(**self._init_args) + rawblock = self._fp.read(_BUFFER_SIZE) + + if not rawblock: + if self._decompressor.eof: + self._mode = _MODE_READ_EOF + self._size = self._pos + return False + else: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + + # Continue to next stream. + if self._decompressor.eof: + self._decompressor = LZMADecompressor(**self._init_args) - self._buffer = self._decompressor.decompress(rawblock) - return True + self._buffer = self._decompressor.decompress(rawblock) # Read data until EOF. # If return_data is false, consume the data without returning it. @@ -284,11 +286,14 @@ class LZMAFile(io.BufferedIOBase): return self._read_block(size) def read1(self, size=-1): - """Read up to size uncompressed bytes with at most one read - from the underlying stream. + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Returns b"" if the file is at EOF. """ + # Usually, read1() calls _fp.read() at most once. However, sometimes + # this does not give enough data for the decompressor to make progress. + # In this case we make multiple reads, to avoid returning b"". self._check_can_read() if (size == 0 or self._mode == _MODE_READ_EOF or not self._fill_buffer()): diff --git a/Misc/NEWS b/Misc/NEWS index ee88215..99aac25 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -77,6 +77,9 @@ Core and Builtins Library ------- +- Issue #15546: Fix handling of pathological input data in the read1() method of + the BZ2File, GzipFile and LZMAFile classes. + - Issue #13052: Fix IDLE crashing when replace string in Search/Replace dialog ended with '\'. Patch by Roger Serwy. -- cgit v0.12