summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-08-05 00:19:09 (GMT)
committer: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-08-05 00:19:09 (GMT)
commit: 37d3ff14871a25429fb93167aeace0589be45426 (patch)
tree: f0053233c44a4013f6f157e4c30984cc83089cbb
parent: 9c92a691e1f0f889fffd22a48d02ceaea38c05c6 (diff)
download: cpython-37d3ff14871a25429fb93167aeace0589be45426.zip
cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.gz
cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.bz2
#15546: Fix {GzipFile,LZMAFile}.read1()'s handling of pathological input data.
-rw-r--r-- Lib/gzip.py 5
-rw-r--r-- Lib/lzma.py 51
-rw-r--r-- Misc/NEWS 3
3 files changed, 35 insertions, 24 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 8b89426..5bcfe61 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -385,7 +385,10 @@ class GzipFile(io.BufferedIOBase):
return b''
try:
- self._read()
+ # For certain input data, a single call to _read() may not return
+ # any data. In this case, retry until we get some data or reach EOF.
+ while self.extrasize <= 0:
+ self._read()
except EOFError:
pass
if size < 0 or size > self.extrasize:
diff --git a/Lib/lzma.py b/Lib/lzma.py
index ffcf812..1a1b065 100644
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -204,29 +204,31 @@ class LZMAFile(io.BufferedIOBase):
# Fill the readahead buffer if it is empty. Returns False on EOF.
def _fill_buffer(self):
- if self._buffer:
- return True
-
- if self._decompressor.unused_data:
- rawblock = self._decompressor.unused_data
- else:
- rawblock = self._fp.read(_BUFFER_SIZE)
-
- if not rawblock:
- if self._decompressor.eof:
- self._mode = _MODE_READ_EOF
- self._size = self._pos
- return False
+ # Depending on the input data, our call to the decompressor may not
+ # return any data. In this case, try again after reading another block.
+ while True:
+ if self._buffer:
+ return True
+
+ if self._decompressor.unused_data:
+ rawblock = self._decompressor.unused_data
else:
- raise EOFError("Compressed file ended before the "
- "end-of-stream marker was reached")
-
- # Continue to next stream.
- if self._decompressor.eof:
- self._decompressor = LZMADecompressor(**self._init_args)
+ rawblock = self._fp.read(_BUFFER_SIZE)
+
+ if not rawblock:
+ if self._decompressor.eof:
+ self._mode = _MODE_READ_EOF
+ self._size = self._pos
+ return False
+ else:
+ raise EOFError("Compressed file ended before the "
+ "end-of-stream marker was reached")
+
+ # Continue to next stream.
+ if self._decompressor.eof:
+ self._decompressor = LZMADecompressor(**self._init_args)
- self._buffer = self._decompressor.decompress(rawblock)
- return True
+ self._buffer = self._decompressor.decompress(rawblock)
# Read data until EOF.
# If return_data is false, consume the data without returning it.
@@ -284,11 +286,14 @@ class LZMAFile(io.BufferedIOBase):
return self._read_block(size)
def read1(self, size=-1):
- """Read up to size uncompressed bytes with at most one read
- from the underlying stream.
+ """Read up to size uncompressed bytes, while trying to avoid
+ making multiple reads from the underlying stream.
Returns b"" if the file is at EOF.
"""
+ # Usually, read1() calls _fp.read() at most once. However, sometimes
+ # this does not give enough data for the decompressor to make progress.
+ # In this case we make multiple reads, to avoid returning b"".
self._check_can_read()
if (size == 0 or self._mode == _MODE_READ_EOF or
not self._fill_buffer()):
diff --git a/Misc/NEWS b/Misc/NEWS
index ee88215..99aac25 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -77,6 +77,9 @@ Core and Builtins
Library
-------
+- Issue #15546: Fix handling of pathological input data in the read1() method of
+ the BZ2File, GzipFile and LZMAFile classes.
+
- Issue #13052: Fix IDLE crashing when replace string in Search/Replace dialog
ended with '\'. Patch by Roger Serwy.