#15546: Fix {GzipFile,LZMAFile}.read1()'s handling of pathological input data.

author: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-08-05 00:19:09 (GMT)
committer: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-08-05 00:19:09 (GMT)
commit: 37d3ff14871a25429fb93167aeace0589be45426 (patch)
tree: f0053233c44a4013f6f157e4c30984cc83089cbb /Lib
parent: 9c92a691e1f0f889fffd22a48d02ceaea38c05c6 (diff)
download: cpython-37d3ff14871a25429fb93167aeace0589be45426.zip cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.gz cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.bz2
2 files changed, 32 insertions, 24 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 8b89426..5bcfe61 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -385,7 +385,10 @@ class GzipFile(io.BufferedIOBase):
             return b''
 
         try:
-            self._read()
+            # For certain input data, a single call to _read() may not return
+            # any data. In this case, retry until we get some data or reach EOF.
+            while self.extrasize <= 0:
+                self._read()
         except EOFError:
             pass
         if size < 0 or size > self.extrasize:
diff --git a/Lib/lzma.py b/Lib/lzma.py
index ffcf812..1a1b065 100644
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -204,29 +204,31 @@ class LZMAFile(io.BufferedIOBase):
 
     # Fill the readahead buffer if it is empty. Returns False on EOF.
     def _fill_buffer(self):
-        if self._buffer:
-            return True
-
-        if self._decompressor.unused_data:
-            rawblock = self._decompressor.unused_data
-        else:
-            rawblock = self._fp.read(_BUFFER_SIZE)
-
-        if not rawblock:
-            if self._decompressor.eof:
-                self._mode = _MODE_READ_EOF
-                self._size = self._pos
-                return False
+        # Depending on the input data, our call to the decompressor may not
+        # return any data. In this case, try again after reading another block.
+        while True:
+            if self._buffer:
+                return True
+
+            if self._decompressor.unused_data:
+                rawblock = self._decompressor.unused_data
             else:
-                raise EOFError("Compressed file ended before the "
-                               "end-of-stream marker was reached")
-
-        # Continue to next stream.
-        if self._decompressor.eof:
-            self._decompressor = LZMADecompressor(**self._init_args)
+                rawblock = self._fp.read(_BUFFER_SIZE)
+
+            if not rawblock:
+                if self._decompressor.eof:
+                    self._mode = _MODE_READ_EOF
+                    self._size = self._pos
+                    return False
+                else:
+                    raise EOFError("Compressed file ended before the "
+                                   "end-of-stream marker was reached")
+
+            # Continue to next stream.
+            if self._decompressor.eof:
+                self._decompressor = LZMADecompressor(**self._init_args)
 
-        self._buffer = self._decompressor.decompress(rawblock)
-        return True
+            self._buffer = self._decompressor.decompress(rawblock)
 
     # Read data until EOF.
     # If return_data is false, consume the data without returning it.
@@ -284,11 +286,14 @@ class LZMAFile(io.BufferedIOBase):
             return self._read_block(size)
 
     def read1(self, size=-1):
-        """Read up to size uncompressed bytes with at most one read
-        from the underlying stream.
+        """Read up to size uncompressed bytes, while trying to avoid
+        making multiple reads from the underlying stream.
 
         Returns b"" if the file is at EOF.
         """
+        # Usually, read1() calls _fp.read() at most once. However, sometimes
+        # this does not give enough data for the decompressor to make progress.
+        # In this case we make multiple reads, to avoid returning b"".
         self._check_can_read()
         if (size == 0 or self._mode == _MODE_READ_EOF or
             not self._fill_buffer()):
author	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-08-05 00:19:09 (GMT)
committer	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-08-05 00:19:09 (GMT)
commit	37d3ff14871a25429fb93167aeace0589be45426 (patch)
tree	f0053233c44a4013f6f157e4c30984cc83089cbb /Lib
parent	9c92a691e1f0f889fffd22a48d02ceaea38c05c6 (diff)
download	cpython-37d3ff14871a25429fb93167aeace0589be45426.zip cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.gz cpython-37d3ff14871a25429fb93167aeace0589be45426.tar.bz2