diff options
-rw-r--r-- | Lib/tarfile.py | 9 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 25 | ||||
-rw-r--r-- | Misc/NEWS | 5 |
3 files changed, 34 insertions, 5 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 4264da3..3a3d2c9 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -639,12 +639,11 @@ class _BZ2Proxy(object): def read(self, size): x = len(self.buf) while x < size: - try: - raw = self.fileobj.read(self.blocksize) - data = self.bz2obj.decompress(raw) - self.buf += data - except EOFError: + raw = self.fileobj.read(self.blocksize) + if not raw: break + data = self.bz2obj.decompress(raw) + self.buf += data x += len(data) buf = self.buf[:size] diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index affbeeb..f4c0035 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1121,6 +1121,30 @@ class Bz2WriteTest(WriteTest): class Bz2StreamWriteTest(StreamWriteTest): mode = "w|bz2" +class Bz2PartialReadTest(unittest.TestCase): + # Issue5068: The _BZ2Proxy.read() method loops forever + # on an empty or partial bzipped file. + + def _test_partial_input(self, mode): + class MyBytesIO(io.BytesIO): + hit_eof = False + def read(self, n): + if self.hit_eof: + raise AssertionError("infinite loop detected in tarfile.open()") + self.hit_eof = self.tell() == len(self.getvalue()) + return super(MyBytesIO, self).read(n) + + data = bz2.compress(tarfile.TarInfo("foo").tobuf()) + for x in range(len(data) + 1): + tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode) + + def test_partial_input(self): + self._test_partial_input("r") + + def test_partial_input_bz2(self): + self._test_partial_input("r:bz2") + + def test_main(): if not os.path.exists(TEMPDIR): os.mkdir(TEMPDIR) @@ -1178,6 +1202,7 @@ def test_main(): Bz2StreamReadTest, Bz2WriteTest, Bz2StreamWriteTest, + Bz2PartialReadTest, ] try: @@ -32,6 +32,11 @@ Core and Builtins Library ------- +- Issue #5068: Fixed the tarfile._BZ2Proxy.read() method that would loop + forever on incomplete input. That caused tarfile.open() to hang when used + with mode 'r' or 'r:bz2' and a fileobj argument that contained no data or + partial bzip2 compressed data. + - Issue #2110: Add support for thousands separator and 'n' type specifier to Decimal.__format__ |