diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 15:11:07 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 15:11:07 (GMT) |
commit | cc0172c00704e8292c90e02e776b0c193ca75477 (patch) | |
tree | 07543135f64a58a54b91ee7fd96de40acbc1780f | |
parent | 791c97a6a89e30d02fe4b0746daec61de44a5ad3 (diff) | |
parent | 57f9b7a12420d461e8ea5cc1ba63f80de778c7d5 (diff) | |
download | cpython-cc0172c00704e8292c90e02e776b0c193ca75477.zip cpython-cc0172c00704e8292c90e02e776b0c193ca75477.tar.gz cpython-cc0172c00704e8292c90e02e776b0c193ca75477.tar.bz2 |
Issue #1159051: GzipFile now raises EOFError when reading a corrupted file
with truncated header or footer.
Added tests for reading truncated gzip, bzip2, and lzma files.
-rw-r--r-- | Lib/gzip.py | 81 | ||||
-rw-r--r-- | Lib/test/test_bz2.py | 13 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 14 | ||||
-rw-r--r-- | Lib/test/test_lzma.py | 14 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
5 files changed, 81 insertions, 44 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index 698f0c2..d5b5743 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -65,9 +65,6 @@ def write32u(output, value): # or unsigned. output.write(struct.pack("<L", value)) -def read32(input): - return struct.unpack("<I", input.read(4))[0] - class _PaddedFile: """Minimal read-only file object that prepends a string to the contents of an actual file. Shouldn't be used outside of gzip.py, as it lacks @@ -281,27 +278,31 @@ class GzipFile(io.BufferedIOBase): self.crc = zlib.crc32(b"") & 0xffffffff self.size = 0 + def _read_exact(self, n): + data = self.fileobj.read(n) + while len(data) < n: + b = self.fileobj.read(n - len(data)) + if not b: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + data += b + return data + def _read_gzip_header(self): magic = self.fileobj.read(2) if magic == b'': - raise EOFError("Reached EOF") + return False if magic != b'\037\213': raise OSError('Not a gzipped file') - method = ord( self.fileobj.read(1) ) + + method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8)) if method != 8: raise OSError('Unknown compression method') - flag = ord( self.fileobj.read(1) ) - self.mtime = read32(self.fileobj) - # extraflag = self.fileobj.read(1) - # os = self.fileobj.read(1) - self.fileobj.read(2) if flag & FEXTRA: # Read & discard the extra field, if present - xlen = ord(self.fileobj.read(1)) - xlen = xlen + 256*ord(self.fileobj.read(1)) - self.fileobj.read(xlen) + self._read_exact(struct.unpack("<H", self._read_exact(2))) if flag & FNAME: # Read and discard a null-terminated string containing the filename while True: @@ -315,12 +316,13 @@ class GzipFile(io.BufferedIOBase): if not s or s==b'\000': break if flag & FHCRC: - self.fileobj.read(2) # Read & discard the 16-bit header CRC + self._read_exact(2) # Read & discard the 16-bit header CRC unused = self.fileobj.unused() if unused: uncompress = self.decompress.decompress(unused) self._add_read_data(uncompress) + 
return True def write(self,data): self._check_closed() @@ -354,20 +356,16 @@ class GzipFile(io.BufferedIOBase): readsize = 1024 if size < 0: # get the whole thing - try: - while True: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - size = self.extrasize + while self._read(readsize): + readsize = min(self.max_read_chunk, readsize * 2) + size = self.extrasize else: # just get some more of it - try: - while size > self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize + while size > self.extrasize: + if not self._read(readsize): + if size > self.extrasize: + size = self.extrasize + break + readsize = min(self.max_read_chunk, readsize * 2) offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -385,12 +383,9 @@ class GzipFile(io.BufferedIOBase): if self.extrasize <= 0 and self.fileobj is None: return b'' - try: - # For certain input data, a single call to _read() may not return - # any data. In this case, retry until we get some data or reach EOF. - while self.extrasize <= 0: - self._read() - except EOFError: + # For certain input data, a single call to _read() may not return + # any data. In this case, retry until we get some data or reach EOF. + while self.extrasize <= 0 and self._read(): pass if size < 0 or size > self.extrasize: size = self.extrasize @@ -413,12 +408,9 @@ class GzipFile(io.BufferedIOBase): if self.extrasize == 0: if self.fileobj is None: return b'' - try: - # Ensure that we don't return b"" if we haven't reached EOF. - while self.extrasize == 0: - # 1024 is the same buffering heuristic used in read() - self._read(max(n, 1024)) - except EOFError: + # Ensure that we don't return b"" if we haven't reached EOF. 
+ # 1024 is the same buffering heuristic used in read() + while self.extrasize == 0 and self._read(max(n, 1024)): pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -431,13 +423,14 @@ class GzipFile(io.BufferedIOBase): def _read(self, size=1024): if self.fileobj is None: - raise EOFError("Reached EOF") + return False if self._new_member: # If the _new_member flag is set, we have to # jump to the next member, if there is one. self._init_read() - self._read_gzip_header() + if not self._read_gzip_header(): + return False self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self._new_member = False @@ -454,7 +447,7 @@ class GzipFile(io.BufferedIOBase): self.fileobj.prepend(self.decompress.unused_data, True) self._read_eof() self._add_read_data( uncompress ) - raise EOFError('Reached EOF') + return False uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -470,6 +463,7 @@ class GzipFile(io.BufferedIOBase): # a new member on the next call self._read_eof() self._new_member = True + return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff @@ -484,8 +478,7 @@ class GzipFile(io.BufferedIOBase): # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB + crc32, isize = struct.unpack("<II", self._read_exact(8)) if crc32 != self.crc: raise OSError("CRC check failed %s != %s" % (hex(crc32), hex(self.crc))) diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 8703df7..7090cd6 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -569,6 +569,19 @@ class BZ2FileTest(BaseTest): bz2f.seek(-150, 1) self.assertEqual(bz2f.read(), self.TEXT[500-150:]) + def test_read_truncated(self): + # Drop the eos_magic field (6 bytes) and CRC (4 bytes). 
+ truncated = self.DATA[:-10] + with BZ2File(BytesIO(truncated)) as f: + self.assertRaises(EOFError, f.read) + with BZ2File(BytesIO(truncated)) as f: + self.assertEqual(f.read(len(self.TEXT)), self.TEXT) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 4-byte file header, and block header of at least 146 bits. + for i in range(22): + with BZ2File(BytesIO(truncated[:i])) as f: + self.assertRaises(EOFError, f.read, 1) + class BZ2CompressorTest(BaseTest): def testCompress(self): diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index af73953..ebd4c43 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -389,6 +389,20 @@ class TestGzip(BaseTest): datac = gzip.compress(data) self.assertEqual(gzip.decompress(datac), data) + def test_read_truncated(self): + data = data1*50 + # Drop the CRC (4 bytes) and file size (4 bytes). + truncated = gzip.compress(data)[:-8] + with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f: + self.assertRaises(EOFError, f.read) + with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f: + self.assertEqual(f.read(len(data)), data) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 10-byte header. + for i in range(2, 10): + with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f: + self.assertRaises(EOFError, f.read, 1) + class TestOpen(BaseTest): def test_binary_modes(self): diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index a13cf3b..4669ee2 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -669,6 +669,20 @@ class FileTestCase(unittest.TestCase): with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f: self.assertRaises(EOFError, f.read) + def test_read_truncated(self): + # Drop stream footer: CRC (4 bytes), index size (4 bytes), + # flags (2 bytes) and magic number (2 bytes). 
+ truncated = COMPRESSED_XZ[:-12] + with LZMAFile(BytesIO(truncated)) as f: + self.assertRaises(EOFError, f.read) + with LZMAFile(BytesIO(truncated)) as f: + self.assertEqual(f.read(len(INPUT)), INPUT) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 12-byte header. + for i in range(12): + with LZMAFile(BytesIO(truncated[:i])) as f: + self.assertRaises(EOFError, f.read, 1) + def test_read_bad_args(self): f = LZMAFile(BytesIO(COMPRESSED_XZ)) f.close() @@ -220,6 +220,9 @@ Core and Builtins Library ------- +- Issue #1159051: GzipFile now raises EOFError when reading a corrupted file + with truncated header or footer. + - Issue #16993: shutil.which() now preserves the case of the path and extension on Windows. |