diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 15:01:59 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 15:01:59 (GMT) |
commit | 7c3922f44c226eac29a497648bbc3cc8702905a8 (patch) | |
tree | 395bde42013572a48d3cf30ddc6d28fdd0cc1cb7 | |
parent | fc6e8aabf58d748369e0d3b08495ac35a67d2870 (diff) | |
download | cpython-7c3922f44c226eac29a497648bbc3cc8702905a8.zip cpython-7c3922f44c226eac29a497648bbc3cc8702905a8.tar.gz cpython-7c3922f44c226eac29a497648bbc3cc8702905a8.tar.bz2 |
Issue #1159051: GzipFile now raises EOFError when reading a corrupted file
with truncated header or footer.
Added tests for reading truncated gzip and bzip2 files.
-rw-r--r-- | Lib/gzip.py | 72 | ||||
-rw-r--r-- | Lib/test/test_bz2.py | 18 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 15 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 70 insertions, 38 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index 6aacc9a..8fb1ed0 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -33,9 +33,6 @@ def write32u(output, value): # or unsigned. output.write(struct.pack("<L", value)) -def read32(input): - return struct.unpack("<I", input.read(4))[0] - def open(filename, mode="rb", compresslevel=9): """Shorthand for GzipFile(filename, mode, compresslevel). @@ -259,27 +256,31 @@ class GzipFile(io.BufferedIOBase): self.crc = zlib.crc32(b"") & 0xffffffff self.size = 0 + def _read_exact(self, n): + data = self.fileobj.read(n) + while len(data) < n: + b = self.fileobj.read(n - len(data)) + if not b: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + data += b + return data + def _read_gzip_header(self): magic = self.fileobj.read(2) if magic == b'': - raise EOFError("Reached EOF") + return False if magic != b'\037\213': raise IOError('Not a gzipped file') - method = ord( self.fileobj.read(1) ) + + method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8)) if method != 8: raise IOError('Unknown compression method') - flag = ord( self.fileobj.read(1) ) - self.mtime = read32(self.fileobj) - # extraflag = self.fileobj.read(1) - # os = self.fileobj.read(1) - self.fileobj.read(2) if flag & FEXTRA: # Read & discard the extra field, if present - xlen = ord(self.fileobj.read(1)) - xlen = xlen + 256*ord(self.fileobj.read(1)) - self.fileobj.read(xlen) + self._read_exact(struct.unpack("<H", self._read_exact(2))) if flag & FNAME: # Read and discard a null-terminated string containing the filename while True: @@ -293,12 +294,13 @@ class GzipFile(io.BufferedIOBase): if not s or s==b'\000': break if flag & FHCRC: - self.fileobj.read(2) # Read & discard the 16-bit header CRC + self._read_exact(2) # Read & discard the 16-bit header CRC unused = self.fileobj.unused() if unused: uncompress = self.decompress.decompress(unused) self._add_read_data(uncompress) + return True def write(self,data): self._check_closed() @@ -332,20 +334,16 @@ class GzipFile(io.BufferedIOBase): readsize = 1024 if size < 0: # get the whole thing - try: - while True: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - size = self.extrasize + while self._read(readsize): + readsize = min(self.max_read_chunk, readsize * 2) + size = self.extrasize else: # just get some more of it - try: - while size > self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize + while size > self.extrasize: + if not self._read(readsize): + if size > self.extrasize: + size = self.extrasize + break + readsize = min(self.max_read_chunk, readsize * 2) offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -366,12 +364,9 @@ class GzipFile(io.BufferedIOBase): if self.extrasize == 0: if self.fileobj is None: return b'' - try: - # Ensure that we don't return b"" if we haven't reached EOF. - while self.extrasize == 0: - # 1024 is the same buffering heuristic used in read() - self._read(max(n, 1024)) - except EOFError: + # Ensure that we don't return b"" if we haven't reached EOF. + # 1024 is the same buffering heuristic used in read() + while self.extrasize == 0 and self._read(max(n, 1024)): pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -384,13 +379,14 @@ class GzipFile(io.BufferedIOBase): def _read(self, size=1024): if self.fileobj is None: - raise EOFError("Reached EOF") + return False if self._new_member: # If the _new_member flag is set, we have to # jump to the next member, if there is one. self._init_read() - self._read_gzip_header() + if not self._read_gzip_header(): + return False self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self._new_member = False @@ -407,7 +403,7 @@ class GzipFile(io.BufferedIOBase): self.fileobj.prepend(self.decompress.unused_data, True) self._read_eof() self._add_read_data( uncompress ) - raise EOFError('Reached EOF') + return False uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -423,6 +419,7 @@ class GzipFile(io.BufferedIOBase): # a new member on the next call self._read_eof() self._new_member = True + return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff @@ -437,8 +434,7 @@ class GzipFile(io.BufferedIOBase): # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB + crc32, isize = struct.unpack("<II", self._read_exact(8)) if crc32 != self.crc: raise IOError("CRC check failed %s != %s" % (hex(crc32), hex(self.crc))) diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 977d10b..fb104d7 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -295,6 +295,24 @@ class BZ2FileTest(BaseTest): self.assertRaises(ValueError, f.readline) self.assertRaises(ValueError, f.readlines) + def test_read_truncated(self): + # Drop the eos_magic field (6 bytes) and CRC (4 bytes). + truncated = self.DATA[:-10] + with open(self.filename, 'wb') as f: + f.write(truncated) + with BZ2File(self.filename) as f: + self.assertRaises(EOFError, f.read) + with BZ2File(self.filename) as f: + self.assertEqual(f.read(len(self.TEXT)), self.TEXT) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 4-byte file header, and block header of at least 146 bits. + for i in range(22): + with open(self.filename, 'wb') as f: + f.write(truncated[:i]) + with BZ2File(self.filename) as f: + self.assertRaises(EOFError, f.read, 1) + + class BZ2CompressorTest(BaseTest): def testCompress(self): # "Test BZ2Compressor.compress()/flush()" diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index ced226f..ba9d7da 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -365,6 +365,21 @@ class TestGzip(unittest.TestCase): datac = gzip.compress(data) self.assertEqual(gzip.decompress(datac), data) + def test_read_truncated(self): + data = data1*50 + # Drop the CRC (4 bytes) and file size (4 bytes). + truncated = gzip.compress(data)[:-8] + with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f: + self.assertRaises(EOFError, f.read) + with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f: + self.assertEqual(f.read(len(data)), data) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 10-byte header. + for i in range(2, 10): + with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f: + self.assertRaises(EOFError, f.read, 1) + + def test_main(verbose=None): support.run_unittest(TestGzip) @@ -202,6 +202,9 @@ Core and Builtins Library ------- +- Issue #1159051: GzipFile now raises EOFError when reading a corrupted file + with truncated header or footer. + - Issue #15861: tkinter now correctly works with lists and tuples containing strings with whitespaces, backslashes or unbalanced braces. |