From 5f2a7bce2ed3dce23b9e167e17746ba3dbab9d58 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 12 Aug 2010 15:30:13 +0000 Subject: Merged revisions 83959-83960 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ........ r83959 | antoine.pitrou | 2010-08-12 17:11:50 +0200 (jeu., 12 août 2010) | 5 lines Issue #7467: when reading a file from a ZIP archive, its CRC is checked and a BadZipfile error is raised if it doesn't match (as used to be the case in Python 2.5 and earlier). ........ r83960 | antoine.pitrou | 2010-08-12 17:15:01 +0200 (jeu., 12 août 2010) | 3 lines Typo. ........ --- Lib/test/test_zipfile.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ Lib/zipfile.py | 24 +++++++++++++++-- Misc/NEWS | 4 +++ 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 55cfaa9..474ec58 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -582,6 +582,27 @@ class PyZipFileTests(unittest.TestCase): class OtherTests(unittest.TestCase): + zips_with_bad_crc = { + zipfile.ZIP_STORED: ( + b'PK\003\004\024\0\0\0\0\0 \213\212;:r' + b'\253\377\f\0\0\0\f\0\0\0\005\0\0\000af' + b'ilehello,AworldP' + b'K\001\002\024\003\024\0\0\0\0\0 \213\212;:' + b'r\253\377\f\0\0\0\f\0\0\0\005\0\0\0\0' + b'\0\0\0\0\0\0\0\200\001\0\0\0\000afi' + b'lePK\005\006\0\0\0\0\001\0\001\0003\000' + b'\0\0/\0\0\0\0\0'), + zipfile.ZIP_DEFLATED: ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00n}\x0c=FA' + b'KE\x10\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af' + b'ile\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\xc9\xa0' + b'=\x13\x00PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00n' + b'}\x0c=FAKE\x10\x00\x00\x00n\x00\x00\x00\x05' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01\x00\x00\x00' + b'\x00afilePK\x05\x06\x00\x00\x00\x00\x01\x00' + b'\x01\x003\x00\x00\x003\x00\x00\x00\x00\x00'), + } + def testUnicodeFilenames(self): zf = 
zipfile.ZipFile(TESTFN, "w") zf.writestr("foo.txt", "Test for unicode filename") @@ -809,6 +830,55 @@ class OtherTests(unittest.TestCase): self.assertEqual(zipfr.comment, comment2) zipfr.close() + def check_testzip_with_bad_crc(self, compression): + """Tests that files with bad CRCs return their name from testzip.""" + zipdata = self.zips_with_bad_crc[compression] + + zipf = zipfile.ZipFile(io.BytesIO(zipdata), mode="r") + # testzip returns the name of the first corrupt file, or None + self.assertEqual('afile', zipf.testzip()) + zipf.close() + + def test_testzip_with_bad_crc_stored(self): + self.check_testzip_with_bad_crc(zipfile.ZIP_STORED) + + if zlib: + def test_testzip_with_bad_crc_deflated(self): + self.check_testzip_with_bad_crc(zipfile.ZIP_DEFLATED) + + def check_read_with_bad_crc(self, compression): + """Tests that files with bad CRCs raise a BadZipfile exception when read.""" + zipdata = self.zips_with_bad_crc[compression] + + # Using ZipFile.read() + zipf = zipfile.ZipFile(io.BytesIO(zipdata), mode="r") + self.assertRaises(zipfile.BadZipfile, zipf.read, 'afile') + zipf.close() + + # Using ZipExtFile.read() + zipf = zipfile.ZipFile(io.BytesIO(zipdata), mode="r") + corrupt_file = zipf.open('afile', 'r') + self.assertRaises(zipfile.BadZipfile, corrupt_file.read) + corrupt_file.close() + zipf.close() + + # Same with small reads (in order to exercise the buffering logic) + zipf = zipfile.ZipFile(io.BytesIO(zipdata), mode="r") + corrupt_file = zipf.open('afile', 'r') + corrupt_file.MIN_READ_SIZE = 2 + with self.assertRaises(zipfile.BadZipfile): + while corrupt_file.read(2): + pass + corrupt_file.close() + zipf.close() + + def test_read_with_bad_crc_stored(self): + self.check_read_with_bad_crc(zipfile.ZIP_STORED) + + if zlib: + def test_read_with_bad_crc_deflated(self): + self.check_read_with_bad_crc(zipfile.ZIP_DEFLATED) + def tearDown(self): support.unlink(TESTFN) support.unlink(TESTFN2) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 79ca152..3d2d57b 
100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -472,6 +472,12 @@ class ZipExtFile: if self.compress_type == ZIP_DEFLATED: self.dc = zlib.decompressobj(-15) + if hasattr(zipinfo, 'CRC'): + self._expected_crc = zipinfo.CRC + self._running_crc = crc32(b'') & 0xffffffff + else: + self._expected_crc = None + def set_univ_newlines(self, univ_newlines): self.univ_newlines = univ_newlines @@ -565,6 +571,16 @@ class ZipExtFile: result.append(line) return result + def _update_crc(self, newdata, eof): + # Update the CRC using the given data. + if self._expected_crc is None: + # No need to compute the CRC if we don't have a reference value + return + self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff + # Check the CRC if we're at the end of the file + if eof and self._running_crc != self._expected_crc: + raise BadZipfile("Bad CRC-32 for file %r" % self.name) + def read(self, size = None): # act like file obj and return empty string if size is 0 if size == 0: @@ -628,9 +644,11 @@ class ZipExtFile: # prevent decompressor from being used again self.dc = None + self._update_crc(newdata, eof=( + self.compress_size == self.bytes_read and + len(self.rawbuffer) == 0)) self.readbuffer += newdata - # return what the user asked for if size is None or len(self.readbuffer) <= size: data = self.readbuffer @@ -1382,7 +1400,9 @@ def main(args = None): print(USAGE) sys.exit(1) zf = ZipFile(args[1], 'r') - zf.testzip() + badfile = zf.testzip() + if badfile: + print("The following enclosed file is corrupted: {!r}".format(badfile)) print("Done testing") elif args[0] == '-e': diff --git a/Misc/NEWS b/Misc/NEWS index fd30974..b386182 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -93,6 +93,10 @@ C-API Library ------- +- Issue #7467: when reading a file from a ZIP archive, its CRC is checked + and a BadZipfile error is raised if it doesn't match (as used to be the + case in Python 2.5 and earlier). 
+ - Issue #9550: a BufferedReader could issue an additional read when the original read request had been satisfied, which could block indefinitely when the underlying raw IO channel was e.g. a socket. Report and original -- cgit v0.12