diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-10-13 13:54:50 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-10-13 13:54:50 (GMT) |
commit | 6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0 (patch) | |
tree | 605866d9f1c5a661320bbfc235bd091765c9458c | |
parent | b24e3477dc0f9895573a37b8fcedcf65dc99ef2c (diff) | |
download | cpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.zip cpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.tar.gz cpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.tar.bz2 |
Patch #611760: read archives with comments.
-rw-r--r-- | Lib/zipfile.py | 71 |
1 files changed, 48 insertions, 23 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 2df16ba..4f2b946 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -66,21 +66,51 @@ _FH_FILENAME_LENGTH = 10 _FH_EXTRA_FIELD_LENGTH = 11 def is_zipfile(filename): - """Quickly see if file is a ZIP file by checking the magic number. - - Will not accept a ZIP archive with an ending comment. - """ + """Quickly see if file is a ZIP file by checking the magic number.""" try: fpin = open(filename, "rb") - fpin.seek(-22, 2) # Seek to end-of-file record - endrec = fpin.read() + endrec = _EndRecData(fpin) fpin.close() - if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000": + if endrec: return True # file has correct magic number except IOError: pass return False +def _EndRecData(fpin): + """Return data from the "End of Central Directory" record, or None. + + The data is a list of the nine items in the ZIP "End of central dir" + record followed by a tenth item, the file seek offset of this record.""" + fpin.seek(-22, 2) # Assume no archive comment. + filesize = fpin.tell() + 22 # Get file size + data = fpin.read() + if data[0:4] == stringEndArchive and data[-2:] == "\000\000": + endrec = struct.unpack(structEndArchive, data) + endrec = list(endrec) + endrec.append("") # Append the archive comment + endrec.append(filesize - 22) # Append the record start offset + return endrec + # Search the last END_BLOCK bytes of the file for the record signature. + # The comment is appended to the ZIP file and has a 16 bit length. + # So the comment may be up to 64K long. We limit the search for the + # signature to a few Kbytes at the end of the file for efficiency. + # also, the signature must not appear in the comment. + END_BLOCK = min(filesize, 1024 * 4) + fpin.seek(filesize - END_BLOCK, 0) + data = fpin.read() + start = data.rfind(stringEndArchive) + if start >= 0: # Correct signature string was found + endrec = struct.unpack(structEndArchive, data[start:start+22]) + endrec = list(endrec) + comment = data[start+22:] + if endrec[7] == len(comment): # Comment length checks out + # Append the archive comment and start offset + endrec.append(comment) + endrec.append(filesize - END_BLOCK + start) + return endrec + return # Error, return None + class ZipInfo: """Class with attributes describing each file in the ZIP archive.""" @@ -183,16 +213,12 @@ class ZipFile: elif key == 'w': pass elif key == 'a': - fp = self.fp - fp.seek(-22, 2) # Seek to end-of-file record - endrec = fp.read() - if endrec[0:4] == stringEndArchive and \ - endrec[-2:] == "\000\000": - self._GetContents() # file is a zip file + try: # See if file is a zip file + self._RealGetContents() # seek to start of directory and overwrite - fp.seek(self.start_dir, 0) - else: # file is not a zip file, just append - fp.seek(0, 2) + self.fp.seek(self.start_dir, 0) + except BadZipfile: # file is not a zip file, just append + self.fp.seek(0, 2) else: if not self._filePassed: self.fp.close() @@ -213,17 +239,16 @@ class ZipFile: def _RealGetContents(self): """Read in the table of contents for the ZIP file.""" fp = self.fp - fp.seek(-22, 2) # Start of end-of-archive record - filesize = fp.tell() + 22 # Get file size - endrec = fp.read(22) # Archive must not end with a comment! - if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000": - raise BadZipfile, "File is not a zip file, or ends with a comment" - endrec = struct.unpack(structEndArchive, endrec) + endrec = _EndRecData(fp) + if not endrec: + raise BadZipfile, "File is not a zip file" if self.debug > 1: print endrec size_cd = endrec[5] # bytes in central directory offset_cd = endrec[6] # offset of central directory - x = filesize - 22 - size_cd + self.comment = endrec[8] # archive comment + # endrec[9] is the offset of the "End of Central Dir" record + x = endrec[9] - size_cd # "concat" is zero, unless zip was concatenated to another file concat = x - offset_cd if self.debug > 2: |