summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2002-10-13 13:54:50 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2002-10-13 13:54:50 (GMT)
commit6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0 (patch)
tree605866d9f1c5a661320bbfc235bd091765c9458c
parentb24e3477dc0f9895573a37b8fcedcf65dc99ef2c (diff)
downloadcpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.zip
cpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.tar.gz
cpython-6f6873baa1b0bad2cf91ae58982ca161b3e8a5b0.tar.bz2
Patch #611760: read archives with comments.
-rw-r--r--Lib/zipfile.py71
1 files changed, 48 insertions, 23 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 2df16ba..4f2b946 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -66,21 +66,51 @@ _FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
- """Quickly see if file is a ZIP file by checking the magic number.
-
- Will not accept a ZIP archive with an ending comment.
- """
+ """Quickly see if file is a ZIP file by checking the magic number."""
try:
fpin = open(filename, "rb")
- fpin.seek(-22, 2) # Seek to end-of-file record
- endrec = fpin.read()
+ endrec = _EndRecData(fpin)
fpin.close()
- if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
+ if endrec:
return True # file has correct magic number
except IOError:
pass
return False
+def _EndRecData(fpin):
+ """Return data from the "End of Central Directory" record, or None.
+
+ The data is a list of the nine items in the ZIP "End of central dir"
+ record followed by a tenth item, the file seek offset of this record."""
+ fpin.seek(-22, 2) # Assume no archive comment.
+ filesize = fpin.tell() + 22 # Get file size
+ data = fpin.read()
+ if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
+ endrec = struct.unpack(structEndArchive, data)
+ endrec = list(endrec)
+ endrec.append("") # Append the archive comment
+ endrec.append(filesize - 22) # Append the record start offset
+ return endrec
+ # Search the last END_BLOCK bytes of the file for the record signature.
+ # The comment is appended to the ZIP file and has a 16 bit length.
+ # So the comment may be up to 64K long. We limit the search for the
+ # signature to a few Kbytes at the end of the file for efficiency.
+ # also, the signature must not appear in the comment.
+ END_BLOCK = min(filesize, 1024 * 4)
+ fpin.seek(filesize - END_BLOCK, 0)
+ data = fpin.read()
+ start = data.rfind(stringEndArchive)
+ if start >= 0: # Correct signature string was found
+ endrec = struct.unpack(structEndArchive, data[start:start+22])
+ endrec = list(endrec)
+ comment = data[start+22:]
+ if endrec[7] == len(comment): # Comment length checks out
+ # Append the archive comment and start offset
+ endrec.append(comment)
+ endrec.append(filesize - END_BLOCK + start)
+ return endrec
+ return # Error, return None
+
class ZipInfo:
"""Class with attributes describing each file in the ZIP archive."""
@@ -183,16 +213,12 @@ class ZipFile:
elif key == 'w':
pass
elif key == 'a':
- fp = self.fp
- fp.seek(-22, 2) # Seek to end-of-file record
- endrec = fp.read()
- if endrec[0:4] == stringEndArchive and \
- endrec[-2:] == "\000\000":
- self._GetContents() # file is a zip file
+ try: # See if file is a zip file
+ self._RealGetContents()
# seek to start of directory and overwrite
- fp.seek(self.start_dir, 0)
- else: # file is not a zip file, just append
- fp.seek(0, 2)
+ self.fp.seek(self.start_dir, 0)
+ except BadZipfile: # file is not a zip file, just append
+ self.fp.seek(0, 2)
else:
if not self._filePassed:
self.fp.close()
@@ -213,17 +239,16 @@ class ZipFile:
def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
- fp.seek(-22, 2) # Start of end-of-archive record
- filesize = fp.tell() + 22 # Get file size
- endrec = fp.read(22) # Archive must not end with a comment!
- if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
- raise BadZipfile, "File is not a zip file, or ends with a comment"
- endrec = struct.unpack(structEndArchive, endrec)
+ endrec = _EndRecData(fp)
+ if not endrec:
+ raise BadZipfile, "File is not a zip file"
if self.debug > 1:
print endrec
size_cd = endrec[5] # bytes in central directory
offset_cd = endrec[6] # offset of central directory
- x = filesize - 22 - size_cd
+ self.comment = endrec[8] # archive comment
+ # endrec[9] is the offset of the "End of Central Dir" record
+ x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2: