diff options
author | Guido van Rossum <guido@python.org> | 2007-05-22 01:29:33 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-05-22 01:29:33 (GMT) |
commit | d6ca5460914926e54c2a063728a6c785ced8a702 (patch) | |
tree | 2d56c7e93b4cc7b37233d6fd33b5b035d8445945 /Lib/zipfile.py | |
parent | 94ca1c620ebc63b0860579835f2c16fe056e1225 (diff) | |
download | cpython-d6ca5460914926e54c2a063728a6c785ced8a702.zip cpython-d6ca5460914926e54c2a063728a6c785ced8a702.tar.gz cpython-d6ca5460914926e54c2a063728a6c785ced8a702.tar.bz2 |
Make test_zipfile pass.
The zipfile module now does all I/O in binary mode using bytes.
(Maybe we should support wrapping a TextIOWrapper around it
when text mode reading is requested?)
Even the password is a bytes array now.
Had to fix py_compile.py to use bytes while I was at it.
The _struct module needed a patch to support bytes, str8 and str
for the 's' and 'p' formats.
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 112 |
1 file changed, 59 insertions, 53 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 1e180fc..6cff722 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1,5 +1,7 @@ """ Read and write ZIP files. + +XXX references to utf-8 need further investigation. """ import struct, os, time, sys import binascii, io @@ -33,15 +35,15 @@ ZIP_DEFLATED = 8 # Here are some struct module formats for reading headers structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes -stringEndArchive = "PK\005\006" # magic number for end of archive record +stringEndArchive = b"PK\005\006" # magic number for end of archive record structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes -stringCentralDir = "PK\001\002" # magic number for central directory +stringCentralDir = b"PK\001\002" # magic number for central directory structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes -stringFileHeader = "PK\003\004" # magic number for file header +stringFileHeader = b"PK\003\004" # magic number for file header structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes -stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header +stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes -stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header +stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header # indexes of entries in the central directory structure @@ -82,7 +84,7 @@ _FH_EXTRA_FIELD_LENGTH = 11 def is_zipfile(filename): """Quickly see if file is a ZIP file by checking the magic number.""" try: - fpin = open(filename, "rb") + fpin = io.open(filename, "rb") endrec = _EndRecData(fpin) fpin.close() if endrec: @@ -206,8 +208,8 @@ class ZipInfo (object): self.date_time = date_time # year, month, day, hour, min, sec # Standard values: self.compress_type = ZIP_STORED # Type of compression for the file - self.comment = "" # Comment for each file - 
self.extra = "" # ZIP extra data + self.comment = b"" # Comment for each file + self.extra = b"" # ZIP extra data if sys.platform == 'win32': self.create_system = 0 # System which created ZIP archive else: @@ -257,7 +259,7 @@ class ZipInfo (object): self.compress_type, dostime, dosdate, CRC, compress_size, file_size, len(self.filename), len(extra)) - return header + self.filename + extra + return header + self.filename.encode("utf-8") + extra def _decodeExtra(self): # Try to decode the extra field. @@ -331,7 +333,7 @@ class _ZipDecrypter: def _crc32(self, ch, crc): """Compute the CRC32 primitive on one byte.""" - return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] + return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] def __init__(self, pwd): self.key0 = 305419896 @@ -344,20 +346,13 @@ class _ZipDecrypter: self.key0 = self._crc32(c, self.key0) self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 - self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) + self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) def __call__(self, c): """Decrypt a single character.""" - # XXX When this is called with a byte instead of a char, ord() - # isn't needed. Don't die in that case. In the future we should - # just leave this out, once we're always using bytes. 
- try: - c = ord(c) - except TypeError: - pass + assert isinstance(c, int) k = self.key2 | 2 c = c ^ (((k * (k^1)) >> 8) & 255) - c = chr(c) self._UpdateKeys(c) return c @@ -370,13 +365,13 @@ class ZipExtFile: self.fileobj = fileobj self.decrypter = decrypt self.bytes_read = 0 - self.rawbuffer = '' - self.readbuffer = '' - self.linebuffer = '' + self.rawbuffer = b'' + self.readbuffer = b'' + self.linebuffer = b'' self.eof = False self.univ_newlines = False - self.nlSeps = ("\n", ) - self.lastdiscard = '' + self.nlSeps = (b"\n", ) + self.lastdiscard = b'' self.compress_type = zipinfo.compress_type self.compress_size = zipinfo.compress_size @@ -394,9 +389,9 @@ class ZipExtFile: self.univ_newlines = univ_newlines # pick line separator char(s) based on universal newlines flag - self.nlSeps = ("\n", ) + self.nlSeps = (b"\n", ) if self.univ_newlines: - self.nlSeps = ("\r\n", "\r", "\n") + self.nlSeps = (b"\r\n", b"\r", b"\n") def __iter__(self): return self @@ -417,7 +412,7 @@ class ZipExtFile: # ugly check for cases where half of an \r\n pair was # read on the last pass, and the \r was discarded. In this # case we just throw away the \n at the start of the buffer. - if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'): + if (self.lastdiscard, self.linebuffer[0]) == (b'\r', b'\n'): self.linebuffer = self.linebuffer[1:] for sep in self.nlSeps: @@ -435,7 +430,7 @@ class ZipExtFile: if size < 0: size = sys.maxint elif size == 0: - return '' + return b'' # check for a newline already in buffer nl, nllen = self._checkfornewline() @@ -461,7 +456,7 @@ class ZipExtFile: # so return current buffer if nl < 0: s = self.linebuffer - self.linebuffer = '' + self.linebuffer = b'' return s buf = self.linebuffer[:nl] @@ -470,7 +465,7 @@ class ZipExtFile: # line is always returned with \n as newline char (except possibly # for a final incomplete line in the file, which is handled above). 
- return buf + "\n" + return buf + b"\n" def readlines(self, sizehint = -1): """Return a list with all (following) lines. The sizehint parameter @@ -516,18 +511,23 @@ class ZipExtFile: # try to read from file (if necessary) if bytesToRead > 0: - bytes = self.fileobj.read(bytesToRead) - self.bytes_read += len(bytes) - self.rawbuffer += bytes + data = self.fileobj.read(bytesToRead) + self.bytes_read += len(data) + try: + self.rawbuffer += data + except: + print(repr(self.fileobj), repr(self.rawbuffer), + repr(data)) + raise # handle contents of raw buffer if self.rawbuffer: newdata = self.rawbuffer - self.rawbuffer = '' + self.rawbuffer = b'' # decrypt new data if we were given an object to handle that if newdata and self.decrypter is not None: - newdata = ''.join(map(self.decrypter, newdata)) + newdata = bytes(map(self.decrypter, newdata)) # decompress newly read data if necessary if newdata and self.compress_type == ZIP_DEFLATED: @@ -546,13 +546,13 @@ class ZipExtFile: # return what the user asked for if size is None or len(self.readbuffer) <= size: - bytes = self.readbuffer - self.readbuffer = '' + data = self.readbuffer + self.readbuffer = b'' else: - bytes = self.readbuffer[:size] + data = self.readbuffer[:size] self.readbuffer = self.readbuffer[size:] - return bytes + return data class ZipFile: @@ -593,15 +593,16 @@ class ZipFile: # Check if we were passed a file-like object if isinstance(file, basestring): + # No, it's a filename self._filePassed = 0 self.filename = file modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} try: - self.fp = open(file, modeDict[mode]) + self.fp = io.open(file, modeDict[mode]) except IOError: if mode == 'a': mode = key = 'w' - self.fp = open(file, modeDict[mode]) + self.fp = io.open(file, modeDict[mode]) else: raise else: @@ -661,7 +662,7 @@ class ZipFile: self.start_dir = offset_cd + concat fp.seek(self.start_dir, 0) data = fp.read(size_cd) - fp = io.StringIO(data) + fp = io.BytesIO(data) total = 0 while total < size_cd: centdir = 
fp.read(46) @@ -673,7 +674,7 @@ class ZipFile: print(centdir) filename = fp.read(centdir[_CD_FILENAME_LENGTH]) # Create ZipInfo instance to store file information - x = ZipInfo(filename) + x = ZipInfo(str(filename)) x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) total = (total + centdir[_CD_FILENAME_LENGTH] @@ -708,12 +709,16 @@ class ZipFile: archive.""" return self.filelist - def printdir(self): + def printdir(self, file=None): """Print a table of contents for the zip file.""" - print("%-46s %19s %12s" % ("File Name", "Modified ", "Size")) + if file is None: + file = sys.stdout + print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), + file=file) for zinfo in self.filelist: date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time - print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)) + print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), + file=file) def testzip(self): """Read all the files and check the CRC.""" @@ -730,6 +735,7 @@ class ZipFile: def setpassword(self, pwd): """Set default password for encrypted files.""" + assert isinstance(pwd, bytes) self.pwd = pwd def read(self, name, pwd=None): @@ -749,7 +755,7 @@ class ZipFile: if self._filePassed: zef_file = self.fp else: - zef_file = open(self.filename, 'rb') + zef_file = io.open(self.filename, 'rb') # Get info object for name zinfo = self.getinfo(name) @@ -768,9 +774,9 @@ class ZipFile: if fheader[_FH_EXTRA_FIELD_LENGTH]: zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) - if fname != zinfo.orig_filename: + if fname != zinfo.orig_filename.encode("utf-8"): raise BadZipfile, \ - 'File name in directory "%s" and header "%s" differ.' % ( + 'File name in directory %r and header %r differ.' % ( zinfo.orig_filename, fname) # check for encrypted flag & handle password @@ -790,7 +796,7 @@ class ZipFile: # and is used to check the correctness of the password. 
bytes = zef_file.read(12) h = map(zd, bytes[0:12]) - if ord(h[11]) != ((zinfo.CRC>>24)&255): + if h[11] != ((zinfo.CRC>>24) & 255): raise RuntimeError, "Bad password for file %s" % name # build and return a ZipExtFile @@ -852,7 +858,7 @@ class ZipFile: self._writecheck(zinfo) self._didModify = True - fp = open(filename, "rb") + fp = io.open(filename, "rb") # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 @@ -982,7 +988,7 @@ class ZipFile: 0, zinfo.internal_attr, zinfo.external_attr, header_offset) self.fp.write(centdir) - self.fp.write(zinfo.filename) + self.fp.write(zinfo.filename.encode("utf-8")) self.fp.write(extra_data) self.fp.write(zinfo.comment) @@ -1163,7 +1169,7 @@ def main(args = None): tgtdir = os.path.dirname(tgt) if not os.path.exists(tgtdir): os.makedirs(tgtdir) - fp = open(tgt, 'wb') + fp = io.open(tgt, 'wb') fp.write(zf.read(path)) fp.close() zf.close() |