summary | refs | log | tree | commit | diff | stats
path: root/Lib/zipfile.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-05-22 01:29:33 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-05-22 01:29:33 (GMT)
commit: d6ca5460914926e54c2a063728a6c785ced8a702 (patch)
tree: 2d56c7e93b4cc7b37233d6fd33b5b035d8445945 /Lib/zipfile.py
parent: 94ca1c620ebc63b0860579835f2c16fe056e1225 (diff)
download: cpython-d6ca5460914926e54c2a063728a6c785ced8a702.zip
cpython-d6ca5460914926e54c2a063728a6c785ced8a702.tar.gz
cpython-d6ca5460914926e54c2a063728a6c785ced8a702.tar.bz2
Make test_zipfile pass.
The zipfile module now does all I/O in binary mode using bytes. (Maybe we should support wrapping a TextIOWrapper around it when text mode reading is requested?) Even the password is a bytes array now. Had to fix py_compile.py to use bytes while I was at it. The _struct module also needed a patch to support bytes, str8 and str for the 's' and 'p' formats.
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- Lib/zipfile.py 112
1 files changed, 59 insertions, 53 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 1e180fc..6cff722 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1,5 +1,7 @@
"""
Read and write ZIP files.
+
+XXX references to utf-8 need further investigation.
"""
import struct, os, time, sys
import binascii, io
@@ -33,15 +35,15 @@ ZIP_DEFLATED = 8
# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
-stringEndArchive = "PK\005\006" # magic number for end of archive record
+stringEndArchive = b"PK\005\006" # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
-stringCentralDir = "PK\001\002" # magic number for central directory
+stringCentralDir = b"PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
-stringFileHeader = "PK\003\004" # magic number for file header
+stringFileHeader = b"PK\003\004" # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
-stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
+stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
-stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
+stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header
# indexes of entries in the central directory structure
@@ -82,7 +84,7 @@ _FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number."""
try:
- fpin = open(filename, "rb")
+ fpin = io.open(filename, "rb")
endrec = _EndRecData(fpin)
fpin.close()
if endrec:
@@ -206,8 +208,8 @@ class ZipInfo (object):
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
- self.comment = "" # Comment for each file
- self.extra = "" # ZIP extra data
+ self.comment = b"" # Comment for each file
+ self.extra = b"" # ZIP extra data
if sys.platform == 'win32':
self.create_system = 0 # System which created ZIP archive
else:
@@ -257,7 +259,7 @@ class ZipInfo (object):
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(self.filename), len(extra))
- return header + self.filename + extra
+ return header + self.filename.encode("utf-8") + extra
def _decodeExtra(self):
# Try to decode the extra field.
@@ -331,7 +333,7 @@ class _ZipDecrypter:
def _crc32(self, ch, crc):
"""Compute the CRC32 primitive on one byte."""
- return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
+ return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
def __init__(self, pwd):
self.key0 = 305419896
@@ -344,20 +346,13 @@ class _ZipDecrypter:
self.key0 = self._crc32(c, self.key0)
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
- self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
+ self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
def __call__(self, c):
"""Decrypt a single character."""
- # XXX When this is called with a byte instead of a char, ord()
- # isn't needed. Don't die in that case. In the future we should
- # just leave this out, once we're always using bytes.
- try:
- c = ord(c)
- except TypeError:
- pass
+ assert isinstance(c, int)
k = self.key2 | 2
c = c ^ (((k * (k^1)) >> 8) & 255)
- c = chr(c)
self._UpdateKeys(c)
return c
@@ -370,13 +365,13 @@ class ZipExtFile:
self.fileobj = fileobj
self.decrypter = decrypt
self.bytes_read = 0
- self.rawbuffer = ''
- self.readbuffer = ''
- self.linebuffer = ''
+ self.rawbuffer = b''
+ self.readbuffer = b''
+ self.linebuffer = b''
self.eof = False
self.univ_newlines = False
- self.nlSeps = ("\n", )
- self.lastdiscard = ''
+ self.nlSeps = (b"\n", )
+ self.lastdiscard = b''
self.compress_type = zipinfo.compress_type
self.compress_size = zipinfo.compress_size
@@ -394,9 +389,9 @@ class ZipExtFile:
self.univ_newlines = univ_newlines
# pick line separator char(s) based on universal newlines flag
- self.nlSeps = ("\n", )
+ self.nlSeps = (b"\n", )
if self.univ_newlines:
- self.nlSeps = ("\r\n", "\r", "\n")
+ self.nlSeps = (b"\r\n", b"\r", b"\n")
def __iter__(self):
return self
@@ -417,7 +412,7 @@ class ZipExtFile:
# ugly check for cases where half of an \r\n pair was
# read on the last pass, and the \r was discarded. In this
# case we just throw away the \n at the start of the buffer.
- if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
+ if (self.lastdiscard, self.linebuffer[0]) == (b'\r', b'\n'):
self.linebuffer = self.linebuffer[1:]
for sep in self.nlSeps:
@@ -435,7 +430,7 @@ class ZipExtFile:
if size < 0:
size = sys.maxint
elif size == 0:
- return ''
+ return b''
# check for a newline already in buffer
nl, nllen = self._checkfornewline()
@@ -461,7 +456,7 @@ class ZipExtFile:
# so return current buffer
if nl < 0:
s = self.linebuffer
- self.linebuffer = ''
+ self.linebuffer = b''
return s
buf = self.linebuffer[:nl]
@@ -470,7 +465,7 @@ class ZipExtFile:
# line is always returned with \n as newline char (except possibly
# for a final incomplete line in the file, which is handled above).
- return buf + "\n"
+ return buf + b"\n"
def readlines(self, sizehint = -1):
"""Return a list with all (following) lines. The sizehint parameter
@@ -516,18 +511,23 @@ class ZipExtFile:
# try to read from file (if necessary)
if bytesToRead > 0:
- bytes = self.fileobj.read(bytesToRead)
- self.bytes_read += len(bytes)
- self.rawbuffer += bytes
+ data = self.fileobj.read(bytesToRead)
+ self.bytes_read += len(data)
+ try:
+ self.rawbuffer += data
+ except:
+ print(repr(self.fileobj), repr(self.rawbuffer),
+ repr(data))
+ raise
# handle contents of raw buffer
if self.rawbuffer:
newdata = self.rawbuffer
- self.rawbuffer = ''
+ self.rawbuffer = b''
# decrypt new data if we were given an object to handle that
if newdata and self.decrypter is not None:
- newdata = ''.join(map(self.decrypter, newdata))
+ newdata = bytes(map(self.decrypter, newdata))
# decompress newly read data if necessary
if newdata and self.compress_type == ZIP_DEFLATED:
@@ -546,13 +546,13 @@ class ZipExtFile:
# return what the user asked for
if size is None or len(self.readbuffer) <= size:
- bytes = self.readbuffer
- self.readbuffer = ''
+ data = self.readbuffer
+ self.readbuffer = b''
else:
- bytes = self.readbuffer[:size]
+ data = self.readbuffer[:size]
self.readbuffer = self.readbuffer[size:]
- return bytes
+ return data
class ZipFile:
@@ -593,15 +593,16 @@ class ZipFile:
# Check if we were passed a file-like object
if isinstance(file, basestring):
+ # No, it's a filename
self._filePassed = 0
self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
try:
- self.fp = open(file, modeDict[mode])
+ self.fp = io.open(file, modeDict[mode])
except IOError:
if mode == 'a':
mode = key = 'w'
- self.fp = open(file, modeDict[mode])
+ self.fp = io.open(file, modeDict[mode])
else:
raise
else:
@@ -661,7 +662,7 @@ class ZipFile:
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
- fp = io.StringIO(data)
+ fp = io.BytesIO(data)
total = 0
while total < size_cd:
centdir = fp.read(46)
@@ -673,7 +674,7 @@ class ZipFile:
print(centdir)
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
# Create ZipInfo instance to store file information
- x = ZipInfo(filename)
+ x = ZipInfo(str(filename))
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
total = (total + centdir[_CD_FILENAME_LENGTH]
@@ -708,12 +709,16 @@ class ZipFile:
archive."""
return self.filelist
- def printdir(self):
+ def printdir(self, file=None):
"""Print a table of contents for the zip file."""
- print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
+ if file is None:
+ file = sys.stdout
+ print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
+ file=file)
for zinfo in self.filelist:
date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
- print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
+ print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
+ file=file)
def testzip(self):
"""Read all the files and check the CRC."""
@@ -730,6 +735,7 @@ class ZipFile:
def setpassword(self, pwd):
"""Set default password for encrypted files."""
+ assert isinstance(pwd, bytes)
self.pwd = pwd
def read(self, name, pwd=None):
@@ -749,7 +755,7 @@ class ZipFile:
if self._filePassed:
zef_file = self.fp
else:
- zef_file = open(self.filename, 'rb')
+ zef_file = io.open(self.filename, 'rb')
# Get info object for name
zinfo = self.getinfo(name)
@@ -768,9 +774,9 @@ class ZipFile:
if fheader[_FH_EXTRA_FIELD_LENGTH]:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
- if fname != zinfo.orig_filename:
+ if fname != zinfo.orig_filename.encode("utf-8"):
raise BadZipfile, \
- 'File name in directory "%s" and header "%s" differ.' % (
+ 'File name in directory %r and header %r differ.' % (
zinfo.orig_filename, fname)
# check for encrypted flag & handle password
@@ -790,7 +796,7 @@ class ZipFile:
# and is used to check the correctness of the password.
bytes = zef_file.read(12)
h = map(zd, bytes[0:12])
- if ord(h[11]) != ((zinfo.CRC>>24)&255):
+ if h[11] != ((zinfo.CRC>>24) & 255):
raise RuntimeError, "Bad password for file %s" % name
# build and return a ZipExtFile
@@ -852,7 +858,7 @@ class ZipFile:
self._writecheck(zinfo)
self._didModify = True
- fp = open(filename, "rb")
+ fp = io.open(filename, "rb")
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
@@ -982,7 +988,7 @@ class ZipFile:
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
self.fp.write(centdir)
- self.fp.write(zinfo.filename)
+ self.fp.write(zinfo.filename.encode("utf-8"))
self.fp.write(extra_data)
self.fp.write(zinfo.comment)
@@ -1163,7 +1169,7 @@ def main(args = None):
tgtdir = os.path.dirname(tgt)
if not os.path.exists(tgtdir):
os.makedirs(tgtdir)
- fp = open(tgt, 'wb')
+ fp = io.open(tgt, 'wb')
fp.write(zf.read(path))
fp.close()
zf.close()