summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2015-03-22 23:09:35 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com>, 2015-03-22 23:09:35 (GMT)
commit: 77d899726f4f7c78757e4214c147e699b285a8a7 (patch)
tree: 5cd058cc833c7e0614c3f21ab99dc16c77d5557a
parent: f07a4b663daafaf792ea39e118345fa1f10bbf8f (diff)
download: cpython-77d899726f4f7c78757e4214c147e699b285a8a7.zip
          cpython-77d899726f4f7c78757e4214c147e699b285a8a7.tar.gz
          cpython-77d899726f4f7c78757e4214c147e699b285a8a7.tar.bz2
Issue #23252: Added support for writing ZIP files to unseekable streams.
-rw-r--r-- Doc/library/zipfile.rst | 5
-rw-r--r-- Doc/whatsnew/3.5.rst | 6
-rw-r--r-- Lib/test/test_zipfile.py | 62
-rw-r--r-- Lib/zipfile.py | 80
-rw-r--r-- Misc/NEWS | 2
5 files changed, 120 insertions, 35 deletions
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index ef9a2ea..c1dda25 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -140,6 +140,7 @@ ZipFile Objects
ZIP file, then a new ZIP archive is appended to the file. This is meant for
adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created.
+ If *mode* is ``r`` or ``a``, the file should be seekable.
*compression* is the ZIP compression method to use when writing the archive,
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
@@ -171,6 +172,9 @@ ZipFile Objects
.. versionchanged:: 3.4
ZIP64 extensions are enabled by default.
+ .. versionchanged:: 3.5
+ Added support for writing to unseekable streams.
+
.. method:: ZipFile.close()
@@ -328,7 +332,6 @@ ZipFile Objects
If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null
byte, the name of the file in the archive will be truncated at the null byte.
-
.. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type])
Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index b22c657..44915a3 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -448,6 +448,12 @@ faulthandler
:func:`~faulthandler.dump_traceback_later` functions now accept file
descriptors. (Contributed by Wei Wu in :issue:`23566`.)
+zipfile
+-------
+
+* Added support for writing ZIP files to unseekable streams.
+ (Contributed by Serhiy Storchaka in :issue:`23252`.)
+
Optimizations
=============
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index f458f30..4cd5fe3 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1685,25 +1685,63 @@ class Tellable:
self.offset = 0
def write(self, data):
- self.offset += self.fp.write(data)
+ n = self.fp.write(data)
+ self.offset += n
+ return n
def tell(self):
return self.offset
def flush(self):
- pass
+ self.fp.flush()
+
+class Unseekable:
+ def __init__(self, fp):
+ self.fp = fp
+
+ def write(self, data):
+ return self.fp.write(data)
+
+ def flush(self):
+ self.fp.flush()
class UnseekableTests(unittest.TestCase):
- def test_writestr_tellable(self):
- f = io.BytesIO()
- with zipfile.ZipFile(Tellable(f), 'w', zipfile.ZIP_STORED) as zipfp:
- zipfp.writestr('ones', b'111')
- zipfp.writestr('twos', b'222')
- with zipfile.ZipFile(f, mode='r') as zipf:
- with zipf.open('ones') as zopen:
- self.assertEqual(zopen.read(), b'111')
- with zipf.open('twos') as zopen:
- self.assertEqual(zopen.read(), b'222')
+ def test_writestr(self):
+ for wrapper in (lambda f: f), Tellable, Unseekable:
+ with self.subTest(wrapper=wrapper):
+ f = io.BytesIO()
+ f.write(b'abc')
+ bf = io.BufferedWriter(f)
+ with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
+ zipfp.writestr('ones', b'111')
+ zipfp.writestr('twos', b'222')
+ self.assertEqual(f.getvalue()[:5], b'abcPK')
+ with zipfile.ZipFile(f, mode='r') as zipf:
+ with zipf.open('ones') as zopen:
+ self.assertEqual(zopen.read(), b'111')
+ with zipf.open('twos') as zopen:
+ self.assertEqual(zopen.read(), b'222')
+
+ def test_write(self):
+ for wrapper in (lambda f: f), Tellable, Unseekable:
+ with self.subTest(wrapper=wrapper):
+ f = io.BytesIO()
+ f.write(b'abc')
+ bf = io.BufferedWriter(f)
+ with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
+ self.addCleanup(unlink, TESTFN)
+ with open(TESTFN, 'wb') as f2:
+ f2.write(b'111')
+ zipfp.write(TESTFN, 'ones')
+ with open(TESTFN, 'wb') as f2:
+ f2.write(b'222')
+ zipfp.write(TESTFN, 'twos')
+ self.assertEqual(f.getvalue()[:5], b'abcPK')
+ with zipfile.ZipFile(f, mode='r') as zipf:
+ with zipf.open('ones') as zopen:
+ self.assertEqual(zopen.read(), b'111')
+ with zipf.open('twos') as zopen:
+ self.assertEqual(zopen.read(), b'222')
@requires_zlib
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 8c2950f..55afa08 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -667,6 +667,26 @@ class _SharedFile:
self._file = None
self._close(fileobj)
+# Provide the tell() method for unseekable streams
+class _Tellable:
+ def __init__(self, fp):
+ self.fp = fp
+ self.offset = 0
+
+ def write(self, data):
+ n = self.fp.write(data)
+ self.offset += n
+ return n
+
+ def tell(self):
+ return self.offset
+
+ def flush(self):
+ self.fp.flush()
+
+ def close(self):
+ self.fp.close()
+
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
@@ -994,6 +1014,7 @@ class ZipFile:
self.filename = getattr(file, 'name', None)
self._fileRefCnt = 1
self._lock = threading.RLock()
+ self._seekable = True
try:
if mode == 'r':
@@ -1002,13 +1023,24 @@ class ZipFile:
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
- self.start_dir = self.fp.tell()
+ try:
+ self.start_dir = self.fp.tell()
+ except (AttributeError, OSError):
+ self.fp = _Tellable(self.fp)
+ self.start_dir = 0
+ self._seekable = False
+ else:
+ # Some file-like objects can provide tell() but not seek()
+ try:
+ self.fp.seek(self.start_dir)
+ except (AttributeError, OSError):
+ self._seekable = False
elif mode == 'a':
try:
# See if file is a zip file
self._RealGetContents()
# seek to start of directory and overwrite
- self.fp.seek(self.start_dir, 0)
+ self.fp.seek(self.start_dir)
except BadZipFile:
# file is not a zip file, just append
self.fp.seek(0, 2)
@@ -1415,7 +1447,8 @@ class ZipFile:
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
with self._lock:
- self.fp.seek(self.start_dir, 0)
+ if self._seekable:
+ self.fp.seek(self.start_dir)
zinfo.header_offset = self.fp.tell() # Start of header bytes
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
@@ -1436,6 +1469,8 @@ class ZipFile:
return
cmpr = _get_compressor(zinfo.compress_type)
+ if not self._seekable:
+ zinfo.flag_bits |= 0x08
with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
@@ -1464,17 +1499,24 @@ class ZipFile:
zinfo.compress_size = file_size
zinfo.CRC = CRC
zinfo.file_size = file_size
- if not zip64 and self._allowZip64:
- if file_size > ZIP64_LIMIT:
- raise RuntimeError('File size has increased during compressing')
- if compress_size > ZIP64_LIMIT:
- raise RuntimeError('Compressed size larger than uncompressed size')
- # Seek backwards and write file header (which will now include
- # correct CRC and file sizes)
- self.start_dir = self.fp.tell() # Preserve current position in file
- self.fp.seek(zinfo.header_offset, 0)
- self.fp.write(zinfo.FileHeader(zip64))
- self.fp.seek(self.start_dir, 0)
+ if zinfo.flag_bits & 0x08:
+ # Write CRC and file sizes after the file data
+ fmt = '<LQQ' if zip64 else '<LLL'
+ self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
+ zinfo.file_size))
+ self.start_dir = self.fp.tell()
+ else:
+ if not zip64 and self._allowZip64:
+ if file_size > ZIP64_LIMIT:
+ raise RuntimeError('File size has increased during compressing')
+ if compress_size > ZIP64_LIMIT:
+ raise RuntimeError('Compressed size larger than uncompressed size')
+ # Seek backwards and write file header (which will now include
+ # correct CRC and file sizes)
+ self.start_dir = self.fp.tell() # Preserve current position in file
+ self.fp.seek(zinfo.header_offset)
+ self.fp.write(zinfo.FileHeader(zip64))
+ self.fp.seek(self.start_dir)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
@@ -1504,11 +1546,8 @@ class ZipFile:
zinfo.file_size = len(data) # Uncompressed size
with self._lock:
- try:
+ if self._seekable:
self.fp.seek(self.start_dir)
- except (AttributeError, io.UnsupportedOperation):
- # Some file-like objects can provide tell() but not seek()
- pass
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
@@ -1557,11 +1596,8 @@ class ZipFile:
try:
if self.mode in ("w", "a") and self._didModify: # write ending records
with self._lock:
- try:
+ if self._seekable:
self.fp.seek(self.start_dir)
- except (AttributeError, io.UnsupportedOperation):
- # Some file-like objects can provide tell() but not seek()
- pass
self._write_end_record()
finally:
fp = self.fp
diff --git a/Misc/NEWS b/Misc/NEWS
index 590f0a5..0d362d8 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,8 @@ Core and Builtins
Library
-------
+- Issue #23252: Added support for writing ZIP files to unseekable streams.
+
- Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.
- Issue #23647: Increase imaplib's MAXLINE to accommodate modern mailbox sizes.