summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2014-12-03 07:14:36 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2014-12-03 07:14:36 (GMT)
commitf4adbae92738b96915664461fcc3be17e19eb674 (patch)
tree87c096e873b499206bd05fa7c92a6f3fa0cc0496
parent4954f9fcab59e38f2229998b3276567d4bf92b53 (diff)
parent1ad088f3ea14f4b598040e1d266e2567aa260dea (diff)
downloadcpython-f4adbae92738b96915664461fcc3be17e19eb674.zip
cpython-f4adbae92738b96915664461fcc3be17e19eb674.tar.gz
cpython-f4adbae92738b96915664461fcc3be17e19eb674.tar.bz2
Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects
returned by ZipFile.open() can now operate independently of the ZipFile even if the ZipFile was created by passing in a file-like object as the first argument to the constructor.
-rw-r--r--Doc/library/zipfile.rst10
-rw-r--r--Lib/test/test_zipfile.py121
-rw-r--r--Lib/zipfile.py103
-rw-r--r--Misc/NEWS5
4 files changed, 166 insertions, 73 deletions
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 1d23a7c..465d786 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -219,14 +219,8 @@ ZipFile Objects
.. note::
- If the ZipFile was created by passing in a file-like object as the first
- argument to the constructor, then the object returned by :meth:`.open` shares the
- ZipFile's file pointer. Under these circumstances, the object returned by
- :meth:`.open` should not be used after any additional operations are performed
- on the ZipFile object. If the ZipFile was created by passing in a string (the
- filename) as the first argument to the constructor, then :meth:`.open` will
- create a new file object that will be held by the ZipExtFile, allowing it to
- operate independently of the ZipFile.
+ Objects returned by :meth:`.open` can operate independently of the
+ ZipFile.
.. note::
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 7f2f1ec..57431c7 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1,3 +1,4 @@
+import contextlib
import io
import os
import sys
@@ -25,6 +26,9 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
+def getrandbytes(size):
+ return getrandbits(8 * size).to_bytes(size, 'little')
+
def get_files(test):
yield TESTFN2
with TemporaryFile() as f:
@@ -289,7 +293,7 @@ class AbstractTestsWithSourceFile:
# than requested.
for test_size in (1, 4095, 4096, 4097, 16384):
file_size = test_size + 1
- junk = getrandbits(8 * file_size).to_bytes(file_size, 'little')
+ junk = getrandbytes(file_size)
with zipfile.ZipFile(io.BytesIO(), "w", self.compression) as zipf:
zipf.writestr('foo', junk)
with zipf.open('foo', 'r') as fp:
@@ -1666,46 +1670,111 @@ class LzmaTestsWithRandomBinaryFiles(AbstractTestsWithRandomBinaryFiles,
@requires_zlib
class TestsWithMultipleOpens(unittest.TestCase):
- def setUp(self):
+ @classmethod
+ def setUpClass(cls):
+ cls.data1 = b'111' + getrandbytes(10000)
+ cls.data2 = b'222' + getrandbytes(10000)
+
+ def make_test_archive(self, f):
# Create the ZIP archive
- with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp:
- zipfp.writestr('ones', '1'*FIXEDTEST_SIZE)
- zipfp.writestr('twos', '2'*FIXEDTEST_SIZE)
+ with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:
+ zipfp.writestr('ones', self.data1)
+ zipfp.writestr('twos', self.data2)
def test_same_file(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
- with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
- with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
- data1 = zopen1.read(500)
- data2 = zopen2.read(500)
- data1 += zopen1.read(500)
- data2 += zopen2.read(500)
- self.assertEqual(data1, data2)
+ for f in get_files(self):
+ self.make_test_archive(f)
+ with zipfile.ZipFile(f, mode="r") as zipf:
+ with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
+ data1 = zopen1.read(500)
+ data2 = zopen2.read(500)
+ data1 += zopen1.read()
+ data2 += zopen2.read()
+ self.assertEqual(data1, data2)
+ self.assertEqual(data1, self.data1)
def test_different_file(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
- with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
- with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
- data1 = zopen1.read(500)
- data2 = zopen2.read(500)
- data1 += zopen1.read(500)
- data2 += zopen2.read(500)
- self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
- self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
+ for f in get_files(self):
+ self.make_test_archive(f)
+ with zipfile.ZipFile(f, mode="r") as zipf:
+ with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+ data1 = zopen1.read(500)
+ data2 = zopen2.read(500)
+ data1 += zopen1.read()
+ data2 += zopen2.read()
+ self.assertEqual(data1, self.data1)
+ self.assertEqual(data2, self.data2)
def test_interleaved(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
- with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
- with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+ for f in get_files(self):
+ self.make_test_archive(f)
+ with zipfile.ZipFile(f, mode="r") as zipf:
+ with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+ data1 = zopen1.read(500)
+ data2 = zopen2.read(500)
+ data1 += zopen1.read()
+ data2 += zopen2.read()
+ self.assertEqual(data1, self.data1)
+ self.assertEqual(data2, self.data2)
+
+ def test_read_after_close(self):
+ for f in get_files(self):
+ self.make_test_archive(f)
+ with contextlib.ExitStack() as stack:
+ with zipfile.ZipFile(f, 'r') as zipf:
+ zopen1 = stack.enter_context(zipf.open('ones'))
+ zopen2 = stack.enter_context(zipf.open('twos'))
data1 = zopen1.read(500)
data2 = zopen2.read(500)
- data1 += zopen1.read(500)
- data2 += zopen2.read(500)
- self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
- self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
+ data1 += zopen1.read()
+ data2 += zopen2.read()
+ self.assertEqual(data1, self.data1)
+ self.assertEqual(data2, self.data2)
+
+ def test_read_after_write(self):
+ for f in get_files(self):
+ with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:
+ zipf.writestr('ones', self.data1)
+ zipf.writestr('twos', self.data2)
+ with zipf.open('ones') as zopen1:
+ data1 = zopen1.read(500)
+ self.assertEqual(data1, self.data1[:500])
+ with zipfile.ZipFile(f, 'r') as zipf:
+ data1 = zipf.read('ones')
+ data2 = zipf.read('twos')
+ self.assertEqual(data1, self.data1)
+ self.assertEqual(data2, self.data2)
+
+ def test_write_after_read(self):
+ for f in get_files(self):
+ with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:
+ zipf.writestr('ones', self.data1)
+ with zipf.open('ones') as zopen1:
+ zopen1.read(500)
+ zipf.writestr('twos', self.data2)
+ with zipfile.ZipFile(f, 'r') as zipf:
+ data1 = zipf.read('ones')
+ data2 = zipf.read('twos')
+ self.assertEqual(data1, self.data1)
+ self.assertEqual(data2, self.data2)
+
+ def test_many_opens(self):
+ # Verify that read() and open() promptly close the file descriptor,
+ # and don't rely on the garbage collector to free resources.
+ self.make_test_archive(TESTFN2)
+ with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
+ for x in range(100):
+ zipf.read('ones')
+ with zipf.open('ones') as zopen1:
+ pass
+ with open(os.devnull) as f:
+ self.assertLess(f.fileno(), 100)
def tearDown(self):
unlink(TESTFN2)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 74e1566..a1f5b18 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -646,6 +646,25 @@ def _get_decompressor(compress_type):
raise NotImplementedError("compression type %d" % (compress_type,))
+class _SharedFile:
+ def __init__(self, file, pos, close):
+ self._file = file
+ self._pos = pos
+ self._close = close
+
+ def read(self, n=-1):
+ self._file.seek(self._pos)
+ data = self._file.read(n)
+ self._pos = self._file.tell()
+ return data
+
+ def close(self):
+ if self._file is not None:
+ fileobj = self._file
+ self._file = None
+ self._close(fileobj)
+
+
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
Is returned by ZipFile.open().
@@ -945,7 +964,7 @@ class ZipFile:
self.NameToInfo = {} # Find file info given name
self.filelist = [] # List of ZipInfo instances for archive
self.compression = compression # Method of compression
- self.mode = key = mode.replace('b', '')[0]
+ self.mode = mode
self.pwd = None
self._comment = b''
@@ -954,28 +973,33 @@ class ZipFile:
# No, it's a filename
self._filePassed = 0
self.filename = file
- modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
- try:
- self.fp = io.open(file, modeDict[mode])
- except OSError:
- if mode == 'a':
- mode = key = 'w'
- self.fp = io.open(file, modeDict[mode])
- else:
+ modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
+ 'r+b': 'w+b', 'w+b': 'wb'}
+ filemode = modeDict[mode]
+ while True:
+ try:
+ self.fp = io.open(file, filemode)
+ except OSError:
+ if filemode in modeDict:
+ filemode = modeDict[filemode]
+ continue
raise
+ break
else:
self._filePassed = 1
self.fp = file
self.filename = getattr(file, 'name', None)
+ self._fileRefCnt = 1
try:
- if key == 'r':
+ if mode == 'r':
self._RealGetContents()
- elif key == 'w':
+ elif mode == 'w':
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
- elif key == 'a':
+ self.start_dir = 0
+ elif mode == 'a':
try:
# See if file is a zip file
self._RealGetContents()
@@ -988,13 +1012,13 @@ class ZipFile:
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
+ self.start_dir = self.fp.tell()
else:
raise RuntimeError('Mode must be "r", "w" or "a"')
except:
fp = self.fp
self.fp = None
- if not self._filePassed:
- fp.close()
+ self._fpclose(fp)
raise
def __enter__(self):
@@ -1181,23 +1205,17 @@ class ZipFile:
raise RuntimeError(
"Attempt to read ZIP archive that was already closed")
- # Only open a new file for instances where we were not
- # given a file object in the constructor
- if self._filePassed:
- zef_file = self.fp
+ # Make sure we have an info object
+ if isinstance(name, ZipInfo):
+ # 'name' is already an info object
+ zinfo = name
else:
- zef_file = io.open(self.filename, 'rb')
+ # Get info object for name
+ zinfo = self.getinfo(name)
+ self._fileRefCnt += 1
+ zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
try:
- # Make sure we have an info object
- if isinstance(name, ZipInfo):
- # 'name' is already an info object
- zinfo = name
- else:
- # Get info object for name
- zinfo = self.getinfo(name)
- zef_file.seek(zinfo.header_offset, 0)
-
# Skip the file header:
fheader = zef_file.read(sizeFileHeader)
if len(fheader) != sizeFileHeader:
@@ -1256,11 +1274,9 @@ class ZipFile:
if h[11] != check_byte:
raise RuntimeError("Bad password for file", name)
- return ZipExtFile(zef_file, mode, zinfo, zd,
- close_fileobj=not self._filePassed)
+ return ZipExtFile(zef_file, mode, zinfo, zd, True)
except:
- if not self._filePassed:
- zef_file.close()
+ zef_file.close()
raise
def extract(self, member, path=None, pwd=None):
@@ -1394,6 +1410,7 @@ class ZipFile:
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
+ self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header bytes
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
@@ -1410,6 +1427,7 @@ class ZipFile:
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader(False))
+ self.start_dir = self.fp.tell()
return
cmpr = _get_compressor(zinfo.compress_type)
@@ -1448,10 +1466,10 @@ class ZipFile:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
- position = self.fp.tell() # Preserve current position in file
+ self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset, 0)
self.fp.write(zinfo.FileHeader(zip64))
- self.fp.seek(position, 0)
+ self.fp.seek(self.start_dir, 0)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
@@ -1480,6 +1498,7 @@ class ZipFile:
"Attempt to write to ZIP archive that was already closed")
zinfo.file_size = len(data) # Uncompressed size
+ self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
@@ -1508,6 +1527,7 @@ class ZipFile:
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size))
self.fp.flush()
+ self.start_dir = self.fp.tell()
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
@@ -1523,7 +1543,7 @@ class ZipFile:
try:
if self.mode in ("w", "a") and self._didModify: # write ending records
- pos1 = self.fp.tell()
+ self.fp.seek(self.start_dir, 0)
for zinfo in self.filelist: # write central directory
dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
@@ -1589,8 +1609,8 @@ class ZipFile:
pos2 = self.fp.tell()
# Write end-of-zip-archive record
centDirCount = len(self.filelist)
- centDirSize = pos2 - pos1
- centDirOffset = pos1
+ centDirSize = pos2 - self.start_dir
+ centDirOffset = self.start_dir
requires_zip64 = None
if centDirCount > ZIP_FILECOUNT_LIMIT:
requires_zip64 = "Files count"
@@ -1626,8 +1646,13 @@ class ZipFile:
finally:
fp = self.fp
self.fp = None
- if not self._filePassed:
- fp.close()
+ self._fpclose(fp)
+
+ def _fpclose(self, fp):
+ assert self._fileRefCnt > 0
+ self._fileRefCnt -= 1
+ if not self._fileRefCnt and not self._filePassed:
+ fp.close()
class PyZipFile(ZipFile):
diff --git a/Misc/NEWS b/Misc/NEWS
index 2a43e3a..496be4e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -194,6 +194,11 @@ Core and Builtins
Library
-------
+- Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects
+ returned by ZipFile.open() can now operate independently of the ZipFile even
+ if the ZipFile was created by passing in a file-like object as the first
+ argument to the constructor.
+
- Issue #22966: Fix __pycache__ pyc file name clobber when pyc_compile is
asked to compile a source file containing multiple dots in the source file
name.