From b76bcc4ffc1f81a3cb5e6cabb77f127f71e2eddb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 26 Jan 2015 13:45:39 +0200 Subject: Issue #14099: Backout changeset e5bb3044402b (except adapted tests). --- Doc/library/zipfile.rst | 10 +++- Lib/test/test_zipfile.py | 116 ++++++++++++++++++++++------------------------- Lib/zipfile.py | 103 ++++++++++++++++------------------------- Misc/NEWS | 5 -- 4 files changed, 102 insertions(+), 132 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 465d786..1d23a7c 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -219,8 +219,14 @@ ZipFile Objects .. note:: - Objects returned by :meth:`.open` can operate independently of the - ZipFile. + If the ZipFile was created by passing in a file-like object as the first + argument to the constructor, then the object returned by :meth:`.open` shares the + ZipFile's file pointer. Under these circumstances, the object returned by + :meth:`.open` should not be used after any additional operations are performed + on the ZipFile object. If the ZipFile was created by passing in a string (the + filename) as the first argument to the constructor, then :meth:`.open` will + create a new file object that will be held by the ZipExtFile, allowing it to + operate independently of the ZipFile. .. note:: diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index d2da392..76e32fb 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -1653,52 +1653,35 @@ class TestsWithMultipleOpens(unittest.TestCase): def test_same_file(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, data2) - self.assertEqual(data1, self.data1) + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2: + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + self.assertEqual(data1, data2) + self.assertEqual(data1, self.data1) def test_different_file(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) def test_interleaved(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) - - def test_read_after_close(self): - for f in get_files(self): - self.make_test_archive(f) - with contextlib.ExitStack() as stack: - with zipfile.ZipFile(f, 'r') as zipf: - zopen1 = stack.enter_context(zipf.open('ones')) - zopen2 = stack.enter_context(zipf.open('twos')) + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: data1 = zopen1.read(500) data2 = zopen2.read(500) data1 += zopen1.read() @@ -1706,32 +1689,43 @@ class TestsWithMultipleOpens(unittest.TestCase): self.assertEqual(data1, self.data1) self.assertEqual(data2, self.data2) + def test_read_after_close(self): + self.make_test_archive(TESTFN2) + with contextlib.ExitStack() as stack: + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + zopen1 = stack.enter_context(zipf.open('ones')) + zopen2 = stack.enter_context(zipf.open('twos')) + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) + def test_read_after_write(self): - for f in get_files(self): - with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf: - zipf.writestr('ones', self.data1) - zipf.writestr('twos', self.data2) - with zipf.open('ones') as zopen1: - data1 = zopen1.read(500) - self.assertEqual(data1, self.data1[:500]) - with zipfile.ZipFile(f, 'r') as zipf: - data1 = zipf.read('ones') - data2 = zipf.read('twos') - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) + with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_DEFLATED) as zipf: + zipf.writestr('ones', self.data1) + zipf.writestr('twos', self.data2) + with zipf.open('ones') as zopen1: + data1 = zopen1.read(500) + self.assertEqual(data1, self.data1[:500]) + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + data1 = zipf.read('ones') + data2 = zipf.read('twos') + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) def test_write_after_read(self): - for f in get_files(self): - with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf: - zipf.writestr('ones', self.data1) - with zipf.open('ones') as zopen1: - zopen1.read(500) - zipf.writestr('twos', self.data2) - with zipfile.ZipFile(f, 'r') as zipf: - data1 = zipf.read('ones') - data2 = zipf.read('twos') - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) + with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipf: + zipf.writestr('ones', self.data1) + with zipf.open('ones') as zopen1: + zopen1.read(500) + zipf.writestr('twos', self.data2) + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + data1 = zipf.read('ones') + data2 = zipf.read('twos') + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) def test_many_opens(self): # Verify that read() and open() promptly close the file descriptor, diff --git a/Lib/zipfile.py b/Lib/zipfile.py index cc15ed3..bda6134 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -624,25 +624,6 @@ def _get_decompressor(compress_type): raise NotImplementedError("compression type %d" % (compress_type,)) -class _SharedFile: - def __init__(self, file, pos, close): - self._file = file - self._pos = pos - self._close = close - - def read(self, n=-1): - self._file.seek(self._pos) - data = self._file.read(n) - self._pos = self._file.tell() - return data - - def close(self): - if self._file is not None: - fileobj = self._file - self._file = None - self._close(fileobj) - - class ZipExtFile(io.BufferedIOBase): """File-like object for reading an archive member. Is returned by ZipFile.open(). @@ -928,7 +909,7 @@ class ZipFile: self.NameToInfo = {} # Find file info given name self.filelist = [] # List of ZipInfo instances for archive self.compression = compression # Method of compression - self.mode = mode + self.mode = key = mode.replace('b', '')[0] self.pwd = None self._comment = b'' @@ -937,33 +918,28 @@ class ZipFile: # No, it's a filename self._filePassed = 0 self.filename = file - modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b', - 'r+b': 'w+b', 'w+b': 'wb'} - filemode = modeDict[mode] - while True: - try: - self.fp = io.open(file, filemode) - except OSError: - if filemode in modeDict: - filemode = modeDict[filemode] - continue + modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} + try: + self.fp = io.open(file, modeDict[mode]) + except OSError: + if mode == 'a': + mode = key = 'w' + self.fp = io.open(file, modeDict[mode]) + else: raise - break else: self._filePassed = 1 self.fp = file self.filename = getattr(file, 'name', None) - self._fileRefCnt = 1 try: - if mode == 'r': + if key == 'r': self._RealGetContents() - elif mode == 'w': + elif key == 'w': # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = 0 - elif mode == 'a': + elif key == 'a': try: # See if file is a zip file self._RealGetContents() @@ -976,13 +952,13 @@ class ZipFile: # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = self.fp.tell() else: raise RuntimeError('Mode must be "r", "w" or "a"') except: fp = self.fp self.fp = None - self._fpclose(fp) + if not self._filePassed: + fp.close() raise def __enter__(self): @@ -1155,17 +1131,23 @@ class ZipFile: raise RuntimeError( "Attempt to read ZIP archive that was already closed") - # Make sure we have an info object - if isinstance(name, ZipInfo): - # 'name' is already an info object - zinfo = name + # Only open a new file for instances where we were not + # given a file object in the constructor + if self._filePassed: + zef_file = self.fp else: - # Get info object for name - zinfo = self.getinfo(name) + zef_file = io.open(self.filename, 'rb') - self._fileRefCnt += 1 - zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose) try: + # Make sure we have an info object + if isinstance(name, ZipInfo): + # 'name' is already an info object + zinfo = name + else: + # Get info object for name + zinfo = self.getinfo(name) + zef_file.seek(zinfo.header_offset, 0) + # Skip the file header: fheader = zef_file.read(sizeFileHeader) if len(fheader) != sizeFileHeader: @@ -1224,9 +1206,11 @@ class ZipFile: if h[11] != check_byte: raise RuntimeError("Bad password for file", name) - return ZipExtFile(zef_file, mode, zinfo, zd, True) + return ZipExtFile(zef_file, mode, zinfo, zd, + close_fileobj=not self._filePassed) except: - zef_file.close() + if not self._filePassed: + zef_file.close() raise def extract(self, member, path=None, pwd=None): @@ -1360,7 +1344,6 @@ class ZipFile: zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 - self.fp.seek(self.start_dir, 0) zinfo.header_offset = self.fp.tell() # Start of header bytes if zinfo.compress_type == ZIP_LZMA: # Compressed data includes an end-of-stream (EOS) marker @@ -1377,7 +1360,6 @@ class ZipFile: self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False)) - self.start_dir = self.fp.tell() return cmpr = _get_compressor(zinfo.compress_type) @@ -1416,10 +1398,10 @@ class ZipFile: raise RuntimeError('Compressed size larger than uncompressed size') # Seek backwards and write file header (which will now include # correct CRC and file sizes) - self.start_dir = self.fp.tell() # Preserve current position in file + position = self.fp.tell() # Preserve current position in file self.fp.seek(zinfo.header_offset, 0) self.fp.write(zinfo.FileHeader(zip64)) - self.fp.seek(self.start_dir, 0) + self.fp.seek(position, 0) self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo @@ -1448,7 +1430,6 @@ class ZipFile: "Attempt to write to ZIP archive that was already closed") zinfo.file_size = len(data) # Uncompressed size - self.fp.seek(self.start_dir, 0) zinfo.header_offset = self.fp.tell() # Start of header data if compress_type is not None: zinfo.compress_type = compress_type @@ -1477,7 +1458,6 @@ class ZipFile: self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, zinfo.file_size)) self.fp.flush() - self.start_dir = self.fp.tell() self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo @@ -1493,7 +1473,7 @@ class ZipFile: try: if self.mode in ("w", "a") and self._didModify: # write ending records - self.fp.seek(self.start_dir, 0) + pos1 = self.fp.tell() for zinfo in self.filelist: # write central directory dt = zinfo.date_time dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] @@ -1559,8 +1539,8 @@ class ZipFile: pos2 = self.fp.tell() # Write end-of-zip-archive record centDirCount = len(self.filelist) - centDirSize = pos2 - self.start_dir - centDirOffset = self.start_dir + centDirSize = pos2 - pos1 + centDirOffset = pos1 requires_zip64 = None if centDirCount > ZIP_FILECOUNT_LIMIT: requires_zip64 = "Files count" @@ -1596,13 +1576,8 @@ class ZipFile: finally: fp = self.fp self.fp = None - self._fpclose(fp) - - def _fpclose(self, fp): - assert self._fileRefCnt > 0 - self._fileRefCnt -= 1 - if not self._fileRefCnt and not self._filePassed: - fp.close() + if not self._filePassed: + fp.close() class PyZipFile(ZipFile): diff --git a/Misc/NEWS b/Misc/NEWS index 198092c..bf643d0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -132,11 +132,6 @@ Library - Issue #16043: Add a default limit for the amount of data xmlrpclib.gzip_decode will return. This resolves CVE-2013-1753. -- Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects - returned by ZipFile.open() can now operate independently of the ZipFile even - if the ZipFile was created by passing in a file-like object as the first - argument to the constructor. - - Issue #22966: Fix __pycache__ pyc file name clobber when pyc_compile is asked to compile a source file containing multiple dots in the source file name. -- cgit v0.12