summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_zipfile.py | 109
-rw-r--r-- Lib/zipfile.py | 77
-rw-r--r-- Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst | 1
3 files changed, 135 insertions, 52 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 0c8ffcd..8e437e5 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2397,37 +2397,49 @@ class CommandLineTest(unittest.TestCase):
consume = tuple
-def add_dirs(zipfile):
+def add_dirs(zf):
"""
- Given a writable zipfile, inject directory entries for
+ Given a writable zip file zf, inject directory entries for
any directories implied by the presence of children.
"""
- names = zipfile.namelist()
- consume(
- zipfile.writestr(name + "/", b"")
- for name in map(posixpath.dirname, names)
- if name and name + "/" not in names
- )
- return zipfile
+ for name in zipfile.Path._implied_dirs(zf.namelist()):
+ zf.writestr(name, b"")
+ return zf
-def build_abcde_files():
+def build_alpharep_fixture():
"""
Create a zip file with this structure:
.
├── a.txt
- └── b
- ├── c.txt
- └── d
- └── e.txt
+ ├── b
+ │ ├── c.txt
+ │ ├── d
+ │ │ └── e.txt
+ │ └── f.txt
+ └── g
+ └── h
+ └── i.txt
+
+ This fixture has the following key characteristics:
+
+ - a file at the root (a)
+ - a file two levels deep (b/d/e)
+ - multiple files in a directory (b/c, b/f)
+ - a directory containing only a directory (g/h)
+
+ "alpha" because it uses alphabet
+ "rep" because it's a representative example
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("a.txt", b"content of a")
zf.writestr("b/c.txt", b"content of c")
zf.writestr("b/d/e.txt", b"content of e")
- zf.filename = "abcde.zip"
+ zf.writestr("b/f.txt", b"content of f")
+ zf.writestr("g/h/i.txt", b"content of i")
+ zf.filename = "alpharep.zip"
return zf
@@ -2436,60 +2448,64 @@ class TestPath(unittest.TestCase):
self.fixtures = contextlib.ExitStack()
self.addCleanup(self.fixtures.close)
- def zipfile_abcde(self):
+ def zipfile_alpharep(self):
with self.subTest():
- yield build_abcde_files()
+ yield build_alpharep_fixture()
with self.subTest():
- yield add_dirs(build_abcde_files())
+ yield add_dirs(build_alpharep_fixture())
def zipfile_ondisk(self):
tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
- for zipfile_abcde in self.zipfile_abcde():
- buffer = zipfile_abcde.fp
- zipfile_abcde.close()
- path = tmpdir / zipfile_abcde.filename
+ for alpharep in self.zipfile_alpharep():
+ buffer = alpharep.fp
+ alpharep.close()
+ path = tmpdir / alpharep.filename
with path.open("wb") as strm:
strm.write(buffer.getvalue())
yield path
- def test_iterdir_istype(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ def test_iterdir_and_types(self):
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert root.is_dir()
- a, b = root.iterdir()
+ a, b, g = root.iterdir()
assert a.is_file()
assert b.is_dir()
- c, d = b.iterdir()
- assert c.is_file()
+ assert g.is_dir()
+ c, f, d = b.iterdir()
+ assert c.is_file() and f.is_file()
e, = d.iterdir()
assert e.is_file()
+ h, = g.iterdir()
+ i, = h.iterdir()
+ assert i.is_file()
def test_open(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
- a, b = root.iterdir()
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
+ a, b, g = root.iterdir()
with a.open() as strm:
data = strm.read()
assert data == b"content of a"
def test_read(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
- a, b = root.iterdir()
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
+ a, b, g = root.iterdir()
assert a.read_text() == "content of a"
assert a.read_bytes() == b"content of a"
def test_joinpath(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
a = root.joinpath("a")
assert a.is_file()
e = root.joinpath("b").joinpath("d").joinpath("e.txt")
assert e.read_text() == "content of e"
def test_traverse_truediv(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
a = root / "a"
assert a.is_file()
e = root / "b" / "d" / "e.txt"
@@ -2504,26 +2520,27 @@ class TestPath(unittest.TestCase):
zipfile.Path(pathlike)
def test_traverse_pathlike(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
root / pathlib.Path("a")
def test_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'a').parent.at == ''
assert (root / 'a' / 'b').parent.at == 'a/'
def test_dir_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'b').parent.at == ''
assert (root / 'b/').parent.at == ''
def test_missing_dir_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'missing dir/').parent.at == ''
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 3c1f123..dfd0907 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -7,6 +7,7 @@ import binascii
import functools
import importlib.util
import io
+import itertools
import os
import posixpath
import shutil
@@ -2104,6 +2105,65 @@ class PyZipFile(ZipFile):
return (fname, archivename)
+def _unique_everseen(iterable, key=None):
+ "List unique elements, preserving order. Remember all elements ever seen."
+ # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+ # unique_everseen('ABBCcAD', str.lower) --> A B C D
+ seen = set()
+ seen_add = seen.add
+ if key is None:
+ for element in itertools.filterfalse(seen.__contains__, iterable):
+ seen_add(element)
+ yield element
+ else:
+ for element in iterable:
+ k = key(element)
+ if k not in seen:
+ seen_add(k)
+ yield element
+
+
+def _parents(path):
+ """
+ Given a path with elements separated by
+ posixpath.sep, generate all parents of that path.
+
+ >>> list(_parents('b/d'))
+ ['b']
+ >>> list(_parents('/b/d/'))
+ ['/b']
+ >>> list(_parents('b/d/f/'))
+ ['b/d', 'b']
+ >>> list(_parents('b'))
+ []
+ >>> list(_parents(''))
+ []
+ """
+ return itertools.islice(_ancestry(path), 1, None)
+
+
+def _ancestry(path):
+ """
+ Given a path with elements separated by
+ posixpath.sep, generate all elements of that path
+
+ >>> list(_ancestry('b/d'))
+ ['b/d', 'b']
+ >>> list(_ancestry('/b/d/'))
+ ['/b/d', '/b']
+ >>> list(_ancestry('b/d/f/'))
+ ['b/d/f', 'b/d', 'b']
+ >>> list(_ancestry('b'))
+ ['b']
+ >>> list(_ancestry(''))
+ []
+ """
+ path = path.rstrip(posixpath.sep)
+ while path and path != posixpath.sep:
+ yield path
+ path, tail = posixpath.split(path)
+
+
class Path:
"""
A pathlib-compatible interface for zip files.
@@ -2227,12 +2287,17 @@ class Path:
__truediv__ = joinpath
@staticmethod
- def _add_implied_dirs(names):
- return names + [
- name + "/"
- for name in map(posixpath.dirname, names)
- if name and name + "/" not in names
- ]
+ def _implied_dirs(names):
+ return _unique_everseen(
+ parent + "/"
+ for name in names
+ for parent in _parents(name)
+ if parent + "/" not in names
+ )
+
+ @classmethod
+ def _add_implied_dirs(cls, names):
+ return names + list(cls._implied_dirs(names))
@property
def parent(self):
diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst
new file mode 100644
index 0000000..f9ec6a3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst
@@ -0,0 +1 @@
+In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded.