From 03185f0c150ebc52d41dd5ea6f369c7b5ba9fc16 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 14 Jul 2023 16:40:46 -0400 Subject: gh-106752: Move zipfile._path into its own package (#106753) * gh-106752: Move zipfile._path into its own package so it may have supplementary behavior. * Add blurb --- .github/CODEOWNERS | 2 +- Lib/test/test_zipfile/_functools.py | 9 - Lib/test/test_zipfile/_itertools.py | 79 ---- Lib/test/test_zipfile/_path/__init__.py | 0 Lib/test/test_zipfile/_path/_functools.py | 9 + Lib/test/test_zipfile/_path/_itertools.py | 79 ++++ Lib/test/test_zipfile/_path/_support.py | 9 + Lib/test/test_zipfile/_path/_test_params.py | 39 ++ Lib/test/test_zipfile/_path/test_complexity.py | 24 + Lib/test/test_zipfile/_path/test_path.py | 525 +++++++++++++++++++++ Lib/test/test_zipfile/_support.py | 9 - Lib/test/test_zipfile/_test_params.py | 39 -- Lib/test/test_zipfile/test_complexity.py | 24 - Lib/test/test_zipfile/test_path.py | 525 --------------------- Lib/zipfile/_path.py | 400 ---------------- Lib/zipfile/_path/__init__.py | 400 ++++++++++++++++ Makefile.pre.in | 3 +- .../2023-07-14-16-20-06.gh-issue-106752.gd1i6D.rst | 2 + 18 files changed, 1090 insertions(+), 1087 deletions(-) delete mode 100644 Lib/test/test_zipfile/_functools.py delete mode 100644 Lib/test/test_zipfile/_itertools.py create mode 100644 Lib/test/test_zipfile/_path/__init__.py create mode 100644 Lib/test/test_zipfile/_path/_functools.py create mode 100644 Lib/test/test_zipfile/_path/_itertools.py create mode 100644 Lib/test/test_zipfile/_path/_support.py create mode 100644 Lib/test/test_zipfile/_path/_test_params.py create mode 100644 Lib/test/test_zipfile/_path/test_complexity.py create mode 100644 Lib/test/test_zipfile/_path/test_path.py delete mode 100644 Lib/test/test_zipfile/_support.py delete mode 100644 Lib/test/test_zipfile/_test_params.py delete mode 100644 Lib/test/test_zipfile/test_complexity.py delete mode 100644 Lib/test/test_zipfile/test_path.py delete mode 100644 Lib/zipfile/_path.py create mode 100644 Lib/zipfile/_path/__init__.py create mode 100644 Misc/NEWS.d/next/Tests/2023-07-14-16-20-06.gh-issue-106752.gd1i6D.rst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5b471c7..234a954 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -172,7 +172,7 @@ Doc/c-api/stable.rst @encukou **/*pathlib* @barneygale # zipfile.Path -**/*zipfile/*_path.py @jaraco +**/*zipfile/_path/* @jaraco # Argument Clinic /Tools/clinic/** @erlend-aasland @AlexWaygood diff --git a/Lib/test/test_zipfile/_functools.py b/Lib/test/test_zipfile/_functools.py deleted file mode 100644 index 75f2b20..0000000 --- a/Lib/test/test_zipfile/_functools.py +++ /dev/null @@ -1,9 +0,0 @@ -import functools - - -# from jaraco.functools 3.5.2 -def compose(*funcs): - def compose_two(f1, f2): - return lambda *args, **kwargs: f1(f2(*args, **kwargs)) - - return functools.reduce(compose_two, funcs) diff --git a/Lib/test/test_zipfile/_itertools.py b/Lib/test/test_zipfile/_itertools.py deleted file mode 100644 index f735dd2..0000000 --- a/Lib/test/test_zipfile/_itertools.py +++ /dev/null @@ -1,79 +0,0 @@ -import itertools -from collections import deque -from itertools import islice - - -# from jaraco.itertools 6.3.0 -class Counter: - """ - Wrap an iterable in an object that stores the count of items - that pass through it. - - >>> items = Counter(range(20)) - >>> items.count - 0 - >>> values = list(items) - >>> items.count - 20 - """ - - def __init__(self, i): - self.count = 0 - self.iter = zip(itertools.count(1), i) - - def __iter__(self): - return self - - def __next__(self): - self.count, result = next(self.iter) - return result - - -# from more_itertools v8.13.0 -def always_iterable(obj, base_type=(str, bytes)): - if obj is None: - return iter(()) - - if (base_type is not None) and isinstance(obj, base_type): - return iter((obj,)) - - try: - return iter(obj) - except TypeError: - return iter((obj,)) - - -# from more_itertools v9.0.0 -def consume(iterator, n=None): - """Advance *iterable* by *n* steps. If *n* is ``None``, consume it - entirely. - Efficiently exhausts an iterator without returning values. Defaults to - consuming the whole iterator, but an optional second argument may be - provided to limit consumption. - >>> i = (x for x in range(10)) - >>> next(i) - 0 - >>> consume(i, 3) - >>> next(i) - 4 - >>> consume(i) - >>> next(i) - Traceback (most recent call last): - File "", line 1, in - StopIteration - If the iterator has fewer items remaining than the provided limit, the - whole iterator will be consumed. - >>> i = (x for x in range(3)) - >>> consume(i, 5) - >>> next(i) - Traceback (most recent call last): - File "", line 1, in - StopIteration - """ - # Use functions that consume iterators at C speed. - if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) diff --git a/Lib/test/test_zipfile/_path/__init__.py b/Lib/test/test_zipfile/_path/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Lib/test/test_zipfile/_path/_functools.py b/Lib/test/test_zipfile/_path/_functools.py new file mode 100644 index 0000000..75f2b20 --- /dev/null +++ b/Lib/test/test_zipfile/_path/_functools.py @@ -0,0 +1,9 @@ +import functools + + +# from jaraco.functools 3.5.2 +def compose(*funcs): + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) diff --git a/Lib/test/test_zipfile/_path/_itertools.py b/Lib/test/test_zipfile/_path/_itertools.py new file mode 100644 index 0000000..f735dd2 --- /dev/null +++ b/Lib/test/test_zipfile/_path/_itertools.py @@ -0,0 +1,79 @@ +import itertools +from collections import deque +from itertools import islice + + +# from jaraco.itertools 6.3.0 +class Counter: + """ + Wrap an iterable in an object that stores the count of items + that pass through it. + + >>> items = Counter(range(20)) + >>> items.count + 0 + >>> values = list(items) + >>> items.count + 20 + """ + + def __init__(self, i): + self.count = 0 + self.iter = zip(itertools.count(1), i) + + def __iter__(self): + return self + + def __next__(self): + self.count, result = next(self.iter) + return result + + +# from more_itertools v8.13.0 +def always_iterable(obj, base_type=(str, bytes)): + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +# from more_itertools v9.0.0 +def consume(iterator, n=None): + """Advance *iterable* by *n* steps. If *n* is ``None``, consume it + entirely. + Efficiently exhausts an iterator without returning values. Defaults to + consuming the whole iterator, but an optional second argument may be + provided to limit consumption. + >>> i = (x for x in range(10)) + >>> next(i) + 0 + >>> consume(i, 3) + >>> next(i) + 4 + >>> consume(i) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + If the iterator has fewer items remaining than the provided limit, the + whole iterator will be consumed. + >>> i = (x for x in range(3)) + >>> consume(i, 5) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) diff --git a/Lib/test/test_zipfile/_path/_support.py b/Lib/test/test_zipfile/_path/_support.py new file mode 100644 index 0000000..1afdf3b --- /dev/null +++ b/Lib/test/test_zipfile/_path/_support.py @@ -0,0 +1,9 @@ +import importlib +import unittest + + +def import_or_skip(name): + try: + return importlib.import_module(name) + except ImportError: # pragma: no cover + raise unittest.SkipTest(f'Unable to import {name}') diff --git a/Lib/test/test_zipfile/_path/_test_params.py b/Lib/test/test_zipfile/_path/_test_params.py new file mode 100644 index 0000000..bc95b4e --- /dev/null +++ b/Lib/test/test_zipfile/_path/_test_params.py @@ -0,0 +1,39 @@ +import types +import functools + +from ._itertools import always_iterable + + +def parameterize(names, value_groups): + """ + Decorate a test method to run it as a set of subtests. + + Modeled after pytest.parametrize. + """ + + def decorator(func): + @functools.wraps(func) + def wrapped(self): + for values in value_groups: + resolved = map(Invoked.eval, always_iterable(values)) + params = dict(zip(always_iterable(names), resolved)) + with self.subTest(**params): + func(self, **params) + + return wrapped + + return decorator + + +class Invoked(types.SimpleNamespace): + """ + Wrap a function to be invoked for each usage. + """ + + @classmethod + def wrap(cls, func): + return cls(func=func) + + @classmethod + def eval(cls, cand): + return cand.func() if isinstance(cand, cls) else cand diff --git a/Lib/test/test_zipfile/_path/test_complexity.py b/Lib/test/test_zipfile/_path/test_complexity.py new file mode 100644 index 0000000..3432dc3 --- /dev/null +++ b/Lib/test/test_zipfile/_path/test_complexity.py @@ -0,0 +1,24 @@ +import unittest +import string +import zipfile + +from ._functools import compose +from ._itertools import consume + +from ._support import import_or_skip + + +big_o = import_or_skip('big_o') + + +class TestComplexity(unittest.TestCase): + def test_implied_dirs_performance(self): + best, others = big_o.big_o( + compose(consume, zipfile.CompleteDirs._implied_dirs), + lambda size: [ + '/'.join(string.ascii_lowercase + str(n)) for n in range(size) + ], + max_n=1000, + min_n=1, + ) + assert best <= big_o.complexities.Linear diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py new file mode 100644 index 0000000..aff91e5 --- /dev/null +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -0,0 +1,525 @@ +import io +import itertools +import contextlib +import pathlib +import pickle +import sys +import unittest +import zipfile + +from ._functools import compose +from ._itertools import Counter + +from ._test_params import parameterize, Invoked + +from test.support.os_helper import temp_dir + + +class jaraco: + class itertools: + Counter = Counter + + +def add_dirs(zf): + """ + Given a writable zip file zf, inject directory entries for + any directories implied by the presence of children. + """ + for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()): + zf.writestr(name, b"") + return zf + + +def build_alpharep_fixture(): + """ + Create a zip file with this structure: + + . + ├── a.txt + ├── b + │ ├── c.txt + │ ├── d + │ │ └── e.txt + │ └── f.txt + └── g + └── h + └── i.txt + + This fixture has the following key characteristics: + + - a file at the root (a) + - a file two levels deep (b/d/e) + - multiple files in a directory (b/c, b/f) + - a directory containing only a directory (g/h) + + "alpha" because it uses alphabet + "rep" because it's a representative example + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("a.txt", b"content of a") + zf.writestr("b/c.txt", b"content of c") + zf.writestr("b/d/e.txt", b"content of e") + zf.writestr("b/f.txt", b"content of f") + zf.writestr("g/h/i.txt", b"content of i") + zf.filename = "alpharep.zip" + return zf + + +alpharep_generators = [ + Invoked.wrap(build_alpharep_fixture), + Invoked.wrap(compose(add_dirs, build_alpharep_fixture)), +] + +pass_alpharep = parameterize(['alpharep'], alpharep_generators) + + +class TestPath(unittest.TestCase): + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + def zipfile_ondisk(self, alpharep): + tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) + buffer = alpharep.fp + alpharep.close() + path = tmpdir / alpharep.filename + with path.open("wb") as strm: + strm.write(buffer.getvalue()) + return path + + @pass_alpharep + def test_iterdir_and_types(self, alpharep): + root = zipfile.Path(alpharep) + assert root.is_dir() + a, b, g = root.iterdir() + assert a.is_file() + assert b.is_dir() + assert g.is_dir() + c, f, d = b.iterdir() + assert c.is_file() and f.is_file() + (e,) = d.iterdir() + assert e.is_file() + (h,) = g.iterdir() + (i,) = h.iterdir() + assert i.is_file() + + @pass_alpharep + def test_is_file_missing(self, alpharep): + root = zipfile.Path(alpharep) + assert not root.joinpath('missing.txt').is_file() + + @pass_alpharep + def test_iterdir_on_file(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + with self.assertRaises(ValueError): + a.iterdir() + + @pass_alpharep + def test_subdir_is_dir(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'b').is_dir() + assert (root / 'b/').is_dir() + assert (root / 'g').is_dir() + assert (root / 'g/').is_dir() + + @pass_alpharep + def test_open(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + with a.open(encoding="utf-8") as strm: + data = strm.read() + self.assertEqual(data, "content of a") + with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError + data = strm.read() + self.assertEqual(data, "content of a") + + def test_open_encoding_utf16(self): + in_memory_file = io.BytesIO() + zf = zipfile.ZipFile(in_memory_file, "w") + zf.writestr("path/16.txt", "This was utf-16".encode("utf-16")) + zf.filename = "test_open_utf16.zip" + root = zipfile.Path(zf) + (path,) = root.iterdir() + u16 = path.joinpath("16.txt") + with u16.open('r', "utf-16") as strm: + data = strm.read() + assert data == "This was utf-16" + with u16.open(encoding="utf-16") as strm: + data = strm.read() + assert data == "This was utf-16" + + def test_open_encoding_errors(self): + in_memory_file = io.BytesIO() + zf = zipfile.ZipFile(in_memory_file, "w") + zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.") + zf.filename = "test_read_text_encoding_errors.zip" + root = zipfile.Path(zf) + (path,) = root.iterdir() + u16 = path.joinpath("bad-utf8.bin") + + # encoding= as a positional argument for gh-101144. + data = u16.read_text("utf-8", errors="ignore") + assert data == "invalid utf-8: ." + with u16.open("r", "utf-8", errors="surrogateescape") as f: + assert f.read() == "invalid utf-8: \udcff\udcff." + + # encoding= both positional and keyword is an error; gh-101144. + with self.assertRaisesRegex(TypeError, "encoding"): + data = u16.read_text("utf-8", encoding="utf-8") + + # both keyword arguments work. + with u16.open("r", encoding="utf-8", errors="strict") as f: + # error during decoding with wrong codec. + with self.assertRaises(UnicodeDecodeError): + f.read() + + @unittest.skipIf( + not getattr(sys.flags, 'warn_default_encoding', 0), + "Requires warn_default_encoding", + ) + @pass_alpharep + def test_encoding_warnings(self, alpharep): + """EncodingWarning must blame the read_text and open calls.""" + assert sys.flags.warn_default_encoding + root = zipfile.Path(alpharep) + with self.assertWarns(EncodingWarning) as wc: + root.joinpath("a.txt").read_text() + assert __file__ == wc.filename + with self.assertWarns(EncodingWarning) as wc: + root.joinpath("a.txt").open("r").close() + assert __file__ == wc.filename + + def test_open_write(self): + """ + If the zipfile is open for write, it should be possible to + write bytes or text to it. + """ + zf = zipfile.Path(zipfile.ZipFile(io.BytesIO(), mode='w')) + with zf.joinpath('file.bin').open('wb') as strm: + strm.write(b'binary contents') + with zf.joinpath('file.txt').open('w', encoding="utf-8") as strm: + strm.write('text file') + + def test_open_extant_directory(self): + """ + Attempting to open a directory raises IsADirectoryError. + """ + zf = zipfile.Path(add_dirs(build_alpharep_fixture())) + with self.assertRaises(IsADirectoryError): + zf.joinpath('b').open() + + @pass_alpharep + def test_open_binary_invalid_args(self, alpharep): + root = zipfile.Path(alpharep) + with self.assertRaises(ValueError): + root.joinpath('a.txt').open('rb', encoding='utf-8') + with self.assertRaises(ValueError): + root.joinpath('a.txt').open('rb', 'utf-8') + + def test_open_missing_directory(self): + """ + Attempting to open a missing directory raises FileNotFoundError. + """ + zf = zipfile.Path(add_dirs(build_alpharep_fixture())) + with self.assertRaises(FileNotFoundError): + zf.joinpath('z').open() + + @pass_alpharep + def test_read(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + assert a.read_text(encoding="utf-8") == "content of a" + # Also check positional encoding arg (gh-101144). + assert a.read_text("utf-8") == "content of a" + assert a.read_bytes() == b"content of a" + + @pass_alpharep + def test_joinpath(self, alpharep): + root = zipfile.Path(alpharep) + a = root.joinpath("a.txt") + assert a.is_file() + e = root.joinpath("b").joinpath("d").joinpath("e.txt") + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_joinpath_multiple(self, alpharep): + root = zipfile.Path(alpharep) + e = root.joinpath("b", "d", "e.txt") + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_traverse_truediv(self, alpharep): + root = zipfile.Path(alpharep) + a = root / "a.txt" + assert a.is_file() + e = root / "b" / "d" / "e.txt" + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_pathlike_construction(self, alpharep): + """ + zipfile.Path should be constructable from a path-like object + """ + zipfile_ondisk = self.zipfile_ondisk(alpharep) + pathlike = pathlib.Path(str(zipfile_ondisk)) + zipfile.Path(pathlike) + + @pass_alpharep + def test_traverse_pathlike(self, alpharep): + root = zipfile.Path(alpharep) + root / pathlib.Path("a") + + @pass_alpharep + def test_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'a').parent.at == '' + assert (root / 'a' / 'b').parent.at == 'a/' + + @pass_alpharep + def test_dir_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'b').parent.at == '' + assert (root / 'b/').parent.at == '' + + @pass_alpharep + def test_missing_dir_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'missing dir/').parent.at == '' + + @pass_alpharep + def test_mutability(self, alpharep): + """ + If the underlying zipfile is changed, the Path object should + reflect that change. + """ + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + alpharep.writestr('foo.txt', 'foo') + alpharep.writestr('bar/baz.txt', 'baz') + assert any(child.name == 'foo.txt' for child in root.iterdir()) + assert (root / 'foo.txt').read_text(encoding="utf-8") == 'foo' + (baz,) = (root / 'bar').iterdir() + assert baz.read_text(encoding="utf-8") == 'baz' + + HUGE_ZIPFILE_NUM_ENTRIES = 2**13 + + def huge_zipfile(self): + """Create a read-only zipfile with a huge number of entries entries.""" + strm = io.BytesIO() + zf = zipfile.ZipFile(strm, "w") + for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)): + zf.writestr(entry, entry) + zf.mode = 'r' + return zf + + def test_joinpath_constant_time(self): + """ + Ensure joinpath on items in zipfile is linear time. + """ + root = zipfile.Path(self.huge_zipfile()) + entries = jaraco.itertools.Counter(root.iterdir()) + for entry in entries: + entry.joinpath('suffix') + # Check the file iterated all items + assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES + + @pass_alpharep + def test_read_does_not_close(self, alpharep): + alpharep = self.zipfile_ondisk(alpharep) + with zipfile.ZipFile(alpharep) as file: + for rep in range(2): + zipfile.Path(file, 'a.txt').read_text(encoding="utf-8") + + @pass_alpharep + def test_subclass(self, alpharep): + class Subclass(zipfile.Path): + pass + + root = Subclass(alpharep) + assert isinstance(root / 'b', Subclass) + + @pass_alpharep + def test_filename(self, alpharep): + root = zipfile.Path(alpharep) + assert root.filename == pathlib.Path('alpharep.zip') + + @pass_alpharep + def test_root_name(self, alpharep): + """ + The name of the root should be the name of the zipfile + """ + root = zipfile.Path(alpharep) + assert root.name == 'alpharep.zip' == root.filename.name + + @pass_alpharep + def test_suffix(self, alpharep): + """ + The suffix of the root should be the suffix of the zipfile. + The suffix of each nested file is the final component's last suffix, if any. + Includes the leading period, just like pathlib.Path. + """ + root = zipfile.Path(alpharep) + assert root.suffix == '.zip' == root.filename.suffix + + b = root / "b.txt" + assert b.suffix == ".txt" + + c = root / "c" / "filename.tar.gz" + assert c.suffix == ".gz" + + d = root / "d" + assert d.suffix == "" + + @pass_alpharep + def test_suffixes(self, alpharep): + """ + The suffix of the root should be the suffix of the zipfile. + The suffix of each nested file is the final component's last suffix, if any. + Includes the leading period, just like pathlib.Path. + """ + root = zipfile.Path(alpharep) + assert root.suffixes == ['.zip'] == root.filename.suffixes + + b = root / 'b.txt' + assert b.suffixes == ['.txt'] + + c = root / 'c' / 'filename.tar.gz' + assert c.suffixes == ['.tar', '.gz'] + + d = root / 'd' + assert d.suffixes == [] + + e = root / '.hgrc' + assert e.suffixes == [] + + @pass_alpharep + def test_stem(self, alpharep): + """ + The final path component, without its suffix + """ + root = zipfile.Path(alpharep) + assert root.stem == 'alpharep' == root.filename.stem + + b = root / "b.txt" + assert b.stem == "b" + + c = root / "c" / "filename.tar.gz" + assert c.stem == "filename.tar" + + d = root / "d" + assert d.stem == "d" + + @pass_alpharep + def test_root_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert root.parent == pathlib.Path('.') + root.root.filename = 'foo/bar.zip' + assert root.parent == pathlib.Path('foo') + + @pass_alpharep + def test_root_unnamed(self, alpharep): + """ + It is an error to attempt to get the name + or parent of an unnamed zipfile. + """ + alpharep.filename = None + root = zipfile.Path(alpharep) + with self.assertRaises(TypeError): + root.name + with self.assertRaises(TypeError): + root.parent + + # .name and .parent should still work on subs + sub = root / "b" + assert sub.name == "b" + assert sub.parent + + @pass_alpharep + def test_match_and_glob(self, alpharep): + root = zipfile.Path(alpharep) + assert not root.match("*.txt") + + assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")] + + files = root.glob("**/*.txt") + assert all(each.match("*.txt") for each in files) + + assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt")) + + def test_glob_empty(self): + root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w')) + with self.assertRaises(ValueError): + root.glob('') + + @pass_alpharep + def test_eq_hash(self, alpharep): + root = zipfile.Path(alpharep) + assert root == zipfile.Path(alpharep) + + assert root != (root / "a.txt") + assert (root / "a.txt") == (root / "a.txt") + + root = zipfile.Path(alpharep) + assert root in {root} + + @pass_alpharep + def test_is_symlink(self, alpharep): + """ + See python/cpython#82102 for symlink support beyond this object. + """ + + root = zipfile.Path(alpharep) + assert not root.is_symlink() + + @pass_alpharep + def test_relative_to(self, alpharep): + root = zipfile.Path(alpharep) + relative = root.joinpath("b", "c.txt").relative_to(root / "b") + assert str(relative) == "c.txt" + + relative = root.joinpath("b", "d", "e.txt").relative_to(root / "b") + assert str(relative) == "d/e.txt" + + @pass_alpharep + def test_inheritance(self, alpharep): + cls = type('PathChild', (zipfile.Path,), {}) + file = cls(alpharep).joinpath('some dir').parent + assert isinstance(file, cls) + + @parameterize( + ['alpharep', 'path_type', 'subpath'], + itertools.product( + alpharep_generators, + [str, pathlib.Path], + ['', 'b/'], + ), + ) + def test_pickle(self, alpharep, path_type, subpath): + zipfile_ondisk = path_type(self.zipfile_ondisk(alpharep)) + + saved_1 = pickle.dumps(zipfile.Path(zipfile_ondisk, at=subpath)) + restored_1 = pickle.loads(saved_1) + first, *rest = restored_1.iterdir() + assert first.read_text(encoding='utf-8').startswith('content of ') + + @pass_alpharep + def test_extract_orig_with_implied_dirs(self, alpharep): + """ + A zip file wrapped in a Path should extract even with implied dirs. + """ + source_path = self.zipfile_ondisk(alpharep) + zf = zipfile.ZipFile(source_path) + # wrap the zipfile for its side effect + zipfile.Path(zf) + zf.extractall(source_path.parent) + + @pass_alpharep + def test_getinfo_missing(self, alpharep): + """ + Validate behavior of getinfo on original zipfile after wrapping. + """ + zipfile.Path(alpharep) + with self.assertRaises(KeyError): + alpharep.getinfo('does-not-exist') diff --git a/Lib/test/test_zipfile/_support.py b/Lib/test/test_zipfile/_support.py deleted file mode 100644 index 1afdf3b..0000000 --- a/Lib/test/test_zipfile/_support.py +++ /dev/null @@ -1,9 +0,0 @@ -import importlib -import unittest - - -def import_or_skip(name): - try: - return importlib.import_module(name) - except ImportError: # pragma: no cover - raise unittest.SkipTest(f'Unable to import {name}') diff --git a/Lib/test/test_zipfile/_test_params.py b/Lib/test/test_zipfile/_test_params.py deleted file mode 100644 index bc95b4e..0000000 --- a/Lib/test/test_zipfile/_test_params.py +++ /dev/null @@ -1,39 +0,0 @@ -import types -import functools - -from ._itertools import always_iterable - - -def parameterize(names, value_groups): - """ - Decorate a test method to run it as a set of subtests. - - Modeled after pytest.parametrize. - """ - - def decorator(func): - @functools.wraps(func) - def wrapped(self): - for values in value_groups: - resolved = map(Invoked.eval, always_iterable(values)) - params = dict(zip(always_iterable(names), resolved)) - with self.subTest(**params): - func(self, **params) - - return wrapped - - return decorator - - -class Invoked(types.SimpleNamespace): - """ - Wrap a function to be invoked for each usage. - """ - - @classmethod - def wrap(cls, func): - return cls(func=func) - - @classmethod - def eval(cls, cand): - return cand.func() if isinstance(cand, cls) else cand diff --git a/Lib/test/test_zipfile/test_complexity.py b/Lib/test/test_zipfile/test_complexity.py deleted file mode 100644 index 3432dc3..0000000 --- a/Lib/test/test_zipfile/test_complexity.py +++ /dev/null @@ -1,24 +0,0 @@ -import unittest -import string -import zipfile - -from ._functools import compose -from ._itertools import consume - -from ._support import import_or_skip - - -big_o = import_or_skip('big_o') - - -class TestComplexity(unittest.TestCase): - def test_implied_dirs_performance(self): - best, others = big_o.big_o( - compose(consume, zipfile.CompleteDirs._implied_dirs), - lambda size: [ - '/'.join(string.ascii_lowercase + str(n)) for n in range(size) - ], - max_n=1000, - min_n=1, - ) - assert best <= big_o.complexities.Linear diff --git a/Lib/test/test_zipfile/test_path.py b/Lib/test/test_zipfile/test_path.py deleted file mode 100644 index aff91e5..0000000 --- a/Lib/test/test_zipfile/test_path.py +++ /dev/null @@ -1,525 +0,0 @@ -import io -import itertools -import contextlib -import pathlib -import pickle -import sys -import unittest -import zipfile - -from ._functools import compose -from ._itertools import Counter - -from ._test_params import parameterize, Invoked - -from test.support.os_helper import temp_dir - - -class jaraco: - class itertools: - Counter = Counter - - -def add_dirs(zf): - """ - Given a writable zip file zf, inject directory entries for - any directories implied by the presence of children. - """ - for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()): - zf.writestr(name, b"") - return zf - - -def build_alpharep_fixture(): - """ - Create a zip file with this structure: - - . - ├── a.txt - ├── b - │ ├── c.txt - │ ├── d - │ │ └── e.txt - │ └── f.txt - └── g - └── h - └── i.txt - - This fixture has the following key characteristics: - - - a file at the root (a) - - a file two levels deep (b/d/e) - - multiple files in a directory (b/c, b/f) - - a directory containing only a directory (g/h) - - "alpha" because it uses alphabet - "rep" because it's a representative example - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr("a.txt", b"content of a") - zf.writestr("b/c.txt", b"content of c") - zf.writestr("b/d/e.txt", b"content of e") - zf.writestr("b/f.txt", b"content of f") - zf.writestr("g/h/i.txt", b"content of i") - zf.filename = "alpharep.zip" - return zf - - -alpharep_generators = [ - Invoked.wrap(build_alpharep_fixture), - Invoked.wrap(compose(add_dirs, build_alpharep_fixture)), -] - -pass_alpharep = parameterize(['alpharep'], alpharep_generators) - - -class TestPath(unittest.TestCase): - def setUp(self): - self.fixtures = contextlib.ExitStack() - self.addCleanup(self.fixtures.close) - - def zipfile_ondisk(self, alpharep): - tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) - buffer = alpharep.fp - alpharep.close() - path = tmpdir / alpharep.filename - with path.open("wb") as strm: - strm.write(buffer.getvalue()) - return path - - @pass_alpharep - def test_iterdir_and_types(self, alpharep): - root = zipfile.Path(alpharep) - assert root.is_dir() - a, b, g = root.iterdir() - assert a.is_file() - assert b.is_dir() - assert g.is_dir() - c, f, d = b.iterdir() - assert c.is_file() and f.is_file() - (e,) = d.iterdir() - assert e.is_file() - (h,) = g.iterdir() - (i,) = h.iterdir() - assert i.is_file() - - @pass_alpharep - def test_is_file_missing(self, alpharep): - root = zipfile.Path(alpharep) - assert not root.joinpath('missing.txt').is_file() - - @pass_alpharep - def test_iterdir_on_file(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - with self.assertRaises(ValueError): - a.iterdir() - - @pass_alpharep - def test_subdir_is_dir(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'b').is_dir() - assert (root / 'b/').is_dir() - assert (root / 'g').is_dir() - assert (root / 'g/').is_dir() - - @pass_alpharep - def test_open(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - with a.open(encoding="utf-8") as strm: - data = strm.read() - self.assertEqual(data, "content of a") - with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError - data = strm.read() - self.assertEqual(data, "content of a") - - def test_open_encoding_utf16(self): - in_memory_file = io.BytesIO() - zf = zipfile.ZipFile(in_memory_file, "w") - zf.writestr("path/16.txt", "This was utf-16".encode("utf-16")) - zf.filename = "test_open_utf16.zip" - root = zipfile.Path(zf) - (path,) = root.iterdir() - u16 = path.joinpath("16.txt") - with u16.open('r', "utf-16") as strm: - data = strm.read() - assert data == "This was utf-16" - with u16.open(encoding="utf-16") as strm: - data = strm.read() - assert data == "This was utf-16" - - def test_open_encoding_errors(self): - in_memory_file = io.BytesIO() - zf = zipfile.ZipFile(in_memory_file, "w") - zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.") - zf.filename = "test_read_text_encoding_errors.zip" - root = zipfile.Path(zf) - (path,) = root.iterdir() - u16 = path.joinpath("bad-utf8.bin") - - # encoding= as a positional argument for gh-101144. - data = u16.read_text("utf-8", errors="ignore") - assert data == "invalid utf-8: ." - with u16.open("r", "utf-8", errors="surrogateescape") as f: - assert f.read() == "invalid utf-8: \udcff\udcff." - - # encoding= both positional and keyword is an error; gh-101144. - with self.assertRaisesRegex(TypeError, "encoding"): - data = u16.read_text("utf-8", encoding="utf-8") - - # both keyword arguments work. - with u16.open("r", encoding="utf-8", errors="strict") as f: - # error during decoding with wrong codec. - with self.assertRaises(UnicodeDecodeError): - f.read() - - @unittest.skipIf( - not getattr(sys.flags, 'warn_default_encoding', 0), - "Requires warn_default_encoding", - ) - @pass_alpharep - def test_encoding_warnings(self, alpharep): - """EncodingWarning must blame the read_text and open calls.""" - assert sys.flags.warn_default_encoding - root = zipfile.Path(alpharep) - with self.assertWarns(EncodingWarning) as wc: - root.joinpath("a.txt").read_text() - assert __file__ == wc.filename - with self.assertWarns(EncodingWarning) as wc: - root.joinpath("a.txt").open("r").close() - assert __file__ == wc.filename - - def test_open_write(self): - """ - If the zipfile is open for write, it should be possible to - write bytes or text to it. - """ - zf = zipfile.Path(zipfile.ZipFile(io.BytesIO(), mode='w')) - with zf.joinpath('file.bin').open('wb') as strm: - strm.write(b'binary contents') - with zf.joinpath('file.txt').open('w', encoding="utf-8") as strm: - strm.write('text file') - - def test_open_extant_directory(self): - """ - Attempting to open a directory raises IsADirectoryError. - """ - zf = zipfile.Path(add_dirs(build_alpharep_fixture())) - with self.assertRaises(IsADirectoryError): - zf.joinpath('b').open() - - @pass_alpharep - def test_open_binary_invalid_args(self, alpharep): - root = zipfile.Path(alpharep) - with self.assertRaises(ValueError): - root.joinpath('a.txt').open('rb', encoding='utf-8') - with self.assertRaises(ValueError): - root.joinpath('a.txt').open('rb', 'utf-8') - - def test_open_missing_directory(self): - """ - Attempting to open a missing directory raises FileNotFoundError. - """ - zf = zipfile.Path(add_dirs(build_alpharep_fixture())) - with self.assertRaises(FileNotFoundError): - zf.joinpath('z').open() - - @pass_alpharep - def test_read(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - assert a.read_text(encoding="utf-8") == "content of a" - # Also check positional encoding arg (gh-101144). - assert a.read_text("utf-8") == "content of a" - assert a.read_bytes() == b"content of a" - - @pass_alpharep - def test_joinpath(self, alpharep): - root = zipfile.Path(alpharep) - a = root.joinpath("a.txt") - assert a.is_file() - e = root.joinpath("b").joinpath("d").joinpath("e.txt") - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_joinpath_multiple(self, alpharep): - root = zipfile.Path(alpharep) - e = root.joinpath("b", "d", "e.txt") - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_traverse_truediv(self, alpharep): - root = zipfile.Path(alpharep) - a = root / "a.txt" - assert a.is_file() - e = root / "b" / "d" / "e.txt" - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_pathlike_construction(self, alpharep): - """ - zipfile.Path should be constructable from a path-like object - """ - zipfile_ondisk = self.zipfile_ondisk(alpharep) - pathlike = pathlib.Path(str(zipfile_ondisk)) - zipfile.Path(pathlike) - - @pass_alpharep - def test_traverse_pathlike(self, alpharep): - root = zipfile.Path(alpharep) - root / pathlib.Path("a") - - @pass_alpharep - def test_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'a').parent.at == '' - assert (root / 'a' / 'b').parent.at == 'a/' - - @pass_alpharep - def test_dir_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'b').parent.at == '' - assert (root / 'b/').parent.at == '' - - @pass_alpharep - def test_missing_dir_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'missing dir/').parent.at == '' - - @pass_alpharep - def test_mutability(self, alpharep): - """ - If the underlying zipfile is changed, the Path object should - reflect that change. - """ - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - alpharep.writestr('foo.txt', 'foo') - alpharep.writestr('bar/baz.txt', 'baz') - assert any(child.name == 'foo.txt' for child in root.iterdir()) - assert (root / 'foo.txt').read_text(encoding="utf-8") == 'foo' - (baz,) = (root / 'bar').iterdir() - assert baz.read_text(encoding="utf-8") == 'baz' - - HUGE_ZIPFILE_NUM_ENTRIES = 2**13 - - def huge_zipfile(self): - """Create a read-only zipfile with a huge number of entries entries.""" - strm = io.BytesIO() - zf = zipfile.ZipFile(strm, "w") - for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)): - zf.writestr(entry, entry) - zf.mode = 'r' - return zf - - def test_joinpath_constant_time(self): - """ - Ensure joinpath on items in zipfile is linear time. - """ - root = zipfile.Path(self.huge_zipfile()) - entries = jaraco.itertools.Counter(root.iterdir()) - for entry in entries: - entry.joinpath('suffix') - # Check the file iterated all items - assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES - - @pass_alpharep - def test_read_does_not_close(self, alpharep): - alpharep = self.zipfile_ondisk(alpharep) - with zipfile.ZipFile(alpharep) as file: - for rep in range(2): - zipfile.Path(file, 'a.txt').read_text(encoding="utf-8") - - @pass_alpharep - def test_subclass(self, alpharep): - class Subclass(zipfile.Path): - pass - - root = Subclass(alpharep) - assert isinstance(root / 'b', Subclass) - - @pass_alpharep - def test_filename(self, alpharep): - root = zipfile.Path(alpharep) - assert root.filename == pathlib.Path('alpharep.zip') - - @pass_alpharep - def test_root_name(self, alpharep): - """ - The name of the root should be the name of the zipfile - """ - root = zipfile.Path(alpharep) - assert root.name == 'alpharep.zip' == root.filename.name - - @pass_alpharep - def test_suffix(self, alpharep): - """ - The suffix of the root should be the suffix of the zipfile. - The suffix of each nested file is the final component's last suffix, if any. - Includes the leading period, just like pathlib.Path. - """ - root = zipfile.Path(alpharep) - assert root.suffix == '.zip' == root.filename.suffix - - b = root / "b.txt" - assert b.suffix == ".txt" - - c = root / "c" / "filename.tar.gz" - assert c.suffix == ".gz" - - d = root / "d" - assert d.suffix == "" - - @pass_alpharep - def test_suffixes(self, alpharep): - """ - The suffix of the root should be the suffix of the zipfile. - The suffix of each nested file is the final component's last suffix, if any. - Includes the leading period, just like pathlib.Path. - """ - root = zipfile.Path(alpharep) - assert root.suffixes == ['.zip'] == root.filename.suffixes - - b = root / 'b.txt' - assert b.suffixes == ['.txt'] - - c = root / 'c' / 'filename.tar.gz' - assert c.suffixes == ['.tar', '.gz'] - - d = root / 'd' - assert d.suffixes == [] - - e = root / '.hgrc' - assert e.suffixes == [] - - @pass_alpharep - def test_stem(self, alpharep): - """ - The final path component, without its suffix - """ - root = zipfile.Path(alpharep) - assert root.stem == 'alpharep' == root.filename.stem - - b = root / "b.txt" - assert b.stem == "b" - - c = root / "c" / "filename.tar.gz" - assert c.stem == "filename.tar" - - d = root / "d" - assert d.stem == "d" - - @pass_alpharep - def test_root_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert root.parent == pathlib.Path('.') - root.root.filename = 'foo/bar.zip' - assert root.parent == pathlib.Path('foo') - - @pass_alpharep - def test_root_unnamed(self, alpharep): - """ - It is an error to attempt to get the name - or parent of an unnamed zipfile. - """ - alpharep.filename = None - root = zipfile.Path(alpharep) - with self.assertRaises(TypeError): - root.name - with self.assertRaises(TypeError): - root.parent - - # .name and .parent should still work on subs - sub = root / "b" - assert sub.name == "b" - assert sub.parent - - @pass_alpharep - def test_match_and_glob(self, alpharep): - root = zipfile.Path(alpharep) - assert not root.match("*.txt") - - assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")] - - files = root.glob("**/*.txt") - assert all(each.match("*.txt") for each in files) - - assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt")) - - def test_glob_empty(self): - root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w')) - with self.assertRaises(ValueError): - root.glob('') - - @pass_alpharep - def test_eq_hash(self, alpharep): - root = zipfile.Path(alpharep) - assert root == zipfile.Path(alpharep) - - assert root != (root / "a.txt") - assert (root / "a.txt") == (root / "a.txt") - - root = zipfile.Path(alpharep) - assert root in {root} - - @pass_alpharep - def test_is_symlink(self, alpharep): - """ - See python/cpython#82102 for symlink support beyond this object. - """ - - root = zipfile.Path(alpharep) - assert not root.is_symlink() - - @pass_alpharep - def test_relative_to(self, alpharep): - root = zipfile.Path(alpharep) - relative = root.joinpath("b", "c.txt").relative_to(root / "b") - assert str(relative) == "c.txt" - - relative = root.joinpath("b", "d", "e.txt").relative_to(root / "b") - assert str(relative) == "d/e.txt" - - @pass_alpharep - def test_inheritance(self, alpharep): - cls = type('PathChild', (zipfile.Path,), {}) - file = cls(alpharep).joinpath('some dir').parent - assert isinstance(file, cls) - - @parameterize( - ['alpharep', 'path_type', 'subpath'], - itertools.product( - alpharep_generators, - [str, pathlib.Path], - ['', 'b/'], - ), - ) - def test_pickle(self, alpharep, path_type, subpath): - zipfile_ondisk = path_type(self.zipfile_ondisk(alpharep)) - - saved_1 = pickle.dumps(zipfile.Path(zipfile_ondisk, at=subpath)) - restored_1 = pickle.loads(saved_1) - first, *rest = restored_1.iterdir() - assert first.read_text(encoding='utf-8').startswith('content of ') - - @pass_alpharep - def test_extract_orig_with_implied_dirs(self, alpharep): - """ - A zip file wrapped in a Path should extract even with implied dirs. - """ - source_path = self.zipfile_ondisk(alpharep) - zf = zipfile.ZipFile(source_path) - # wrap the zipfile for its side effect - zipfile.Path(zf) - zf.extractall(source_path.parent) - - @pass_alpharep - def test_getinfo_missing(self, alpharep): - """ - Validate behavior of getinfo on original zipfile after wrapping. - """ - zipfile.Path(alpharep) - with self.assertRaises(KeyError): - alpharep.getinfo('does-not-exist') diff --git a/Lib/zipfile/_path.py b/Lib/zipfile/_path.py deleted file mode 100644 index fd49a3e..0000000 --- a/Lib/zipfile/_path.py +++ /dev/null @@ -1,400 +0,0 @@ -import io -import posixpath -import zipfile -import itertools -import contextlib -import pathlib -import re -import fnmatch - - -__all__ = ['Path'] - - -def _parents(path): - """ - Given a path with elements separated by - posixpath.sep, generate all parents of that path. - - >>> list(_parents('b/d')) - ['b'] - >>> list(_parents('/b/d/')) - ['/b'] - >>> list(_parents('b/d/f/')) - ['b/d', 'b'] - >>> list(_parents('b')) - [] - >>> list(_parents('')) - [] - """ - return itertools.islice(_ancestry(path), 1, None) - - -def _ancestry(path): - """ - Given a path with elements separated by - posixpath.sep, generate all elements of that path - - >>> list(_ancestry('b/d')) - ['b/d', 'b'] - >>> list(_ancestry('/b/d/')) - ['/b/d', '/b'] - >>> list(_ancestry('b/d/f/')) - ['b/d/f', 'b/d', 'b'] - >>> list(_ancestry('b')) - ['b'] - >>> list(_ancestry('')) - [] - """ - path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: - yield path - path, tail = posixpath.split(path) - - -_dedupe = dict.fromkeys -"""Deduplicate an iterable in original order""" - - -def _difference(minuend, subtrahend): - """ - Return items in minuend not in subtrahend, retaining order - with O(1) lookup. - """ - return itertools.filterfalse(set(subtrahend).__contains__, minuend) - - -class InitializedState: - """ - Mix-in to save the initialization state for pickling. - """ - - def __init__(self, *args, **kwargs): - self.__args = args - self.__kwargs = kwargs - super().__init__(*args, **kwargs) - - def __getstate__(self): - return self.__args, self.__kwargs - - def __setstate__(self, state): - args, kwargs = state - super().__init__(*args, **kwargs) - - -class CompleteDirs(InitializedState, zipfile.ZipFile): - """ - A ZipFile subclass that ensures that implied directories - are always included in the namelist. - - >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt'])) - ['foo/', 'foo/bar/'] - >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/'])) - ['foo/'] - """ - - @staticmethod - def _implied_dirs(names): - parents = itertools.chain.from_iterable(map(_parents, names)) - as_dirs = (p + posixpath.sep for p in parents) - return _dedupe(_difference(as_dirs, names)) - - def namelist(self): - names = super().namelist() - return names + list(self._implied_dirs(names)) - - def _name_set(self): - return set(self.namelist()) - - def resolve_dir(self, name): - """ - If the name represents a directory, return that name - as a directory (with the trailing slash). - """ - names = self._name_set() - dirname = name + '/' - dir_match = name not in names and dirname in names - return dirname if dir_match else name - - def getinfo(self, name): - """ - Supplement getinfo for implied dirs. - """ - try: - return super().getinfo(name) - except KeyError: - if not name.endswith('/') or name not in self._name_set(): - raise - return zipfile.ZipInfo(filename=name) - - @classmethod - def make(cls, source): - """ - Given a source (filename or zipfile), return an - appropriate CompleteDirs subclass. - """ - if isinstance(source, CompleteDirs): - return source - - if not isinstance(source, zipfile.ZipFile): - return cls(source) - - # Only allow for FastLookup when supplied zipfile is read-only - if 'r' not in source.mode: - cls = CompleteDirs - - source.__class__ = cls - return source - - -class FastLookup(CompleteDirs): - """ - ZipFile subclass to ensure implicit - dirs exist and are resolved rapidly. - """ - - def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super().namelist() - return self.__names - - def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super()._name_set() - return self.__lookup - - -def _extract_text_encoding(encoding=None, *args, **kwargs): - # stacklevel=3 so that the caller of the caller see any warning. - return io.text_encoding(encoding, 3), args, kwargs - - -class Path: - """ - A pathlib-compatible interface for zip files. - - Consider a zip file with this structure:: - - . - ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt - - >>> data = io.BytesIO() - >>> zf = ZipFile(data, 'w') - >>> zf.writestr('a.txt', 'content of a') - >>> zf.writestr('b/c.txt', 'content of c') - >>> zf.writestr('b/d/e.txt', 'content of e') - >>> zf.filename = 'mem/abcde.zip' - - Path accepts the zipfile object itself or a filename - - >>> root = Path(zf) - - From there, several path operations are available. - - Directory iteration (including the zip file itself): - - >>> a, b = root.iterdir() - >>> a - Path('mem/abcde.zip', 'a.txt') - >>> b - Path('mem/abcde.zip', 'b/') - - name property: - - >>> b.name - 'b' - - join with divide operator: - - >>> c = b / 'c.txt' - >>> c - Path('mem/abcde.zip', 'b/c.txt') - >>> c.name - 'c.txt' - - Read text: - - >>> c.read_text(encoding='utf-8') - 'content of c' - - existence: - - >>> c.exists() - True - >>> (b / 'missing.txt').exists() - False - - Coercion to string: - - >>> import os - >>> str(c).replace(os.sep, posixpath.sep) - 'mem/abcde.zip/b/c.txt' - - At the root, ``name``, ``filename``, and ``parent`` - resolve to the zipfile. Note these attributes are not - valid and will raise a ``ValueError`` if the zipfile - has no filename. - - >>> root.name - 'abcde.zip' - >>> str(root.filename).replace(os.sep, posixpath.sep) - 'mem/abcde.zip' - >>> str(root.parent) - 'mem' - """ - - __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" - - def __init__(self, root, at=""): - """ - Construct a Path from a ZipFile or filename. - - Note: When the source is an existing ZipFile object, - its type (__class__) will be mutated to a - specialized type. If the caller wishes to retain the - original type, the caller should either create a - separate ZipFile object or pass a filename. - """ - self.root = FastLookup.make(root) - self.at = at - - def __eq__(self, other): - """ - >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo' - False - """ - if self.__class__ is not other.__class__: - return NotImplemented - return (self.root, self.at) == (other.root, other.at) - - def __hash__(self): - return hash((self.root, self.at)) - - def open(self, mode='r', *args, pwd=None, **kwargs): - """ - Open this entry as text or binary following the semantics - of ``pathlib.Path.open()`` by passing arguments through - to io.TextIOWrapper(). - """ - if self.is_dir(): - raise IsADirectoryError(self) - zip_mode = mode[0] - if not self.exists() and zip_mode == 'r': - raise FileNotFoundError(self) - stream = self.root.open(self.at, zip_mode, pwd=pwd) - if 'b' in mode: - if args or kwargs: - raise ValueError("encoding args invalid for binary operation") - return stream - # Text mode: - encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) - return io.TextIOWrapper(stream, encoding, *args, **kwargs) - - @property - def name(self): - return pathlib.Path(self.at).name or self.filename.name - - @property - def suffix(self): - return pathlib.Path(self.at).suffix or self.filename.suffix - - @property - def suffixes(self): - return pathlib.Path(self.at).suffixes or self.filename.suffixes - - @property - def stem(self): - return pathlib.Path(self.at).stem or self.filename.stem - - @property - def filename(self): - return pathlib.Path(self.root.filename).joinpath(self.at) - - def read_text(self, *args, **kwargs): - encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) - with self.open('r', encoding, *args, **kwargs) as strm: - return strm.read() - - def read_bytes(self): - with self.open('rb') as strm: - return strm.read() - - def _is_child(self, path): - return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") - - def _next(self, at): - return self.__class__(self.root, at) - - def is_dir(self): - return not self.at or self.at.endswith("/") - - def is_file(self): - return self.exists() and not self.is_dir() - - def exists(self): - return self.at in self.root._name_set() - - def iterdir(self): - if not self.is_dir(): - raise ValueError("Can't listdir a file") - subs = map(self._next, self.root.namelist()) - return filter(self._is_child, subs) - - def match(self, path_pattern): - return pathlib.Path(self.at).match(path_pattern) - - def is_symlink(self): - """ - Return whether this path is a symlink. Always false (python/cpython#82102). - """ - return False - - def _descendants(self): - for child in self.iterdir(): - yield child - if child.is_dir(): - yield from child._descendants() - - def glob(self, pattern): - if not pattern: - raise ValueError(f"Unacceptable pattern: {pattern!r}") - - matches = re.compile(fnmatch.translate(pattern)).fullmatch - return ( - child - for child in self._descendants() - if matches(str(child.relative_to(self))) - ) - - def rglob(self, pattern): - return self.glob(f'**/{pattern}') - - def relative_to(self, other, *extra): - return posixpath.relpath(str(self), str(other.joinpath(*extra))) - - def __str__(self): - return posixpath.join(self.root.filename, self.at) - - def __repr__(self): - return self.__repr.format(self=self) - - def joinpath(self, *other): - next = posixpath.join(self.at, *other) - return self._next(self.root.resolve_dir(next)) - - __truediv__ = joinpath - - @property - def parent(self): - if not self.at: - return self.filename.parent - parent_at = posixpath.dirname(self.at.rstrip('/')) - if parent_at: - parent_at += '/' - return self._next(parent_at) diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py new file mode 100644 index 0000000..fd49a3e --- /dev/null +++ b/Lib/zipfile/_path/__init__.py @@ -0,0 +1,400 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import pathlib +import re +import fnmatch + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = dict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class InitializedState: + """ + Mix-in to save the initialization state for pickling. + """ + + def __init__(self, *args, **kwargs): + self.__args = args + self.__kwargs = kwargs + super().__init__(*args, **kwargs) + + def __getstate__(self): + return self.__args, self.__kwargs + + def __setstate__(self, state): + args, kwargs = state + super().__init__(*args, **kwargs) + + +class CompleteDirs(InitializedState, zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + + >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt'])) + ['foo/', 'foo/bar/'] + >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/'])) + ['foo/'] + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super().namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + def getinfo(self, name): + """ + Supplement getinfo for implied dirs. + """ + try: + return super().getinfo(name) + except KeyError: + if not name.endswith('/') or name not in self._name_set(): + raise + return zipfile.ZipInfo(filename=name) + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(source) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super().namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super()._name_set() + return self.__lookup + + +def _extract_text_encoding(encoding=None, *args, **kwargs): + # stacklevel=3 so that the caller of the caller see any warning. + return io.text_encoding(encoding, 3), args, kwargs + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text(encoding='utf-8') + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def __eq__(self, other): + """ + >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo' + False + """ + if self.__class__ is not other.__class__: + return NotImplemented + return (self.root, self.at) == (other.root, other.at) + + def __hash__(self): + return hash((self.root, self.at)) + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). + """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + # Text mode: + encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) + return io.TextIOWrapper(stream, encoding, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) + with self.open('r', encoding, *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def match(self, path_pattern): + return pathlib.Path(self.at).match(path_pattern) + + def is_symlink(self): + """ + Return whether this path is a symlink. Always false (python/cpython#82102). + """ + return False + + def _descendants(self): + for child in self.iterdir(): + yield child + if child.is_dir(): + yield from child._descendants() + + def glob(self, pattern): + if not pattern: + raise ValueError(f"Unacceptable pattern: {pattern!r}") + + matches = re.compile(fnmatch.translate(pattern)).fullmatch + return ( + child + for child in self._descendants() + if matches(str(child.relative_to(self))) + ) + + def rglob(self, pattern): + return self.glob(f'**/{pattern}') + + def relative_to(self, other, *extra): + return posixpath.relpath(str(self), str(other.joinpath(*extra))) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *other) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/Makefile.pre.in b/Makefile.pre.in index 4b65551..ddf524a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2125,7 +2125,7 @@ LIBSUBDIRS= asyncio \ wsgiref \ $(XMLLIBSUBDIRS) \ xmlrpc \ - zipfile \ + zipfile zipfile/_path \ zoneinfo \ __phello__ TESTSUBDIRS= idlelib/idle_test \ @@ -2229,6 +2229,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_warnings \ test/test_warnings/data \ test/test_zipfile \ + test/test_zipfile/_path \ test/test_zoneinfo \ test/test_zoneinfo/data \ test/tkinterdata \ diff --git a/Misc/NEWS.d/next/Tests/2023-07-14-16-20-06.gh-issue-106752.gd1i6D.rst b/Misc/NEWS.d/next/Tests/2023-07-14-16-20-06.gh-issue-106752.gd1i6D.rst new file mode 100644 index 0000000..ba7257e --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2023-07-14-16-20-06.gh-issue-106752.gd1i6D.rst @@ -0,0 +1,2 @@ +Moved tests for ``zipfile.Path`` into ``Lib/test/test_zipfile/_path``. Made +``zipfile._path`` a package. -- cgit v0.12