diff options
author | Jason R. Coombs <jaraco@jaraco.com> | 2020-02-29 16:34:11 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-29 16:34:11 (GMT) |
commit | 0aeab5c4381f0cc11479362af2533b3a391312ac (patch) | |
tree | 11f53305312285e0bd7a911dc883294d9f73bef0 /Lib | |
parent | 1f0cd3c61a5ae3aac5ebaccc75ae9828ca4f96c4 (diff) | |
download | cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.zip cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.tar.gz cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.tar.bz2 |
bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility of zipfile.Path and correct a performance degradation, incorporating the fixes found in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, faster than OrderedDict.
* Syntax edits on docs
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_zipfile.py | 8 | ||||
-rw-r--r-- | Lib/zipfile.py | 63 |
2 files changed, 38 insertions, 33 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 09fc850..643c5b4 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -5,6 +5,7 @@ import itertools import os import pathlib import posixpath +import string import struct import subprocess import sys @@ -2880,7 +2881,7 @@ class TestPath(unittest.TestCase): a, b, g = root.iterdir() with a.open() as strm: data = strm.read() - assert data == b"content of a" + assert data == "content of a" def test_read(self): for alpharep in self.zipfile_alpharep(): @@ -2974,6 +2975,11 @@ class TestPath(unittest.TestCase): # Check the file iterated all items assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES + # @func_timeout.func_set_timeout(3) + def test_implied_dirs_performance(self): + data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] + zipfile.CompleteDirs._implied_dirs(data) + if __name__ == "__main__": unittest.main() diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 4510fac..55993c8 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -17,7 +17,6 @@ import sys import threading import time import contextlib -from collections import OrderedDict try: import zlib # We may need its compression method @@ -2102,24 +2101,6 @@ class PyZipFile(ZipFile): return (fname, archivename) -def _unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." 
- # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in itertools.filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element - - def _parents(path): """ Given a path with elements separated by @@ -2161,6 +2142,18 @@ def _ancestry(path): path, tail = posixpath.split(path) +_dedupe = dict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + class CompleteDirs(ZipFile): """ A ZipFile subclass that ensures that implied directories @@ -2170,13 +2163,8 @@ class CompleteDirs(ZipFile): @staticmethod def _implied_dirs(names): parents = itertools.chain.from_iterable(map(_parents, names)) - # Deduplicate entries in original order - implied_dirs = OrderedDict.fromkeys( - p + posixpath.sep for p in parents - # Cast names to a set for O(1) lookups - if p + posixpath.sep not in set(names) - ) - return implied_dirs + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) def namelist(self): names = super(CompleteDirs, self).namelist() @@ -2305,20 +2293,31 @@ class Path: self.root = FastLookup.make(root) self.at = at - @property - def open(self): - return functools.partial(self.root.open, self.at) + def open(self, mode='r', *args, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). 
+ """ + pwd = kwargs.pop('pwd', None) + zip_mode = mode[0] + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + return io.TextIOWrapper(stream, *args, **kwargs) @property def name(self): return posixpath.basename(self.at.rstrip("/")) def read_text(self, *args, **kwargs): - with self.open() as strm: - return io.TextIOWrapper(strm, *args, **kwargs).read() + with self.open('r', *args, **kwargs) as strm: + return strm.read() def read_bytes(self): - with self.open() as strm: + with self.open('rb') as strm: return strm.read() def _is_child(self, path): |