summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jason R. Coombs <jaraco@jaraco.com> 2020-02-29 16:34:11 (GMT)
committer GitHub <noreply@github.com> 2020-02-29 16:34:11 (GMT)
commit 0aeab5c4381f0cc11479362af2533b3a391312ac (patch)
tree 11f53305312285e0bd7a911dc883294d9f73bef0
parent 1f0cd3c61a5ae3aac5ebaccc75ae9828ca4f96c4 (diff)
download cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.zip
cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.tar.gz
cpython-0aeab5c4381f0cc11479362af2533b3a391312ac.tar.bz2
bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, faster than OrderedDict.
* Syntax edits on docs

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
-rw-r--r-- Doc/library/zipfile.rst 18
-rw-r--r-- Lib/test/test_zipfile.py 8
-rw-r--r-- Lib/zipfile.py 63
-rw-r--r-- Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst 1
4 files changed, 53 insertions, 37 deletions
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index e8a2530..7126d8b 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -489,10 +489,20 @@ Path objects are traversable using the ``/`` operator.
The final path component.
-.. method:: Path.open(*, **)
-
- Invoke :meth:`ZipFile.open` on the current path. Accepts
- the same arguments as :meth:`ZipFile.open`.
+.. method:: Path.open(mode='r', *, pwd, **)
+
+ Invoke :meth:`ZipFile.open` on the current path.
+ Allows opening for read or write, text or binary
+ through supported modes: 'r', 'w', 'rb', 'wb'.
+ Positional and keyword arguments are passed through to
+ :class:`io.TextIOWrapper` when opened as text and
+ ignored otherwise.
+ ``pwd`` is the ``pwd`` parameter to
+ :meth:`ZipFile.open`.
+
+ .. versionchanged:: 3.9
+ Added support for text and binary modes for open. Default
+ mode is now text.
.. method:: Path.iterdir()
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 09fc850..643c5b4 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -5,6 +5,7 @@ import itertools
import os
import pathlib
import posixpath
+import string
import struct
import subprocess
import sys
@@ -2880,7 +2881,7 @@ class TestPath(unittest.TestCase):
a, b, g = root.iterdir()
with a.open() as strm:
data = strm.read()
- assert data == b"content of a"
+ assert data == "content of a"
def test_read(self):
for alpharep in self.zipfile_alpharep():
@@ -2974,6 +2975,11 @@ class TestPath(unittest.TestCase):
# Check the file iterated all items
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
+ # @func_timeout.func_set_timeout(3)
+ def test_implied_dirs_performance(self):
+ data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
+ zipfile.CompleteDirs._implied_dirs(data)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 4510fac..55993c8 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -17,7 +17,6 @@ import sys
import threading
import time
import contextlib
-from collections import OrderedDict
try:
import zlib # We may need its compression method
@@ -2102,24 +2101,6 @@ class PyZipFile(ZipFile):
return (fname, archivename)
-def _unique_everseen(iterable, key=None):
- "List unique elements, preserving order. Remember all elements ever seen."
- # unique_everseen('AAAABBBCCDAABBB') --> A B C D
- # unique_everseen('ABBCcAD', str.lower) --> A B C D
- seen = set()
- seen_add = seen.add
- if key is None:
- for element in itertools.filterfalse(seen.__contains__, iterable):
- seen_add(element)
- yield element
- else:
- for element in iterable:
- k = key(element)
- if k not in seen:
- seen_add(k)
- yield element
-
-
def _parents(path):
"""
Given a path with elements separated by
@@ -2161,6 +2142,18 @@ def _ancestry(path):
path, tail = posixpath.split(path)
+_dedupe = dict.fromkeys
+"""Deduplicate an iterable in original order"""
+
+
+def _difference(minuend, subtrahend):
+ """
+ Return items in minuend not in subtrahend, retaining order
+ with O(1) lookup.
+ """
+ return itertools.filterfalse(set(subtrahend).__contains__, minuend)
+
+
class CompleteDirs(ZipFile):
"""
A ZipFile subclass that ensures that implied directories
@@ -2170,13 +2163,8 @@ class CompleteDirs(ZipFile):
@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
- # Deduplicate entries in original order
- implied_dirs = OrderedDict.fromkeys(
- p + posixpath.sep for p in parents
- # Cast names to a set for O(1) lookups
- if p + posixpath.sep not in set(names)
- )
- return implied_dirs
+ as_dirs = (p + posixpath.sep for p in parents)
+ return _dedupe(_difference(as_dirs, names))
def namelist(self):
names = super(CompleteDirs, self).namelist()
@@ -2305,20 +2293,31 @@ class Path:
self.root = FastLookup.make(root)
self.at = at
- @property
- def open(self):
- return functools.partial(self.root.open, self.at)
+ def open(self, mode='r', *args, **kwargs):
+ """
+ Open this entry as text or binary following the semantics
+ of ``pathlib.Path.open()`` by passing arguments through
+ to io.TextIOWrapper().
+ """
+ pwd = kwargs.pop('pwd', None)
+ zip_mode = mode[0]
+ stream = self.root.open(self.at, zip_mode, pwd=pwd)
+ if 'b' in mode:
+ if args or kwargs:
+ raise ValueError("encoding args invalid for binary operation")
+ return stream
+ return io.TextIOWrapper(stream, *args, **kwargs)
@property
def name(self):
return posixpath.basename(self.at.rstrip("/"))
def read_text(self, *args, **kwargs):
- with self.open() as strm:
- return io.TextIOWrapper(strm, *args, **kwargs).read()
+ with self.open('r', *args, **kwargs) as strm:
+ return strm.read()
def read_bytes(self):
- with self.open() as strm:
+ with self.open('rb') as strm:
return strm.read()
def _is_child(self, path):
diff --git a/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst
new file mode 100644
index 0000000..acf503c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst
@@ -0,0 +1 @@
+Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0.
\ No newline at end of file