diff options
author | Jason R. Coombs <jaraco@jaraco.com> | 2024-03-14 21:53:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-14 21:53:50 (GMT) |
commit | be59aaf3abec37b27bdb31fadf433665e5471a46 (patch) | |
tree | 513a36446c05b228f4b9367d102011e4e44a5807 /Lib/zipfile | |
parent | ab9e322ae1da1068b615d0375859920c710458e4 (diff) | |
download | cpython-be59aaf3abec37b27bdb31fadf433665e5471a46.zip cpython-be59aaf3abec37b27bdb31fadf433665e5471a46.tar.gz cpython-be59aaf3abec37b27bdb31fadf433665e5471a46.tar.bz2 |
gh-106531: Refresh zipfile._path with zipp 3.18. (#116835)
* gh-106531: Refresh zipfile._path with zipp 3.18.
* Add blurb
Diffstat (limited to 'Lib/zipfile')
-rw-r--r-- | Lib/zipfile/_path/__init__.py | 65 | ||||
-rw-r--r-- | Lib/zipfile/_path/glob.py | 112 |
2 files changed, 140 insertions, 37 deletions
diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 78c4135..4c16756 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -5,8 +5,9 @@ import itertools import contextlib import pathlib import re +import sys -from .glob import translate +from .glob import Translator __all__ = ['Path'] @@ -147,6 +148,16 @@ class CompleteDirs(InitializedState, zipfile.ZipFile): source.__class__ = cls return source + @classmethod + def inject(cls, zf: zipfile.ZipFile) -> zipfile.ZipFile: + """ + Given a writable zip file zf, inject directory entries for + any directories implied by the presence of children. + """ + for name in cls._implied_dirs(zf.namelist()): + zf.writestr(name, b"") + return zf + class FastLookup(CompleteDirs): """ @@ -168,8 +179,10 @@ class FastLookup(CompleteDirs): def _extract_text_encoding(encoding=None, *args, **kwargs): - # stacklevel=3 so that the caller of the caller see any warning. - return io.text_encoding(encoding, 3), args, kwargs + # compute stack level so that the caller of the caller sees any warning. + is_pypy = sys.implementation.name == 'pypy' + stack_level = 3 + is_pypy + return io.text_encoding(encoding, stack_level), args, kwargs class Path: @@ -194,13 +207,13 @@ class Path: Path accepts the zipfile object itself or a filename - >>> root = Path(zf) + >>> path = Path(zf) From there, several path operations are available. Directory iteration (including the zip file itself): - >>> a, b = root.iterdir() + >>> a, b = path.iterdir() >>> a Path('mem/abcde.zip', 'a.txt') >>> b @@ -238,16 +251,38 @@ class Path: 'mem/abcde.zip/b/c.txt' At the root, ``name``, ``filename``, and ``parent`` - resolve to the zipfile. Note these attributes are not - valid and will raise a ``ValueError`` if the zipfile - has no filename. + resolve to the zipfile. - >>> root.name + >>> str(path) + 'mem/abcde.zip/' + >>> path.name 'abcde.zip' - >>> str(root.filename).replace(os.sep, posixpath.sep) - 'mem/abcde.zip' - >>> str(root.parent) + >>> path.filename == pathlib.Path('mem/abcde.zip') + True + >>> str(path.parent) 'mem' + + If the zipfile has no filename, such attribtues are not + valid and accessing them will raise an Exception. + + >>> zf.filename = None + >>> path.name + Traceback (most recent call last): + ... + TypeError: ... + + >>> path.filename + Traceback (most recent call last): + ... + TypeError: ... + + >>> path.parent + Traceback (most recent call last): + ... + TypeError: ... + + # workaround python/cpython#106763 + >>> pass """ __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" @@ -364,8 +399,10 @@ class Path: raise ValueError(f"Unacceptable pattern: {pattern!r}") prefix = re.escape(self.at) - matches = re.compile(prefix + translate(pattern)).fullmatch - return map(self._next, filter(matches, self.root.namelist())) + tr = Translator(seps='/') + matches = re.compile(prefix + tr.translate(pattern)).fullmatch + names = (data.filename for data in self.root.filelist) + return map(self._next, filter(matches, names)) def rglob(self, pattern): return self.glob(f'**/{pattern}') diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py index 4a2e665..69c41d7 100644 --- a/Lib/zipfile/_path/glob.py +++ b/Lib/zipfile/_path/glob.py @@ -1,18 +1,97 @@ +import os import re -def translate(pattern): - r""" - Given a glob pattern, produce a regex that matches it. +_default_seps = os.sep + str(os.altsep) * bool(os.altsep) - >>> translate('*.txt') - '[^/]*\\.txt' - >>> translate('a?txt') - 'a.txt' - >>> translate('**/*') - '.*/[^/]*' + +class Translator: + """ + >>> Translator('xyz') + Traceback (most recent call last): + ... + AssertionError: Invalid separators + + >>> Translator('') + Traceback (most recent call last): + ... + AssertionError: Invalid separators """ - return ''.join(map(replace, separate(pattern))) + + seps: str + + def __init__(self, seps: str = _default_seps): + assert seps and set(seps) <= set(_default_seps), "Invalid separators" + self.seps = seps + + def translate(self, pattern): + """ + Given a glob pattern, produce a regex that matches it. + """ + return self.extend(self.translate_core(pattern)) + + def extend(self, pattern): + r""" + Extend regex for pattern-wide concerns. + + Apply '(?s:)' to create a non-matching group that + matches newlines (valid on Unix). + + Append '\Z' to imply fullmatch even when match is used. + """ + return rf'(?s:{pattern})\Z' + + def translate_core(self, pattern): + r""" + Given a glob pattern, produce a regex that matches it. + + >>> t = Translator() + >>> t.translate_core('*.txt').replace('\\\\', '') + '[^/]*\\.txt' + >>> t.translate_core('a?txt') + 'a[^/]txt' + >>> t.translate_core('**/*').replace('\\\\', '') + '.*/[^/][^/]*' + """ + self.restrict_rglob(pattern) + return ''.join(map(self.replace, separate(self.star_not_empty(pattern)))) + + def replace(self, match): + """ + Perform the replacements for a match from :func:`separate`. + """ + return match.group('set') or ( + re.escape(match.group(0)) + .replace('\\*\\*', r'.*') + .replace('\\*', rf'[^{re.escape(self.seps)}]*') + .replace('\\?', r'[^/]') + ) + + def restrict_rglob(self, pattern): + """ + Raise ValueError if ** appears in anything but a full path segment. + + >>> Translator().translate('**foo') + Traceback (most recent call last): + ... + ValueError: ** must appear alone in a path segment + """ + seps_pattern = rf'[{re.escape(self.seps)}]+' + segments = re.split(seps_pattern, pattern) + if any('**' in segment and segment != '**' for segment in segments): + raise ValueError("** must appear alone in a path segment") + + def star_not_empty(self, pattern): + """ + Ensure that * will not match an empty segment. + """ + + def handle_segment(match): + segment = match.group(0) + return '?*' if segment == '*' else segment + + not_seps_pattern = rf'[^{re.escape(self.seps)}]+' + return re.sub(not_seps_pattern, handle_segment, pattern) def separate(pattern): @@ -25,16 +104,3 @@ def separate(pattern): ['a', '[?]', 'txt'] """ return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern) - - -def replace(match): - """ - Perform the replacements for a match from :func:`separate`. - """ - - return match.group('set') or ( - re.escape(match.group(0)) - .replace('\\*\\*', r'.*') - .replace('\\*', r'[^/]*') - .replace('\\?', r'.') - ) |