diff options
author | Barry Warsaw <barry@python.org> | 2018-01-24 20:36:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-24 20:36:21 (GMT) |
commit | 6f6eb35f9bee18f54945f09664344f2d118ed89f (patch) | |
tree | e1e7432ccc9f7755d85da2803181f04504ac3894 /Lib/importlib/resources.py | |
parent | 789e359f51d2b27bea01b8c6c3bf090aaedf8839 (diff) | |
download | cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.zip cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.gz cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.bz2 |
bpo-32248 - Implement `ResourceReader` and `get_resource_reader()` for zipimport (#5248)
Diffstat (limited to 'Lib/importlib/resources.py')
-rw-r--r-- | Lib/importlib/resources.py | 150 |
1 files changed, 83 insertions, 67 deletions
diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py index 20888df..bf6d703 100644 --- a/Lib/importlib/resources.py +++ b/Lib/importlib/resources.py @@ -12,7 +12,7 @@ from types import ModuleType from typing import Iterator, Optional, Set, Union # noqa: F401 from typing import cast from typing.io import BinaryIO, TextIO -from zipfile import ZipFile +from zipimport import ZipImportError Package = Union[str, ModuleType] @@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool: # contents doesn't necessarily mean it's a resource. Directories are not # resources, so let's try to find out if it's a directory or not. path = Path(package.__spec__.origin).parent / name - if path.is_file(): - return True - if path.is_dir(): - return False - # If it's not a file and it's not a directory, what is it? Well, this - # means the file doesn't exist on the file system, so it probably lives - # inside a zip file. We have to crack open the zip, look at its table of - # contents, and make sure that this entry doesn't have sub-entries. - archive_path = package.__spec__.loader.archive # type: ignore - package_directory = Path(package.__spec__.origin).parent - with ZipFile(archive_path) as zf: - toc = zf.namelist() - relpath = package_directory.relative_to(archive_path) - candidate_path = relpath / name - for entry in toc: - try: - relative_to_candidate = Path(entry).relative_to(candidate_path) - except ValueError: - # The two paths aren't relative to each other so we can ignore it. - continue - # Since directories aren't explicitly listed in the zip file, we must - # infer their 'directory-ness' by looking at the number of path - # components in the path relative to the package resource we're - # looking up. If there are zero additional parts, it's a file, i.e. a - # resource. If there are more than zero it's a directory, i.e. not a - # resource. It has to be one of these two cases. - return len(relative_to_candidate.parts) == 0 - # I think it's impossible to get here. It would mean that we are looking - # for a resource in a zip file, there's an entry matching it in the return - # value of contents(), but we never actually found it in the zip's table of - # contents. - raise AssertionError('Impossible situation') + return path.is_file() def contents(package: Package) -> Iterator[str]: @@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]: not package.__spec__.has_location): return [] package_directory = Path(package.__spec__.origin).parent - try: - yield from os.listdir(str(package_directory)) - except (NotADirectoryError, FileNotFoundError): - # The package is probably in a zip file. - archive_path = getattr(package.__spec__.loader, 'archive', None) - if archive_path is None: - raise - relpath = package_directory.relative_to(archive_path) - with ZipFile(archive_path) as zf: - toc = zf.namelist() - subdirs_seen = set() # type: Set - for filename in toc: - path = Path(filename) - # Strip off any path component parts that are in common with the - # package directory, relative to the zip archive's file system - # path. This gives us all the parts that live under the named - # package inside the zip file. If the length of these subparts is - # exactly 1, then it is situated inside the package. The resulting - # length will be 0 if it's above the package, and it will be - # greater than 1 if it lives in a subdirectory of the package - # directory. - # - # However, since directories themselves don't appear in the zip - # archive as a separate entry, we need to return the first path - # component for any case that has > 1 subparts -- but only once! - if path.parts[:len(relpath.parts)] != relpath.parts: + yield from os.listdir(str(package_directory)) + + +# Private implementation of ResourceReader and get_resource_reader() for +# zipimport. Don't use these directly! We're implementing these in Python +# because 1) it's easier, 2) zipimport will likely get rewritten in Python +# itself at some point, so doing this all in C would just be a waste of +# effort. + +class _ZipImportResourceReader(resources_abc.ResourceReader): + """Private class used to support ZipImport.get_resource_reader(). + + This class is allowed to reference all the innards and private parts of + the zipimporter. + """ + + def __init__(self, zipimporter, fullname): + self.zipimporter = zipimporter + self.fullname = fullname + + def open_resource(self, resource): + path = f'{self.fullname}/{resource}' + try: + return BytesIO(self.zipimporter.get_data(path)) + except OSError: + raise FileNotFoundError + + def resource_path(self, resource): + # All resources are in the zip file, so there is no path to the file. + # Raising FileNotFoundError tells the higher level API to extract the + # binary data and create a temporary file. + raise FileNotFoundError + + def is_resource(self, name): + # Maybe we could do better, but if we can get the data, it's a + # resource. Otherwise it isn't. + path = f'{self.fullname}/{name}' + try: + self.zipimporter.get_data(path) + except OSError: + return False + return True + + def contents(self): + # This is a bit convoluted, because fullname will be a module path, + # but _files is a list of file names relative to the top of the + # archive's namespace. We want to compare file paths to find all the + # names of things inside the module represented by fullname. So we + # turn the module path of fullname into a file path relative to the + # top of the archive, and then we iterate through _files looking for + # names inside that "directory". + fullname_path = Path(self.zipimporter.get_filename(self.fullname)) + relative_path = fullname_path.relative_to(self.zipimporter.archive) + # Don't forget that fullname names a package, so its path will include + # __init__.py, which we want to ignore. + assert relative_path.name == '__init__.py' + package_path = relative_path.parent + subdirs_seen = set() + for filename in self.zipimporter._files: + try: + relative = Path(filename).relative_to(package_path) + except ValueError: continue - subparts = path.parts[len(relpath.parts):] - if len(subparts) == 1: - yield subparts[0] - elif len(subparts) > 1: - subdir = subparts[0] - if subdir not in subdirs_seen: - subdirs_seen.add(subdir) - yield subdir + # If the path of the file (which is relative to the top of the zip + # namespace), relative to the package given when the resource + # reader was created, has a parent, then it's a name in a + # subdirectory and thus we skip it. + parent_name = relative.parent.name + if len(parent_name) == 0: + yield relative.name + elif parent_name not in subdirs_seen: + subdirs_seen.add(parent_name) + yield parent_name + + +def _zipimport_get_resource_reader(zipimporter, fullname): + try: + if not zipimporter.is_package(fullname): + return None + except ZipImportError: + return None + return _ZipImportResourceReader(zipimporter, fullname) |