bpo-32248 - Implement `ResourceReader` and `get_resource_reader()` for zipimport (#5248)

author: Barry Warsaw <barry@python.org> 2018-01-24 20:36:21 (GMT)
committer: GitHub <noreply@github.com> 2018-01-24 20:36:21 (GMT)
commit: 6f6eb35f9bee18f54945f09664344f2d118ed89f (patch)
tree: e1e7432ccc9f7755d85da2803181f04504ac3894 /Lib
parent: 789e359f51d2b27bea01b8c6c3bf090aaedf8839 (diff)
download: cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.zip
cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.gz
cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.bz2
1 files changed, 83 insertions, 67 deletions
diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py
index 20888df..bf6d703 100644
--- a/Lib/importlib/resources.py
+++ b/Lib/importlib/resources.py
@@ -12,7 +12,7 @@ from types import ModuleType
 from typing import Iterator, Optional, Set, Union   # noqa: F401
 from typing import cast
 from typing.io import BinaryIO, TextIO
-from zipfile import ZipFile
+from zipimport import ZipImportError
 
 
 Package = Union[str, ModuleType]
@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
     # contents doesn't necessarily mean it's a resource.  Directories are not
     # resources, so let's try to find out if it's a directory or not.
     path = Path(package.__spec__.origin).parent / name
-    if path.is_file():
-        return True
-    if path.is_dir():
-        return False
-    # If it's not a file and it's not a directory, what is it?  Well, this
-    # means the file doesn't exist on the file system, so it probably lives
-    # inside a zip file.  We have to crack open the zip, look at its table of
-    # contents, and make sure that this entry doesn't have sub-entries.
-    archive_path = package.__spec__.loader.archive   # type: ignore
-    package_directory = Path(package.__spec__.origin).parent
-    with ZipFile(archive_path) as zf:
-        toc = zf.namelist()
-    relpath = package_directory.relative_to(archive_path)
-    candidate_path = relpath / name
-    for entry in toc:
-        try:
-            relative_to_candidate = Path(entry).relative_to(candidate_path)
-        except ValueError:
-            # The two paths aren't relative to each other so we can ignore it.
-            continue
-        # Since directories aren't explicitly listed in the zip file, we must
-        # infer their 'directory-ness' by looking at the number of path
-        # components in the path relative to the package resource we're
-        # looking up.  If there are zero additional parts, it's a file, i.e. a
-        # resource.  If there are more than zero it's a directory, i.e. not a
-        # resource.  It has to be one of these two cases.
-        return len(relative_to_candidate.parts) == 0
-    # I think it's impossible to get here.  It would mean that we are looking
-    # for a resource in a zip file, there's an entry matching it in the return
-    # value of contents(), but we never actually found it in the zip's table of
-    # contents.
-    raise AssertionError('Impossible situation')
+    return path.is_file()
 
 
 def contents(package: Package) -> Iterator[str]:
@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
             not package.__spec__.has_location):
         return []
     package_directory = Path(package.__spec__.origin).parent
-    try:
-        yield from os.listdir(str(package_directory))
-    except (NotADirectoryError, FileNotFoundError):
-        # The package is probably in a zip file.
-        archive_path = getattr(package.__spec__.loader, 'archive', None)
-        if archive_path is None:
-            raise
-        relpath = package_directory.relative_to(archive_path)
-        with ZipFile(archive_path) as zf:
-            toc = zf.namelist()
-        subdirs_seen = set()                        # type: Set
-        for filename in toc:
-            path = Path(filename)
-            # Strip off any path component parts that are in common with the
-            # package directory, relative to the zip archive's file system
-            # path.  This gives us all the parts that live under the named
-            # package inside the zip file.  If the length of these subparts is
-            # exactly 1, then it is situated inside the package.  The resulting
-            # length will be 0 if it's above the package, and it will be
-            # greater than 1 if it lives in a subdirectory of the package
-            # directory.
-            #
-            # However, since directories themselves don't appear in the zip
-            # archive as a separate entry, we need to return the first path
-            # component for any case that has > 1 subparts -- but only once!
-            if path.parts[:len(relpath.parts)] != relpath.parts:
+    yield from os.listdir(str(package_directory))
+
+
+# Private implementation of ResourceReader and get_resource_reader() for
+# zipimport.  Don't use these directly!  We're implementing these in Python
+# because 1) it's easier, 2) zipimport will likely get rewritten in Python
+# itself at some point, so doing this all in C would just be a waste of
+# effort.
+
+class _ZipImportResourceReader(resources_abc.ResourceReader):
+    """Private class used to support ZipImport.get_resource_reader().
+
+    This class is allowed to reference all the innards and private parts of
+    the zipimporter.
+    """
+
+    def __init__(self, zipimporter, fullname):
+        self.zipimporter = zipimporter
+        self.fullname = fullname
+
+    def open_resource(self, resource):
+        path = f'{self.fullname}/{resource}'
+        try:
+            return BytesIO(self.zipimporter.get_data(path))
+        except OSError:
+            raise FileNotFoundError
+
+    def resource_path(self, resource):
+        # All resources are in the zip file, so there is no path to the file.
+        # Raising FileNotFoundError tells the higher level API to extract the
+        # binary data and create a temporary file.
+        raise FileNotFoundError
+
+    def is_resource(self, name):
+        # Maybe we could do better, but if we can get the data, it's a
+        # resource.  Otherwise it isn't.
+        path = f'{self.fullname}/{name}'
+        try:
+            self.zipimporter.get_data(path)
+        except OSError:
+            return False
+        return True
+
+    def contents(self):
+        # This is a bit convoluted, because fullname will be a module path,
+        # but _files is a list of file names relative to the top of the
+        # archive's namespace.  We want to compare file paths to find all the
+        # names of things inside the module represented by fullname.  So we
+        # turn the module path of fullname into a file path relative to the
+        # top of the archive, and then we iterate through _files looking for
+        # names inside that "directory".
+        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
+        relative_path = fullname_path.relative_to(self.zipimporter.archive)
+        # Don't forget that fullname names a package, so its path will include
+        # __init__.py, which we want to ignore.
+        assert relative_path.name == '__init__.py'
+        package_path = relative_path.parent
+        subdirs_seen = set()
+        for filename in self.zipimporter._files:
+            try:
+                relative = Path(filename).relative_to(package_path)
+            except ValueError:
                 continue
-            subparts = path.parts[len(relpath.parts):]
-            if len(subparts) == 1:
-                yield subparts[0]
-            elif len(subparts) > 1:
-                subdir = subparts[0]
-                if subdir not in subdirs_seen:
-                    subdirs_seen.add(subdir)
-                    yield subdir
+            # If the path of the file (which is relative to the top of the zip
+            # namespace), relative to the package given when the resource
+            # reader was created, has a parent, then it's a name in a
+            # subdirectory and thus we skip it.
+            parent_name = relative.parent.name
+            if len(parent_name) == 0:
+                yield relative.name
+            elif parent_name not in subdirs_seen:
+                subdirs_seen.add(parent_name)
+                yield parent_name
+
+
+def _zipimport_get_resource_reader(zipimporter, fullname):
+    try:
+        if not zipimporter.is_package(fullname):
+            return None
+    except ZipImportError:
+        return None
+    return _ZipImportResourceReader(zipimporter, fullname)
author	Barry Warsaw <barry@python.org>	2018-01-24 20:36:21 (GMT)
committer	GitHub <noreply@github.com>	2018-01-24 20:36:21 (GMT)
commit	6f6eb35f9bee18f54945f09664344f2d118ed89f (patch)
tree	e1e7432ccc9f7755d85da2803181f04504ac3894 /Lib
parent	789e359f51d2b27bea01b8c6c3bf090aaedf8839 (diff)
download	cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.zip cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.gz cpython-6f6eb35f9bee18f54945f09664344f2d118ed89f.tar.bz2