summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/whatsnew/3.7.rst8
-rw-r--r--Lib/importlib/resources.py150
-rw-r--r--Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst19
-rw-r--r--Modules/clinic/zipimport.c.h33
-rw-r--r--Modules/zipimport.c30
5 files changed, 157 insertions, 83 deletions
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 0418aa6..20be158 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -328,8 +328,12 @@ importlib.resources
This module provides several new APIs and one new ABC for access to, opening,
and reading *resources* inside packages. Resources are roughly akin to files
inside of packages, but they needn't be actual files on the physical file
-system. Module loaders can provide :class:`importlib.abc.ResourceReader`
-implementations to support this new module's API.
+system. Module loaders can provide a :meth:`get_resource_reader()` method
+that returns an :class:`importlib.abc.ResourceReader` instance to support this
+new API. Built-in file path loaders and zip file loaders both support this.
+(See the PyPI package
+`importlib_resources <http://importlib-resources.readthedocs.io/en/latest/>`_
+as a compatible backport for older Python versions.)
Improved Modules
diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py
index 20888df..bf6d703 100644
--- a/Lib/importlib/resources.py
+++ b/Lib/importlib/resources.py
@@ -12,7 +12,7 @@ from types import ModuleType
from typing import Iterator, Optional, Set, Union # noqa: F401
from typing import cast
from typing.io import BinaryIO, TextIO
-from zipfile import ZipFile
+from zipimport import ZipImportError
Package = Union[str, ModuleType]
@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
# contents doesn't necessarily mean it's a resource. Directories are not
# resources, so let's try to find out if it's a directory or not.
path = Path(package.__spec__.origin).parent / name
- if path.is_file():
- return True
- if path.is_dir():
- return False
- # If it's not a file and it's not a directory, what is it? Well, this
- # means the file doesn't exist on the file system, so it probably lives
- # inside a zip file. We have to crack open the zip, look at its table of
- # contents, and make sure that this entry doesn't have sub-entries.
- archive_path = package.__spec__.loader.archive # type: ignore
- package_directory = Path(package.__spec__.origin).parent
- with ZipFile(archive_path) as zf:
- toc = zf.namelist()
- relpath = package_directory.relative_to(archive_path)
- candidate_path = relpath / name
- for entry in toc:
- try:
- relative_to_candidate = Path(entry).relative_to(candidate_path)
- except ValueError:
- # The two paths aren't relative to each other so we can ignore it.
- continue
- # Since directories aren't explicitly listed in the zip file, we must
- # infer their 'directory-ness' by looking at the number of path
- # components in the path relative to the package resource we're
- # looking up. If there are zero additional parts, it's a file, i.e. a
- # resource. If there are more than zero it's a directory, i.e. not a
- # resource. It has to be one of these two cases.
- return len(relative_to_candidate.parts) == 0
- # I think it's impossible to get here. It would mean that we are looking
- # for a resource in a zip file, there's an entry matching it in the return
- # value of contents(), but we never actually found it in the zip's table of
- # contents.
- raise AssertionError('Impossible situation')
+ return path.is_file()
def contents(package: Package) -> Iterator[str]:
@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
not package.__spec__.has_location):
return []
package_directory = Path(package.__spec__.origin).parent
- try:
- yield from os.listdir(str(package_directory))
- except (NotADirectoryError, FileNotFoundError):
- # The package is probably in a zip file.
- archive_path = getattr(package.__spec__.loader, 'archive', None)
- if archive_path is None:
- raise
- relpath = package_directory.relative_to(archive_path)
- with ZipFile(archive_path) as zf:
- toc = zf.namelist()
- subdirs_seen = set() # type: Set
- for filename in toc:
- path = Path(filename)
- # Strip off any path component parts that are in common with the
- # package directory, relative to the zip archive's file system
- # path. This gives us all the parts that live under the named
- # package inside the zip file. If the length of these subparts is
- # exactly 1, then it is situated inside the package. The resulting
- # length will be 0 if it's above the package, and it will be
- # greater than 1 if it lives in a subdirectory of the package
- # directory.
- #
- # However, since directories themselves don't appear in the zip
- # archive as a separate entry, we need to return the first path
- # component for any case that has > 1 subparts -- but only once!
- if path.parts[:len(relpath.parts)] != relpath.parts:
+ yield from os.listdir(str(package_directory))
+
+
+# Private implementation of ResourceReader and get_resource_reader() for
+# zipimport. Don't use these directly! We're implementing these in Python
+# because 1) it's easier, 2) zipimport will likely get rewritten in Python
+# itself at some point, so doing this all in C would just be a waste of
+# effort.
+
+class _ZipImportResourceReader(resources_abc.ResourceReader):
+ """Private class used to support ZipImport.get_resource_reader().
+
+ This class is allowed to reference all the innards and private parts of
+ the zipimporter.
+ """
+
+ def __init__(self, zipimporter, fullname):
+ self.zipimporter = zipimporter
+ self.fullname = fullname
+
+ def open_resource(self, resource):
+ path = f'{self.fullname}/{resource}'
+ try:
+ return BytesIO(self.zipimporter.get_data(path))
+ except OSError:
+ raise FileNotFoundError
+
+ def resource_path(self, resource):
+ # All resources are in the zip file, so there is no path to the file.
+ # Raising FileNotFoundError tells the higher level API to extract the
+ # binary data and create a temporary file.
+ raise FileNotFoundError
+
+ def is_resource(self, name):
+ # Maybe we could do better, but if we can get the data, it's a
+ # resource. Otherwise it isn't.
+ path = f'{self.fullname}/{name}'
+ try:
+ self.zipimporter.get_data(path)
+ except OSError:
+ return False
+ return True
+
+ def contents(self):
+ # This is a bit convoluted, because fullname will be a module path,
+ # but _files is a list of file names relative to the top of the
+ # archive's namespace. We want to compare file paths to find all the
+ # names of things inside the module represented by fullname. So we
+ # turn the module path of fullname into a file path relative to the
+ # top of the archive, and then we iterate through _files looking for
+ # names inside that "directory".
+ fullname_path = Path(self.zipimporter.get_filename(self.fullname))
+ relative_path = fullname_path.relative_to(self.zipimporter.archive)
+ # Don't forget that fullname names a package, so its path will include
+ # __init__.py, which we want to ignore.
+ assert relative_path.name == '__init__.py'
+ package_path = relative_path.parent
+ subdirs_seen = set()
+ for filename in self.zipimporter._files:
+ try:
+ relative = Path(filename).relative_to(package_path)
+ except ValueError:
continue
- subparts = path.parts[len(relpath.parts):]
- if len(subparts) == 1:
- yield subparts[0]
- elif len(subparts) > 1:
- subdir = subparts[0]
- if subdir not in subdirs_seen:
- subdirs_seen.add(subdir)
- yield subdir
+ # If the path of the file (which is relative to the top of the zip
+ # namespace), relative to the package given when the resource
+ # reader was created, has a parent, then it's a name in a
+ # subdirectory, so we yield that subdirectory's name (only once).
+ parent_name = relative.parent.name
+ if len(parent_name) == 0:
+ yield relative.name
+ elif parent_name not in subdirs_seen:
+ subdirs_seen.add(parent_name)
+ yield parent_name
+
+
+def _zipimport_get_resource_reader(zipimporter, fullname):
+ try:
+ if not zipimporter.is_package(fullname):
+ return None
+ except ZipImportError:
+ return None
+ return _ZipImportResourceReader(zipimporter, fullname)
diff --git a/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst b/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst
index a41fde9..d821cf9 100644
--- a/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst
+++ b/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst
@@ -1,13 +1,6 @@
-Add :class:`importlib.abc.ResourceReader` as an ABC to provide a
-unified API for reading resources contained within packages. Loaders
-wishing to support resource reading are expected to implement the
-``get_resource_reader(fullname)`` method.
-
-Also add :mod:`importlib.resources` as the stdlib port of the
-``importlib_resources`` PyPI package. The modules provides a high-level
-API for end-users to read resources in a nicer fashion than having to
-directly interact with low-level details such as loaders.
-
-Thanks to this work, :class:`importlib.abc.ResourceLoader` has now
-been documented as deprecated due to its under-specified nature and
-lack of features as provided by :class:`importlib.abc.ResourceReader`.
+Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as
+the unified API for reading resources contained within packages. Loaders
+wishing to support resource reading must implement the
+:meth:`get_resource_reader()` method. File-based and zipimport-based loaders
+both implement these APIs. :class:`importlib.abc.ResourceLoader` is
+deprecated in favor of these new APIs.
diff --git a/Modules/clinic/zipimport.c.h b/Modules/clinic/zipimport.c.h
index 548aade..565b065 100644
--- a/Modules/clinic/zipimport.c.h
+++ b/Modules/clinic/zipimport.c.h
@@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg)
exit:
return return_value;
}
-/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__,
+"get_resource_reader($self, fullname, /)\n"
+"--\n"
+"\n"
+"Return the ResourceReader for a package in a zip file.\n"
+"\n"
+"If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n"
+"object for the package. Otherwise return None.");
+
+#define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF \
+ {"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__},
+
+static PyObject *
+zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
+ PyObject *fullname);
+
+static PyObject *
+zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ PyObject *fullname;
+
+ if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) {
+ goto exit;
+ }
+ return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
index 1d0e0ba..8501366 100644
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
Py_RETURN_NONE;
}
+/*[clinic input]
+zipimport.zipimporter.get_resource_reader
+
+ fullname: unicode
+ /
+
+Return the ResourceReader for a package in a zip file.
+
+If 'fullname' is a package within the zip file, return the 'ResourceReader'
+object for the package. Otherwise return None.
+
+[clinic start generated code]*/
+
+static PyObject *
+zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
+ PyObject *fullname)
+/*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
+{
+ PyObject *module = PyImport_ImportModule("importlib.resources");
+ if (module == NULL) {
+ return NULL;
+ }
+ PyObject *retval = PyObject_CallMethod(
+ module, "_zipimport_get_resource_reader",
+ "OO", (PyObject *)self, fullname);
+ Py_DECREF(module);
+ return retval;
+}
+
static PyMethodDef zipimporter_methods[] = {
ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
@@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = {
ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
+ ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
{NULL, NULL} /* sentinel */
};