diff options
-rw-r--r-- | Doc/whatsnew/3.7.rst | 8 | ||||
-rw-r--r-- | Lib/importlib/resources.py | 150 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst | 19 | ||||
-rw-r--r-- | Modules/clinic/zipimport.c.h | 33 | ||||
-rw-r--r-- | Modules/zipimport.c | 30 |
5 files changed, 157 insertions, 83 deletions
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 0418aa6..20be158 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -328,8 +328,12 @@ importlib.resources This module provides several new APIs and one new ABC for access to, opening, and reading *resources* inside packages. Resources are roughly akin to files inside of packages, but they needn't be actual files on the physical file -system. Module loaders can provide :class:`importlib.abc.ResourceReader` -implementations to support this new module's API. +system. Module loaders can provide a :meth:`get_resource_reader()` function +which returns a :class:`importlib.abc.ResourceReader` instance to support this +new API. Built-in file path loaders and zip file loaders both support this. +(see the PyPI package +`importlib_resources <http://importlib-resources.readthedocs.io/en/latest/>`_ +as a compatible back port for older Python versions). Improved Modules diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py index 20888df..bf6d703 100644 --- a/Lib/importlib/resources.py +++ b/Lib/importlib/resources.py @@ -12,7 +12,7 @@ from types import ModuleType from typing import Iterator, Optional, Set, Union # noqa: F401 from typing import cast from typing.io import BinaryIO, TextIO -from zipfile import ZipFile +from zipimport import ZipImportError Package = Union[str, ModuleType] @@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool: # contents doesn't necessarily mean it's a resource. Directories are not # resources, so let's try to find out if it's a directory or not. path = Path(package.__spec__.origin).parent / name - if path.is_file(): - return True - if path.is_dir(): - return False - # If it's not a file and it's not a directory, what is it? Well, this - # means the file doesn't exist on the file system, so it probably lives - # inside a zip file. We have to crack open the zip, look at its table of - # contents, and make sure that this entry doesn't have sub-entries. - archive_path = package.__spec__.loader.archive # type: ignore - package_directory = Path(package.__spec__.origin).parent - with ZipFile(archive_path) as zf: - toc = zf.namelist() - relpath = package_directory.relative_to(archive_path) - candidate_path = relpath / name - for entry in toc: - try: - relative_to_candidate = Path(entry).relative_to(candidate_path) - except ValueError: - # The two paths aren't relative to each other so we can ignore it. - continue - # Since directories aren't explicitly listed in the zip file, we must - # infer their 'directory-ness' by looking at the number of path - # components in the path relative to the package resource we're - # looking up. If there are zero additional parts, it's a file, i.e. a - # resource. If there are more than zero it's a directory, i.e. not a - # resource. It has to be one of these two cases. - return len(relative_to_candidate.parts) == 0 - # I think it's impossible to get here. It would mean that we are looking - # for a resource in a zip file, there's an entry matching it in the return - # value of contents(), but we never actually found it in the zip's table of - # contents. - raise AssertionError('Impossible situation') + return path.is_file() def contents(package: Package) -> Iterator[str]: @@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]: not package.__spec__.has_location): return [] package_directory = Path(package.__spec__.origin).parent - try: - yield from os.listdir(str(package_directory)) - except (NotADirectoryError, FileNotFoundError): - # The package is probably in a zip file. - archive_path = getattr(package.__spec__.loader, 'archive', None) - if archive_path is None: - raise - relpath = package_directory.relative_to(archive_path) - with ZipFile(archive_path) as zf: - toc = zf.namelist() - subdirs_seen = set() # type: Set - for filename in toc: - path = Path(filename) - # Strip off any path component parts that are in common with the - # package directory, relative to the zip archive's file system - # path. This gives us all the parts that live under the named - # package inside the zip file. If the length of these subparts is - # exactly 1, then it is situated inside the package. The resulting - # length will be 0 if it's above the package, and it will be - # greater than 1 if it lives in a subdirectory of the package - # directory. - # - # However, since directories themselves don't appear in the zip - # archive as a separate entry, we need to return the first path - # component for any case that has > 1 subparts -- but only once! - if path.parts[:len(relpath.parts)] != relpath.parts: + yield from os.listdir(str(package_directory)) + + +# Private implementation of ResourceReader and get_resource_reader() for +# zipimport. Don't use these directly! We're implementing these in Python +# because 1) it's easier, 2) zipimport will likely get rewritten in Python +# itself at some point, so doing this all in C would just be a waste of +# effort. + +class _ZipImportResourceReader(resources_abc.ResourceReader): + """Private class used to support ZipImport.get_resource_reader(). + + This class is allowed to reference all the innards and private parts of + the zipimporter. + """ + + def __init__(self, zipimporter, fullname): + self.zipimporter = zipimporter + self.fullname = fullname + + def open_resource(self, resource): + path = f'{self.fullname}/{resource}' + try: + return BytesIO(self.zipimporter.get_data(path)) + except OSError: + raise FileNotFoundError + + def resource_path(self, resource): + # All resources are in the zip file, so there is no path to the file. + # Raising FileNotFoundError tells the higher level API to extract the + # binary data and create a temporary file. + raise FileNotFoundError + + def is_resource(self, name): + # Maybe we could do better, but if we can get the data, it's a + # resource. Otherwise it isn't. + path = f'{self.fullname}/{name}' + try: + self.zipimporter.get_data(path) + except OSError: + return False + return True + + def contents(self): + # This is a bit convoluted, because fullname will be a module path, + # but _files is a list of file names relative to the top of the + # archive's namespace. We want to compare file paths to find all the + # names of things inside the module represented by fullname. So we + # turn the module path of fullname into a file path relative to the + # top of the archive, and then we iterate through _files looking for + # names inside that "directory". + fullname_path = Path(self.zipimporter.get_filename(self.fullname)) + relative_path = fullname_path.relative_to(self.zipimporter.archive) + # Don't forget that fullname names a package, so its path will include + # __init__.py, which we want to ignore. + assert relative_path.name == '__init__.py' + package_path = relative_path.parent + subdirs_seen = set() + for filename in self.zipimporter._files: + try: + relative = Path(filename).relative_to(package_path) + except ValueError: continue - subparts = path.parts[len(relpath.parts):] - if len(subparts) == 1: - yield subparts[0] - elif len(subparts) > 1: - subdir = subparts[0] - if subdir not in subdirs_seen: - subdirs_seen.add(subdir) - yield subdir + # If the path of the file (which is relative to the top of the zip + # namespace), relative to the package given when the resource + # reader was created, has a parent, then it's a name in a + # subdirectory and thus we skip it. + parent_name = relative.parent.name + if len(parent_name) == 0: + yield relative.name + elif parent_name not in subdirs_seen: + subdirs_seen.add(parent_name) + yield parent_name + + +def _zipimport_get_resource_reader(zipimporter, fullname): + try: + if not zipimporter.is_package(fullname): + return None + except ZipImportError: + return None + return _ZipImportResourceReader(zipimporter, fullname) diff --git a/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst b/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst index a41fde9..d821cf9 100644 --- a/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst +++ b/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst @@ -1,13 +1,6 @@ -Add :class:`importlib.abc.ResourceReader` as an ABC to provide a -unified API for reading resources contained within packages. Loaders -wishing to support resource reading are expected to implement the -``get_resource_reader(fullname)`` method. - -Also add :mod:`importlib.resources` as the stdlib port of the -``importlib_resources`` PyPI package. The modules provides a high-level -API for end-users to read resources in a nicer fashion than having to -directly interact with low-level details such as loaders. - -Thanks to this work, :class:`importlib.abc.ResourceLoader` has now -been documented as deprecated due to its under-specified nature and -lack of features as provided by :class:`importlib.abc.ResourceReader`. +Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as +the unified API for reading resources contained within packages. Loaders +wishing to support resource reading must implement the +:meth:`get_resource_reader()` method. File-based and zipimport-based loaders +both implement these APIs. :class:`importlib.abc.ResourceLoader` is +deprecated in favor of these new APIs. diff --git a/Modules/clinic/zipimport.c.h b/Modules/clinic/zipimport.c.h index 548aade..565b065 100644 --- a/Modules/clinic/zipimport.c.h +++ b/Modules/clinic/zipimport.c.h @@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/ + +PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__, +"get_resource_reader($self, fullname, /)\n" +"--\n" +"\n" +"Return the ResourceReader for a package in a zip file.\n" +"\n" +"If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n" +"object for the package. Otherwise return None."); + +#define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF \ + {"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__}, + +static PyObject * +zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self, + PyObject *fullname); + +static PyObject * +zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *fullname; + + if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) { + goto exit; + } + return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname); + +exit: + return return_value; +} +/*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/ diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 1d0e0ba..8501366 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname) Py_RETURN_NONE; } +/*[clinic input] +zipimport.zipimporter.get_resource_reader + + fullname: unicode + / + +Return the ResourceReader for a package in a zip file. + +If 'fullname' is a package within the zip file, return the 'ResourceReader' +object for the package. Otherwise return None. + +[clinic start generated code]*/ + +static PyObject * +zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self, + PyObject *fullname) +/*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/ +{ + PyObject *module = PyImport_ImportModule("importlib.resources"); + if (module == NULL) { + return NULL; + } + PyObject *retval = PyObject_CallMethod( + module, "_zipimport_get_resource_reader", + "OO", (PyObject *)self, fullname); + Py_DECREF(module); + return retval; +} + static PyMethodDef zipimporter_methods[] = { ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF @@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = { ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF + ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF {NULL, NULL} /* sentinel */ }; |