diff options
author | Barry Warsaw <barry@python.org> | 2017-12-30 20:18:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-30 20:18:06 (GMT) |
commit | deae6b451fefd5fd3143dd65051e1d341e5a5f84 (patch) | |
tree | 21d0732eb5dd8f7d5ca469b39a1f3b703f3f83f3 /Lib/importlib | |
parent | ffcb4c0165827d0a48ea973cc88bc134c74879fb (diff) | |
download | cpython-deae6b451fefd5fd3143dd65051e1d341e5a5f84.zip cpython-deae6b451fefd5fd3143dd65051e1d341e5a5f84.tar.gz cpython-deae6b451fefd5fd3143dd65051e1d341e5a5f84.tar.bz2 |
bpo-32248 - Implement importlib.resources (#4911)
Port importlib_resources to importlib.resources
Diffstat (limited to 'Lib/importlib')
-rw-r--r-- | Lib/importlib/resources.py | 303 |
1 files changed, 303 insertions, 0 deletions
# Lib/importlib/resources.py -- added as a new file (mode 100644, blob
# 8511f24) by the commit described above.  Helpers for reading resources
# (data files) that live inside an importable package.

import os
import tempfile

from importlib import abc as resources_abc
from builtins import open as builtins_open
from contextlib import contextmanager, suppress
from importlib import import_module
from importlib.abc import ResourceLoader
from io import BytesIO, TextIOWrapper
from pathlib import Path
from types import ModuleType
from typing import Iterator, Optional, Set, Union  # noqa: F401
from typing import cast
from typing import BinaryIO, TextIO
from zipfile import ZipFile


# A package may be named by its dotted import name or passed as a module.
Package = Union[str, ModuleType]
# A resource is a bare file name (no directory components).
Resource = Union[str, os.PathLike]


def _get_package(package) -> ModuleType:
    """Take a package name or module object and return the module.

    If a name, the module is imported.  If the passed or imported module
    object is not a package (its spec has no submodule search locations),
    raise TypeError.
    """
    if hasattr(package, '__spec__'):
        if package.__spec__.submodule_search_locations is None:
            raise TypeError('{!r} is not a package'.format(
                package.__spec__.name))
        return package
    module = import_module(package)
    if module.__spec__.submodule_search_locations is None:
        raise TypeError('{!r} is not a package'.format(package))
    return module


def _normalize_path(path) -> str:
    """Normalize a path by ensuring it is a string.

    If the resulting string contains path separators, ValueError is raised.
    """
    str_path = str(path)
    parent, file_name = os.path.split(str_path)
    if parent:
        raise ValueError('{!r} must be only a file name'.format(path))
    return file_name


def _get_resource_reader(
        package: ModuleType) -> 'Optional[resources_abc.ResourceReader]':
    """Return the package's loader if it looks like a ResourceReader.

    The loader qualifies when it has an ``open_resource`` attribute;
    otherwise None is returned.
    """
    # We can't use an issubclass() check here because apparently abc's
    # __subclasscheck__() hook wants to create a weak reference to the
    # object, but zipimport.zipimporter does not support weak references,
    # resulting in a TypeError.  That seems terrible.
    #
    # The ResourceReader type is written as a string so that it is only a
    # hint for type checkers and is not looked up when this code runs.
    loader = package.__spec__.loader
    if hasattr(loader, 'open_resource'):
        return cast('resources_abc.ResourceReader', loader)
    return None


def _resource_full_path(package: ModuleType, resource: str) -> str:
    """Return the path of *resource* next to the package's origin file."""
    absolute_package_path = os.path.abspath(package.__spec__.origin)
    package_path = os.path.dirname(absolute_package_path)
    return os.path.join(package_path, resource)


def _get_data_from_loader(package: ModuleType, resource: str,
                          full_path: str) -> bytes:
    """Return the bytes of *resource* via the loader's get_data().

    Used when the resource cannot be opened directly from the file system.
    Raise FileNotFoundError if the loader has no get_data() or get_data()
    fails with OSError.
    """
    loader = cast(ResourceLoader, package.__spec__.loader)
    data = None
    if hasattr(package.__spec__.loader, 'get_data'):
        with suppress(OSError):
            data = loader.get_data(full_path)
    if data is None:
        package_name = package.__spec__.name
        message = '{!r} resource not found in {!r}'.format(
            resource, package_name)
        raise FileNotFoundError(message)
    return data


def open_binary(package: Package, resource: Resource) -> BinaryIO:
    """Return a file-like object opened for binary reading of the resource.

    Raise ValueError if *resource* is not a bare file name, TypeError if
    *package* is not a package, and FileNotFoundError if the resource
    cannot be located.
    """
    resource = _normalize_path(resource)
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        return reader.open_resource(resource)
    full_path = _resource_full_path(package, resource)
    try:
        return builtins_open(full_path, mode='rb')
    except OSError:
        # Not a plain file on disk; ask the loader for the data.
        return BytesIO(_get_data_from_loader(package, resource, full_path))


def open_text(package: Package,
              resource: Resource,
              encoding: str = 'utf-8',
              errors: str = 'strict') -> TextIO:
    """Return a file-like object opened for text reading of the resource.

    *encoding* and *errors* are passed to the text layer.  Raises the same
    exceptions as open_binary().
    """
    resource = _normalize_path(resource)
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        return TextIOWrapper(reader.open_resource(resource), encoding, errors)
    full_path = _resource_full_path(package, resource)
    try:
        return builtins_open(
            full_path, mode='r', encoding=encoding, errors=errors)
    except OSError:
        # Not a plain file on disk; ask the loader for the data.
        data = _get_data_from_loader(package, resource, full_path)
        return TextIOWrapper(BytesIO(data), encoding, errors)


def read_binary(package: Package, resource: Resource) -> bytes:
    """Return the binary contents of the resource."""
    resource = _normalize_path(resource)
    package = _get_package(package)
    with open_binary(package, resource) as fp:
        return fp.read()


def read_text(package: Package,
              resource: Resource,
              encoding: str = 'utf-8',
              errors: str = 'strict') -> str:
    """Return the decoded string of the resource.

    The decoding-related arguments have the same semantics as those of
    bytes.decode().
    """
    resource = _normalize_path(resource)
    package = _get_package(package)
    with open_text(package, resource, encoding, errors) as fp:
        return fp.read()


@contextmanager
def path(package: Package, resource: Resource) -> Iterator[Path]:
    """A context manager providing a file path object to the resource.

    If the resource does not already exist on its own on the file system,
    a temporary file will be created.  If the file was created, the file
    will be deleted upon exiting the context manager (no exception is
    raised if the file was deleted prior to the context manager
    exiting).
    """
    resource = _normalize_path(resource)
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        # Only the resource_path() lookup is guarded.  The yield must stay
        # outside the try block so that a FileNotFoundError raised by the
        # caller's with-body propagates instead of being mistaken for a
        # missing resource.
        try:
            resource_path = Path(reader.resource_path(resource))
        except FileNotFoundError:
            pass
        else:
            yield resource_path
            return
    # Fall-through for both the lack of a resource reader *and* if
    # resource_path() raises FileNotFoundError.
    package_directory = Path(package.__spec__.origin).parent
    file_path = package_directory / resource
    if file_path.exists():
        yield file_path
        return
    with open_binary(package, resource) as fp:
        data = fp.read()
    # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try'
    # blocks due to the need to close the temporary file to work on
    # Windows properly.
    fd, raw_path = tempfile.mkstemp()
    try:
        # The file object owns the descriptor and closes it even if the
        # write fails.
        with os.fdopen(fd, 'wb') as tmp_file:
            tmp_file.write(data)
        yield Path(raw_path)
    finally:
        with suppress(FileNotFoundError):
            os.remove(raw_path)


def is_resource(package: Package, name: str) -> bool:
    """True if 'name' is a resource inside 'package'.

    Directories are *not* resources.
    """
    package = _get_package(package)
    # Called for validation only: raises ValueError on path separators.
    _normalize_path(name)
    reader = _get_resource_reader(package)
    if reader is not None:
        return reader.is_resource(name)
    try:
        package_contents = set(contents(package))
    except (NotADirectoryError, FileNotFoundError):
        return False
    if name not in package_contents:
        return False
    # Just because the given file_name lives as an entry in the package's
    # contents doesn't necessarily mean it's a resource.  Directories are not
    # resources, so let's try to find out if it's a directory or not.
    package_directory = Path(package.__spec__.origin).parent
    candidate = package_directory / name
    if candidate.is_file():
        return True
    if candidate.is_dir():
        return False
    # If it's not a file and it's not a directory, what is it?  Well, this
    # means the file doesn't exist on the file system, so it probably lives
    # inside a zip file.  We have to crack open the zip, look at its table of
    # contents, and make sure that this entry doesn't have sub-entries.
    archive_path = package.__spec__.loader.archive   # type: ignore
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    candidate_path = package_directory.relative_to(archive_path) / name
    for entry in toc:
        try:
            relative_to_candidate = Path(entry).relative_to(candidate_path)
        except ValueError:
            # The two paths aren't relative to each other so we can ignore it.
            continue
        # Some archives store explicit directory records, whose names end
        # with a slash.  Such a record is a directory, never a resource.
        if entry.endswith('/'):
            return False
        # Otherwise we must infer 'directory-ness' by looking at the number
        # of path components in the path relative to the package resource
        # we're looking up.  If there are zero additional parts, it's a
        # file, i.e. a resource.  If there are more than zero it's a
        # directory, i.e. not a resource.
        return len(relative_to_candidate.parts) == 0
    # I think it's impossible to get here.  It would mean that we are looking
    # for a resource in a zip file, there's an entry matching it in the return
    # value of contents(), but we never actually found it in the zip's table of
    # contents.
    raise AssertionError('Impossible situation')


def contents(package: Package) -> Iterator[str]:
    """Return an iterator over the names of the entries in 'package'.

    Note that not all entries are resources.  Specifically, directories are
    not considered resources.  Use `is_resource()` on each entry returned here
    to check if it is a resource or not.
    """
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        yield from reader.contents()
        return
    spec = package.__spec__
    # Is the package a namespace package?  By definition, namespace packages
    # cannot have resources.  Their spec has either no origin at all or the
    # marker string 'namespace' together with no location.
    if spec.origin is None or (
            spec.origin == 'namespace' and not spec.has_location):
        return
    package_directory = Path(spec.origin).parent
    try:
        entries = os.listdir(str(package_directory))
    except (NotADirectoryError, FileNotFoundError):
        # The package is probably in a zip file.
        archive_path = getattr(spec.loader, 'archive', None)
        if archive_path is None:
            raise
    else:
        yield from entries
        return
    relpath = package_directory.relative_to(archive_path)
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    seen = set()                                    # type: Set[str]
    prefix_length = len(relpath.parts)
    for filename in toc:
        entry_parts = Path(filename).parts
        # Strip off any path component parts that are in common with the
        # package directory, relative to the zip archive's file system
        # path.  This gives us all the parts that live under the named
        # package inside the zip file.  There are no such parts for the
        # package directory itself, exactly one for an entry situated
        # directly inside the package, and more than one for an entry in a
        # subdirectory of the package directory.
        if entry_parts[:prefix_length] != relpath.parts:
            continue
        subparts = entry_parts[prefix_length:]
        if not subparts:
            continue
        # Directories need not appear in the zip archive as separate
        # entries, so report the first path component of every entry below
        # the package -- but only once, whether or not the archive also
        # carries an explicit record for that directory.
        name = subparts[0]
        if name not in seen:
            seen.add(name)
            yield name