From 37e0c7850de902179b28f1378fbbc38a5ed3628c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 2 May 2021 17:03:40 -0400 Subject: bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565) * bpo-43926: Cleaner metadata with PEP 566 JSON support. * Add blurb * Add versionchanged and versionadded declarations for changes to metadata. * Use descriptor for PEP 566 --- Doc/library/importlib.metadata.rst | 13 + Lib/importlib/_collections.py | 30 - Lib/importlib/_functools.py | 85 -- Lib/importlib/_itertools.py | 19 - Lib/importlib/metadata.py | 908 --------------------- Lib/importlib/metadata/__init__.py | 890 ++++++++++++++++++++ Lib/importlib/metadata/_adapters.py | 67 ++ Lib/importlib/metadata/_collections.py | 30 + Lib/importlib/metadata/_functools.py | 85 ++ Lib/importlib/metadata/_itertools.py | 19 + Lib/importlib/metadata/_meta.py | 29 + Lib/importlib/metadata/_text.py | 99 +++ Lib/test/test_importlib/fixtures.py | 11 + Lib/test/test_importlib/test_main.py | 6 +- Lib/test/test_importlib/test_metadata_api.py | 23 + .../2021-04-23-17-48-55.bpo-43926.HMUlGU.rst | 4 + 16 files changed, 1273 insertions(+), 1045 deletions(-) delete mode 100644 Lib/importlib/_collections.py delete mode 100644 Lib/importlib/_functools.py delete mode 100644 Lib/importlib/_itertools.py delete mode 100644 Lib/importlib/metadata.py create mode 100644 Lib/importlib/metadata/__init__.py create mode 100644 Lib/importlib/metadata/_adapters.py create mode 100644 Lib/importlib/metadata/_collections.py create mode 100644 Lib/importlib/metadata/_functools.py create mode 100644 Lib/importlib/metadata/_itertools.py create mode 100644 Lib/importlib/metadata/_meta.py create mode 100644 Lib/importlib/metadata/_text.py create mode 100644 Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 40e48d1..9bedee5 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -170,6 +170,19 @@ the values are returned unparsed from the distribution metadata:: >>> wheel_metadata['Requires-Python'] # doctest: +SKIP '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*' +``PackageMetadata`` also presents a ``json`` attribute that returns +all the metadata in a JSON-compatible form per :PEP:`566`:: + + >>> wheel_metadata.json['requires_python'] + '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*' + +.. versionchanged:: 3.10 + The ``Description`` is now included in the metadata when presented + through the payload. Line continuation characters have been removed. + +.. versionadded:: 3.10 + The ``json`` attribute was added. + .. _version: diff --git a/Lib/importlib/_collections.py b/Lib/importlib/_collections.py deleted file mode 100644 index cf0954e..0000000 --- a/Lib/importlib/_collections.py +++ /dev/null @@ -1,30 +0,0 @@ -import collections - - -# from jaraco.collections 3.3 -class FreezableDefaultDict(collections.defaultdict): - """ - Often it is desirable to prevent the mutation of - a default dict after its initial construction, such - as to prevent mutation during iteration. - - >>> dd = FreezableDefaultDict(list) - >>> dd[0].append('1') - >>> dd.freeze() - >>> dd[1] - [] - >>> len(dd) - 1 - """ - - def __missing__(self, key): - return getattr(self, '_frozen', super().__missing__)(key) - - def freeze(self): - self._frozen = lambda key: self.default_factory() - - -class Pair(collections.namedtuple('Pair', 'name value')): - @classmethod - def parse(cls, text): - return cls(*map(str.strip, text.split("=", 1))) diff --git a/Lib/importlib/_functools.py b/Lib/importlib/_functools.py deleted file mode 100644 index 73f50d0..0000000 --- a/Lib/importlib/_functools.py +++ /dev/null @@ -1,85 +0,0 @@ -import types -import functools - - -# from jaraco.functools 3.3 -def method_cache(method, cache_wrapper=None): - """ - Wrap lru_cache to support storing the cache data in the object instances. - - Abstracts the common paradigm where the method explicitly saves an - underscore-prefixed protected property on first call and returns that - subsequently. - - >>> class MyClass: - ... calls = 0 - ... - ... @method_cache - ... def method(self, value): - ... self.calls += 1 - ... return value - - >>> a = MyClass() - >>> a.method(3) - 3 - >>> for x in range(75): - ... res = a.method(x) - >>> a.calls - 75 - - Note that the apparent behavior will be exactly like that of lru_cache - except that the cache is stored on each instance, so values in one - instance will not flush values from another, and when an instance is - deleted, so are the cached values for that instance. - - >>> b = MyClass() - >>> for x in range(35): - ... res = b.method(x) - >>> b.calls - 35 - >>> a.method(0) - 0 - >>> a.calls - 75 - - Note that if method had been decorated with ``functools.lru_cache()``, - a.calls would have been 76 (due to the cached value of 0 having been - flushed by the 'b' instance). - - Clear the cache with ``.cache_clear()`` - - >>> a.method.cache_clear() - - Same for a method that hasn't yet been called. - - >>> c = MyClass() - >>> c.method.cache_clear() - - Another cache wrapper may be supplied: - - >>> cache = functools.lru_cache(maxsize=2) - >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) - >>> a = MyClass() - >>> a.method2() - 3 - - Caution - do not subsequently wrap the method with another decorator, such - as ``@property``, which changes the semantics of the function. - - See also - http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ - for another implementation and additional justification. - """ - cache_wrapper = cache_wrapper or functools.lru_cache() - - def wrapper(self, *args, **kwargs): - # it's the first call, replace the method with a cached, bound method - bound_method = types.MethodType(method, self) - cached_method = cache_wrapper(bound_method) - setattr(self, method.__name__, cached_method) - return cached_method(*args, **kwargs) - - # Support cache clear even before cache has been created. - wrapper.cache_clear = lambda: None - - return wrapper diff --git a/Lib/importlib/_itertools.py b/Lib/importlib/_itertools.py deleted file mode 100644 index dd45f2f..0000000 --- a/Lib/importlib/_itertools.py +++ /dev/null @@ -1,19 +0,0 @@ -from itertools import filterfalse - - -def unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." - # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata.py deleted file mode 100644 index 7a427eb..0000000 --- a/Lib/importlib/metadata.py +++ /dev/null @@ -1,908 +0,0 @@ -import os -import re -import abc -import csv -import sys -import email -import pathlib -import zipfile -import operator -import textwrap -import warnings -import functools -import itertools -import posixpath -import collections - -from ._collections import FreezableDefaultDict, Pair -from ._functools import method_cache -from ._itertools import unique_everseen - -from contextlib import suppress -from importlib import import_module -from importlib.abc import MetaPathFinder -from itertools import starmap -from typing import Any, List, Mapping, Optional, Protocol, TypeVar, Union - - -__all__ = [ - 'Distribution', - 'DistributionFinder', - 'PackageNotFoundError', - 'distribution', - 'distributions', - 'entry_points', - 'files', - 'metadata', - 'packages_distributions', - 'requires', - 'version', -] - - -class PackageNotFoundError(ModuleNotFoundError): - """The package was not found.""" - - def __str__(self): - tmpl = "No package metadata was found for {self.name}" - return tmpl.format(**locals()) - - @property - def name(self): - (name,) = self.args - return name - - -class Sectioned: - """ - A simple entry point config parser for performance - - >>> for item in Sectioned.read(Sectioned._sample): - ... print(item) - Pair(name='sec1', value='# comments ignored') - Pair(name='sec1', value='a = 1') - Pair(name='sec1', value='b = 2') - Pair(name='sec2', value='a = 2') - - >>> res = Sectioned.section_pairs(Sectioned._sample) - >>> item = next(res) - >>> item.name - 'sec1' - >>> item.value - Pair(name='a', value='1') - >>> item = next(res) - >>> item.value - Pair(name='b', value='2') - >>> item = next(res) - >>> item.name - 'sec2' - >>> item.value - Pair(name='a', value='2') - >>> list(res) - [] - """ - - _sample = textwrap.dedent( - """ - [sec1] - # comments ignored - a = 1 - b = 2 - - [sec2] - a = 2 - """ - ).lstrip() - - @classmethod - def section_pairs(cls, text): - return ( - section._replace(value=Pair.parse(section.value)) - for section in cls.read(text, filter_=cls.valid) - if section.name is not None - ) - - @staticmethod - def read(text, filter_=None): - lines = filter(filter_, map(str.strip, text.splitlines())) - name = None - for value in lines: - section_match = value.startswith('[') and value.endswith(']') - if section_match: - name = value.strip('[]') - continue - yield Pair(name, value) - - @staticmethod - def valid(line): - return line and not line.startswith('#') - - -class EntryPoint( - collections.namedtuple('EntryPointBase', 'name value group')): - """An entry point as defined by Python packaging conventions. - - See `the packaging docs on entry points - `_ - for more information. - """ - - pattern = re.compile( - r'(?P[\w.]+)\s*' - r'(:\s*(?P[\w.]+))?\s*' - r'(?P\[.*\])?\s*$' - ) - """ - A regular expression describing the syntax for an entry point, - which might look like: - - - module - - package.module - - package.module:attribute - - package.module:object.attribute - - package.module:attr [extra1, extra2] - - Other combinations are possible as well. - - The expression is lenient about whitespace around the ':', - following the attr, and following any extras. - """ - - dist: Optional['Distribution'] = None - - def load(self): - """Load the entry point from its definition. If only a module - is indicated by the value, return that module. Otherwise, - return the named object. - """ - match = self.pattern.match(self.value) - module = import_module(match.group('module')) - attrs = filter(None, (match.group('attr') or '').split('.')) - return functools.reduce(getattr, attrs, module) - - @property - def module(self): - match = self.pattern.match(self.value) - return match.group('module') - - @property - def attr(self): - match = self.pattern.match(self.value) - return match.group('attr') - - @property - def extras(self): - match = self.pattern.match(self.value) - return list(re.finditer(r'\w+', match.group('extras') or '')) - - def _for(self, dist): - self.dist = dist - return self - - def __iter__(self): - """ - Supply iter so one may construct dicts of EntryPoints by name. - """ - msg = ( - "Construction of dict of EntryPoints is deprecated in " - "favor of EntryPoints." - ) - warnings.warn(msg, DeprecationWarning) - return iter((self.name, self)) - - def __reduce__(self): - return ( - self.__class__, - (self.name, self.value, self.group), - ) - - def matches(self, **params): - attrs = (getattr(self, param) for param in params) - return all(map(operator.eq, params.values(), attrs)) - - -class EntryPoints(tuple): - """ - An immutable collection of selectable EntryPoint objects. - """ - - __slots__ = () - - def __getitem__(self, name): # -> EntryPoint: - """ - Get the EntryPoint in self matching name. - """ - try: - return next(iter(self.select(name=name))) - except StopIteration: - raise KeyError(name) - - def select(self, **params): - """ - Select entry points from self that match the - given parameters (typically group and/or name). - """ - return EntryPoints(ep for ep in self if ep.matches(**params)) - - @property - def names(self): - """ - Return the set of all names of all entry points. - """ - return set(ep.name for ep in self) - - @property - def groups(self): - """ - Return the set of all groups of all entry points. - - For coverage while SelectableGroups is present. - >>> EntryPoints().groups - set() - """ - return set(ep.group for ep in self) - - @classmethod - def _from_text_for(cls, text, dist): - return cls(ep._for(dist) for ep in cls._from_text(text)) - - @classmethod - def _from_text(cls, text): - return itertools.starmap(EntryPoint, cls._parse_groups(text or '')) - - @staticmethod - def _parse_groups(text): - return ( - (item.value.name, item.value.value, item.name) - for item in Sectioned.section_pairs(text) - ) - - -def flake8_bypass(func): - # defer inspect import as performance optimization. - import inspect - - is_flake8 = any('flake8' in str(frame.filename) for frame in inspect.stack()[:5]) - return func if not is_flake8 else lambda: None - - -class Deprecated: - """ - Compatibility add-in for mapping to indicate that - mapping behavior is deprecated. - - >>> recwarn = getfixture('recwarn') - >>> class DeprecatedDict(Deprecated, dict): pass - >>> dd = DeprecatedDict(foo='bar') - >>> dd.get('baz', None) - >>> dd['foo'] - 'bar' - >>> list(dd) - ['foo'] - >>> list(dd.keys()) - ['foo'] - >>> 'foo' in dd - True - >>> list(dd.values()) - ['bar'] - >>> len(recwarn) - 1 - """ - - _warn = functools.partial( - warnings.warn, - "SelectableGroups dict interface is deprecated. Use select.", - DeprecationWarning, - stacklevel=2, - ) - - def __getitem__(self, name): - self._warn() - return super().__getitem__(name) - - def get(self, name, default=None): - flake8_bypass(self._warn)() - return super().get(name, default) - - def __iter__(self): - self._warn() - return super().__iter__() - - def __contains__(self, *args): - self._warn() - return super().__contains__(*args) - - def keys(self): - self._warn() - return super().keys() - - def values(self): - self._warn() - return super().values() - - -class SelectableGroups(Deprecated, dict): - """ - A backward- and forward-compatible result from - entry_points that fully implements the dict interface. - """ - - @classmethod - def load(cls, eps): - by_group = operator.attrgetter('group') - ordered = sorted(eps, key=by_group) - grouped = itertools.groupby(ordered, by_group) - return cls((group, EntryPoints(eps)) for group, eps in grouped) - - @property - def _all(self): - """ - Reconstruct a list of all entrypoints from the groups. - """ - groups = super(Deprecated, self).values() - return EntryPoints(itertools.chain.from_iterable(groups)) - - @property - def groups(self): - return self._all.groups - - @property - def names(self): - """ - for coverage: - >>> SelectableGroups().names - set() - """ - return self._all.names - - def select(self, **params): - if not params: - return self - return self._all.select(**params) - - -class PackagePath(pathlib.PurePosixPath): - """A reference to a path in a package""" - - def read_text(self, encoding='utf-8'): - with self.locate().open(encoding=encoding) as stream: - return stream.read() - - def read_binary(self): - with self.locate().open('rb') as stream: - return stream.read() - - def locate(self): - """Return a path-like object for this path""" - return self.dist.locate_file(self) - - -class FileHash: - def __init__(self, spec): - self.mode, _, self.value = spec.partition('=') - - def __repr__(self): - return ''.format(self.mode, self.value) - - -_T = TypeVar("_T") - - -class PackageMetadata(Protocol): - def __len__(self) -> int: - ... # pragma: no cover - - def __contains__(self, item: str) -> bool: - ... # pragma: no cover - - def __getitem__(self, key: str) -> str: - ... # pragma: no cover - - def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: - """ - Return all values associated with a possibly multi-valued key. - """ - - -class Distribution: - """A Python distribution package.""" - - @abc.abstractmethod - def read_text(self, filename): - """Attempt to load metadata file given by the name. - - :param filename: The name of the file in the distribution info. - :return: The text if found, otherwise None. - """ - - @abc.abstractmethod - def locate_file(self, path): - """ - Given a path to a file in this distribution, return a path - to it. - """ - - @classmethod - def from_name(cls, name): - """Return the Distribution for the given package name. - - :param name: The name of the distribution package to search for. - :return: The Distribution instance (or subclass thereof) for the named - package, if found. - :raises PackageNotFoundError: When the named package's distribution - metadata cannot be found. - """ - for resolver in cls._discover_resolvers(): - dists = resolver(DistributionFinder.Context(name=name)) - dist = next(iter(dists), None) - if dist is not None: - return dist - else: - raise PackageNotFoundError(name) - - @classmethod - def discover(cls, **kwargs): - """Return an iterable of Distribution objects for all packages. - - Pass a ``context`` or pass keyword arguments for constructing - a context. - - :context: A ``DistributionFinder.Context`` object. - :return: Iterable of Distribution objects for all packages. - """ - context = kwargs.pop('context', None) - if context and kwargs: - raise ValueError("cannot accept context and kwargs") - context = context or DistributionFinder.Context(**kwargs) - return itertools.chain.from_iterable( - resolver(context) for resolver in cls._discover_resolvers() - ) - - @staticmethod - def at(path): - """Return a Distribution for the indicated metadata path - - :param path: a string or path-like object - :return: a concrete Distribution instance for the path - """ - return PathDistribution(pathlib.Path(path)) - - @staticmethod - def _discover_resolvers(): - """Search the meta_path for resolvers.""" - declared = ( - getattr(finder, 'find_distributions', None) for finder in sys.meta_path - ) - return filter(None, declared) - - @classmethod - def _local(cls, root='.'): - from pep517 import build, meta - - system = build.compat_system(root) - builder = functools.partial( - meta.build, - source_dir=root, - system=system, - ) - return PathDistribution(zipfile.Path(meta.build_as_zip(builder))) - - @property - def metadata(self) -> PackageMetadata: - """Return the parsed metadata for this Distribution. - - The returned object will have keys that name the various bits of - metadata. See PEP 566 for details. - """ - text = ( - self.read_text('METADATA') - or self.read_text('PKG-INFO') - # This last clause is here to support old egg-info files. Its - # effect is to just end up using the PathDistribution's self._path - # (which points to the egg-info file) attribute unchanged. - or self.read_text('') - ) - return email.message_from_string(text) - - @property - def name(self): - """Return the 'Name' metadata for the distribution package.""" - return self.metadata['Name'] - - @property - def version(self): - """Return the 'Version' metadata for the distribution package.""" - return self.metadata['Version'] - - @property - def entry_points(self): - return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) - - @property - def files(self): - """Files in this distribution. - - :return: List of PackagePath for this distribution or None - - Result is `None` if the metadata file that enumerates files - (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is - missing. - Result may be empty if the metadata exists but is empty. - """ - file_lines = self._read_files_distinfo() or self._read_files_egginfo() - - def make_file(name, hash=None, size_str=None): - result = PackagePath(name) - result.hash = FileHash(hash) if hash else None - result.size = int(size_str) if size_str else None - result.dist = self - return result - - return file_lines and list(starmap(make_file, csv.reader(file_lines))) - - def _read_files_distinfo(self): - """ - Read the lines of RECORD - """ - text = self.read_text('RECORD') - return text and text.splitlines() - - def _read_files_egginfo(self): - """ - SOURCES.txt might contain literal commas, so wrap each line - in quotes. - """ - text = self.read_text('SOURCES.txt') - return text and map('"{}"'.format, text.splitlines()) - - @property - def requires(self): - """Generated requirements specified for this Distribution""" - reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() - return reqs and list(reqs) - - def _read_dist_info_reqs(self): - return self.metadata.get_all('Requires-Dist') - - def _read_egg_info_reqs(self): - source = self.read_text('requires.txt') - return source and self._deps_from_requires_text(source) - - @classmethod - def _deps_from_requires_text(cls, source): - return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) - - @staticmethod - def _convert_egg_info_reqs_to_simple_reqs(sections): - """ - Historically, setuptools would solicit and store 'extra' - requirements, including those with environment markers, - in separate sections. More modern tools expect each - dependency to be defined separately, with any relevant - extras and environment markers attached directly to that - requirement. This method converts the former to the - latter. See _test_deps_from_requires_text for an example. - """ - - def make_condition(name): - return name and 'extra == "{name}"'.format(name=name) - - def parse_condition(section): - section = section or '' - extra, sep, markers = section.partition(':') - if extra and markers: - markers = '({markers})'.format(markers=markers) - conditions = list(filter(None, [markers, make_condition(extra)])) - return '; ' + ' and '.join(conditions) if conditions else '' - - for section in sections: - yield section.value + parse_condition(section.name) - - -class DistributionFinder(MetaPathFinder): - """ - A MetaPathFinder capable of discovering installed distributions. - """ - - class Context: - """ - Keyword arguments presented by the caller to - ``distributions()`` or ``Distribution.discover()`` - to narrow the scope of a search for distributions - in all DistributionFinders. - - Each DistributionFinder may expect any parameters - and should attempt to honor the canonical - parameters defined below when appropriate. - """ - - name = None - """ - Specific name for which a distribution finder should match. - A name of ``None`` matches all distributions. - """ - - def __init__(self, **kwargs): - vars(self).update(kwargs) - - @property - def path(self): - """ - The path that a distribution finder should search. - - Typically refers to Python package paths and defaults - to ``sys.path``. - """ - return vars(self).get('path', sys.path) - - @abc.abstractmethod - def find_distributions(self, context=Context()): - """ - Find distributions. - - Return an iterable of all Distribution instances capable of - loading the metadata for packages matching the ``context``, - a DistributionFinder.Context instance. - """ - - -class FastPath: - """ - Micro-optimized class for searching a path for - children. - """ - - @functools.lru_cache() # type: ignore - def __new__(cls, root): - return super().__new__(cls) - - def __init__(self, root): - self.root = root - self.base = os.path.basename(self.root).lower() - - def joinpath(self, child): - return pathlib.Path(self.root, child) - - def children(self): - with suppress(Exception): - return os.listdir(self.root or '') - with suppress(Exception): - return self.zip_children() - return [] - - def zip_children(self): - zip_path = zipfile.Path(self.root) - names = zip_path.root.namelist() - self.joinpath = zip_path.joinpath - - return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) - - def search(self, name): - return self.lookup(self.mtime).search(name) - - @property - def mtime(self): - with suppress(OSError): - return os.stat(self.root).st_mtime - self.lookup.cache_clear() - - @method_cache - def lookup(self, mtime): - return Lookup(self) - - -class Lookup: - def __init__(self, path: FastPath): - base = os.path.basename(path.root).lower() - base_is_egg = base.endswith(".egg") - self.infos = FreezableDefaultDict(list) - self.eggs = FreezableDefaultDict(list) - - for child in path.children(): - low = child.lower() - if low.endswith((".dist-info", ".egg-info")): - # rpartition is faster than splitext and suitable for this purpose. - name = low.rpartition(".")[0].partition("-")[0] - normalized = Prepared.normalize(name) - self.infos[normalized].append(path.joinpath(child)) - elif base_is_egg and low == "egg-info": - name = base.rpartition(".")[0].partition("-")[0] - legacy_normalized = Prepared.legacy_normalize(name) - self.eggs[legacy_normalized].append(path.joinpath(child)) - - self.infos.freeze() - self.eggs.freeze() - - def search(self, prepared): - infos = ( - self.infos[prepared.normalized] - if prepared - else itertools.chain.from_iterable(self.infos.values()) - ) - eggs = ( - self.eggs[prepared.legacy_normalized] - if prepared - else itertools.chain.from_iterable(self.eggs.values()) - ) - return itertools.chain(infos, eggs) - - -class Prepared: - """ - A prepared search for metadata on a possibly-named package. - """ - - normalized = None - legacy_normalized = None - - def __init__(self, name): - self.name = name - if name is None: - return - self.normalized = self.normalize(name) - self.legacy_normalized = self.legacy_normalize(name) - - @staticmethod - def normalize(name): - """ - PEP 503 normalization plus dashes as underscores. - """ - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') - - @staticmethod - def legacy_normalize(name): - """ - Normalize the package name as found in the convention in - older packaging tools versions and specs. - """ - return name.lower().replace('-', '_') - - def __bool__(self): - return bool(self.name) - - -class MetadataPathFinder(DistributionFinder): - @classmethod - def find_distributions(cls, context=DistributionFinder.Context()): - """ - Find distributions. - - Return an iterable of all Distribution instances capable of - loading the metadata for packages matching ``context.name`` - (or all names if ``None`` indicated) along the paths in the list - of directories ``context.path``. - """ - found = cls._search_paths(context.name, context.path) - return map(PathDistribution, found) - - @classmethod - def _search_paths(cls, name, paths): - """Find metadata directories in paths heuristically.""" - prepared = Prepared(name) - return itertools.chain.from_iterable( - path.search(prepared) for path in map(FastPath, paths) - ) - - def invalidate_caches(cls): - FastPath.__new__.cache_clear() - - -class PathDistribution(Distribution): - def __init__(self, path): - """Construct a distribution from a path to the metadata directory. - - :param path: A pathlib.Path or similar object supporting - .joinpath(), __div__, .parent, and .read_text(). - """ - self._path = path - - def read_text(self, filename): - with suppress( - FileNotFoundError, - IsADirectoryError, - KeyError, - NotADirectoryError, - PermissionError, - ): - return self._path.joinpath(filename).read_text(encoding='utf-8') - - read_text.__doc__ = Distribution.read_text.__doc__ - - def locate_file(self, path): - return self._path.parent / path - - -def distribution(distribution_name): - """Get the ``Distribution`` instance for the named package. - - :param distribution_name: The name of the distribution package as a string. - :return: A ``Distribution`` instance (or subclass thereof). - """ - return Distribution.from_name(distribution_name) - - -def distributions(**kwargs): - """Get all ``Distribution`` instances in the current environment. - - :return: An iterable of ``Distribution`` instances. - """ - return Distribution.discover(**kwargs) - - -def metadata(distribution_name) -> PackageMetadata: - """Get the metadata for the named package. - - :param distribution_name: The name of the distribution package to query. - :return: A PackageMetadata containing the parsed metadata. - """ - return Distribution.from_name(distribution_name).metadata - - -def version(distribution_name): - """Get the version string for the named package. - - :param distribution_name: The name of the distribution package to query. - :return: The version string for the package as defined in the package's - "Version" metadata key. - """ - return distribution(distribution_name).version - - -def entry_points(**params) -> Union[EntryPoints, SelectableGroups]: - """Return EntryPoint objects for all installed packages. - - Pass selection parameters (group or name) to filter the - result to entry points matching those properties (see - EntryPoints.select()). - - For compatibility, returns ``SelectableGroups`` object unless - selection parameters are supplied. In the future, this function - will return ``EntryPoints`` instead of ``SelectableGroups`` - even when no selection parameters are supplied. - - For maximum future compatibility, pass selection parameters - or invoke ``.select`` with parameters on the result. - - :return: EntryPoints or SelectableGroups for all installed packages. - """ - unique = functools.partial(unique_everseen, key=operator.attrgetter('name')) - eps = itertools.chain.from_iterable( - dist.entry_points for dist in unique(distributions()) - ) - return SelectableGroups.load(eps).select(**params) - - -def files(distribution_name): - """Return a list of files for the named package. - - :param distribution_name: The name of the distribution package to query. - :return: List of files composing the distribution. - """ - return distribution(distribution_name).files - - -def requires(distribution_name): - """ - Return a list of requirements for the named package. - - :return: An iterator of requirements, suitable for - packaging.requirement.Requirement. - """ - return distribution(distribution_name).requires - - -def packages_distributions() -> Mapping[str, List[str]]: - """ - Return a mapping of top-level packages to their - distributions. - - >>> import collections.abc - >>> pkgs = packages_distributions() - >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) - True - """ - pkg_to_dist = collections.defaultdict(list) - for dist in distributions(): - for pkg in (dist.read_text('top_level.txt') or '').split(): - pkg_to_dist[pkg].append(dist.metadata['Name']) - return dict(pkg_to_dist) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py new file mode 100644 index 0000000..1421621 --- /dev/null +++ b/Lib/importlib/metadata/__init__.py @@ -0,0 +1,890 @@ +import os +import re +import abc +import csv +import sys +import email +import pathlib +import zipfile +import operator +import textwrap +import warnings +import functools +import itertools +import posixpath +import collections + +from . import _adapters, _meta +from ._collections import FreezableDefaultDict, Pair +from ._functools import method_cache +from ._itertools import unique_everseen + +from contextlib import suppress +from importlib import import_module +from importlib.abc import MetaPathFinder +from itertools import starmap +from typing import List, Mapping, Optional, Union + + +__all__ = [ + 'Distribution', + 'DistributionFinder', + 'PackageNotFoundError', + 'distribution', + 'distributions', + 'entry_points', + 'files', + 'metadata', + 'packages_distributions', + 'requires', + 'version', +] + + +class PackageNotFoundError(ModuleNotFoundError): + """The package was not found.""" + + def __str__(self): + tmpl = "No package metadata was found for {self.name}" + return tmpl.format(**locals()) + + @property + def name(self): + (name,) = self.args + return name + + +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line): + return line and not line.startswith('#') + + +class EntryPoint( + collections.namedtuple('EntryPointBase', 'name value group')): + """An entry point as defined by Python packaging conventions. + + See `the packaging docs on entry points + `_ + for more information. + """ + + pattern = re.compile( + r'(?P[\w.]+)\s*' + r'(:\s*(?P[\w.]+))?\s*' + r'(?P\[.*\])?\s*$' + ) + """ + A regular expression describing the syntax for an entry point, + which might look like: + + - module + - package.module + - package.module:attribute + - package.module:object.attribute + - package.module:attr [extra1, extra2] + + Other combinations are possible as well. + + The expression is lenient about whitespace around the ':', + following the attr, and following any extras. + """ + + dist: Optional['Distribution'] = None + + def load(self): + """Load the entry point from its definition. If only a module + is indicated by the value, return that module. Otherwise, + return the named object. + """ + match = self.pattern.match(self.value) + module = import_module(match.group('module')) + attrs = filter(None, (match.group('attr') or '').split('.')) + return functools.reduce(getattr, attrs, module) + + @property + def module(self): + match = self.pattern.match(self.value) + return match.group('module') + + @property + def attr(self): + match = self.pattern.match(self.value) + return match.group('attr') + + @property + def extras(self): + match = self.pattern.match(self.value) + return list(re.finditer(r'\w+', match.group('extras') or '')) + + def _for(self, dist): + self.dist = dist + return self + + def __iter__(self): + """ + Supply iter so one may construct dicts of EntryPoints by name. + """ + msg = ( + "Construction of dict of EntryPoints is deprecated in " + "favor of EntryPoints." + ) + warnings.warn(msg, DeprecationWarning) + return iter((self.name, self)) + + def __reduce__(self): + return ( + self.__class__, + (self.name, self.value, self.group), + ) + + def matches(self, **params): + attrs = (getattr(self, param) for param in params) + return all(map(operator.eq, params.values(), attrs)) + + +class EntryPoints(tuple): + """ + An immutable collection of selectable EntryPoint objects. + """ + + __slots__ = () + + def __getitem__(self, name): # -> EntryPoint: + """ + Get the EntryPoint in self matching name. + """ + try: + return next(iter(self.select(name=name))) + except StopIteration: + raise KeyError(name) + + def select(self, **params): + """ + Select entry points from self that match the + given parameters (typically group and/or name). + """ + return EntryPoints(ep for ep in self if ep.matches(**params)) + + @property + def names(self): + """ + Return the set of all names of all entry points. + """ + return set(ep.name for ep in self) + + @property + def groups(self): + """ + Return the set of all groups of all entry points. + + For coverage while SelectableGroups is present. + >>> EntryPoints().groups + set() + """ + return set(ep.group for ep in self) + + @classmethod + def _from_text_for(cls, text, dist): + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @classmethod + def _from_text(cls, text): + return itertools.starmap(EntryPoint, cls._parse_groups(text or '')) + + @staticmethod + def _parse_groups(text): + return ( + (item.value.name, item.value.value, item.name) + for item in Sectioned.section_pairs(text) + ) + + +def flake8_bypass(func): + # defer inspect import as performance optimization. + import inspect + + is_flake8 = any('flake8' in str(frame.filename) for frame in inspect.stack()[:5]) + return func if not is_flake8 else lambda: None + + +class Deprecated: + """ + Compatibility add-in for mapping to indicate that + mapping behavior is deprecated. + + >>> recwarn = getfixture('recwarn') + >>> class DeprecatedDict(Deprecated, dict): pass + >>> dd = DeprecatedDict(foo='bar') + >>> dd.get('baz', None) + >>> dd['foo'] + 'bar' + >>> list(dd) + ['foo'] + >>> list(dd.keys()) + ['foo'] + >>> 'foo' in dd + True + >>> list(dd.values()) + ['bar'] + >>> len(recwarn) + 1 + """ + + _warn = functools.partial( + warnings.warn, + "SelectableGroups dict interface is deprecated. Use select.", + DeprecationWarning, + stacklevel=2, + ) + + def __getitem__(self, name): + self._warn() + return super().__getitem__(name) + + def get(self, name, default=None): + flake8_bypass(self._warn)() + return super().get(name, default) + + def __iter__(self): + self._warn() + return super().__iter__() + + def __contains__(self, *args): + self._warn() + return super().__contains__(*args) + + def keys(self): + self._warn() + return super().keys() + + def values(self): + self._warn() + return super().values() + + +class SelectableGroups(Deprecated, dict): + """ + A backward- and forward-compatible result from + entry_points that fully implements the dict interface. + """ + + @classmethod + def load(cls, eps): + by_group = operator.attrgetter('group') + ordered = sorted(eps, key=by_group) + grouped = itertools.groupby(ordered, by_group) + return cls((group, EntryPoints(eps)) for group, eps in grouped) + + @property + def _all(self): + """ + Reconstruct a list of all entrypoints from the groups. + """ + groups = super(Deprecated, self).values() + return EntryPoints(itertools.chain.from_iterable(groups)) + + @property + def groups(self): + return self._all.groups + + @property + def names(self): + """ + for coverage: + >>> SelectableGroups().names + set() + """ + return self._all.names + + def select(self, **params): + if not params: + return self + return self._all.select(**params) + + +class PackagePath(pathlib.PurePosixPath): + """A reference to a path in a package""" + + def read_text(self, encoding='utf-8'): + with self.locate().open(encoding=encoding) as stream: + return stream.read() + + def read_binary(self): + with self.locate().open('rb') as stream: + return stream.read() + + def locate(self): + """Return a path-like object for this path""" + return self.dist.locate_file(self) + + +class FileHash: + def __init__(self, spec): + self.mode, _, self.value = spec.partition('=') + + def __repr__(self): + return ''.format(self.mode, self.value) + + +class Distribution: + """A Python distribution package.""" + + @abc.abstractmethod + def read_text(self, filename): + """Attempt to load metadata file given by the name. + + :param filename: The name of the file in the distribution info. + :return: The text if found, otherwise None. + """ + + @abc.abstractmethod + def locate_file(self, path): + """ + Given a path to a file in this distribution, return a path + to it. + """ + + @classmethod + def from_name(cls, name): + """Return the Distribution for the given package name. + + :param name: The name of the distribution package to search for. + :return: The Distribution instance (or subclass thereof) for the named + package, if found. + :raises PackageNotFoundError: When the named package's distribution + metadata cannot be found. + """ + for resolver in cls._discover_resolvers(): + dists = resolver(DistributionFinder.Context(name=name)) + dist = next(iter(dists), None) + if dist is not None: + return dist + else: + raise PackageNotFoundError(name) + + @classmethod + def discover(cls, **kwargs): + """Return an iterable of Distribution objects for all packages. + + Pass a ``context`` or pass keyword arguments for constructing + a context. + + :context: A ``DistributionFinder.Context`` object. + :return: Iterable of Distribution objects for all packages. + """ + context = kwargs.pop('context', None) + if context and kwargs: + raise ValueError("cannot accept context and kwargs") + context = context or DistributionFinder.Context(**kwargs) + return itertools.chain.from_iterable( + resolver(context) for resolver in cls._discover_resolvers() + ) + + @staticmethod + def at(path): + """Return a Distribution for the indicated metadata path + + :param path: a string or path-like object + :return: a concrete Distribution instance for the path + """ + return PathDistribution(pathlib.Path(path)) + + @staticmethod + def _discover_resolvers(): + """Search the meta_path for resolvers.""" + declared = ( + getattr(finder, 'find_distributions', None) for finder in sys.meta_path + ) + return filter(None, declared) + + @classmethod + def _local(cls, root='.'): + from pep517 import build, meta + + system = build.compat_system(root) + builder = functools.partial( + meta.build, + source_dir=root, + system=system, + ) + return PathDistribution(zipfile.Path(meta.build_as_zip(builder))) + + @property + def metadata(self) -> _meta.PackageMetadata: + """Return the parsed metadata for this Distribution. + + The returned object will have keys that name the various bits of + metadata. See PEP 566 for details. + """ + text = ( + self.read_text('METADATA') + or self.read_text('PKG-INFO') + # This last clause is here to support old egg-info files. Its + # effect is to just end up using the PathDistribution's self._path + # (which points to the egg-info file) attribute unchanged. + or self.read_text('') + ) + return _adapters.Message(email.message_from_string(text)) + + @property + def name(self): + """Return the 'Name' metadata for the distribution package.""" + return self.metadata['Name'] + + @property + def version(self): + """Return the 'Version' metadata for the distribution package.""" + return self.metadata['Version'] + + @property + def entry_points(self): + return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) + + @property + def files(self): + """Files in this distribution. + + :return: List of PackagePath for this distribution or None + + Result is `None` if the metadata file that enumerates files + (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is + missing. + Result may be empty if the metadata exists but is empty. + """ + file_lines = self._read_files_distinfo() or self._read_files_egginfo() + + def make_file(name, hash=None, size_str=None): + result = PackagePath(name) + result.hash = FileHash(hash) if hash else None + result.size = int(size_str) if size_str else None + result.dist = self + return result + + return file_lines and list(starmap(make_file, csv.reader(file_lines))) + + def _read_files_distinfo(self): + """ + Read the lines of RECORD + """ + text = self.read_text('RECORD') + return text and text.splitlines() + + def _read_files_egginfo(self): + """ + SOURCES.txt might contain literal commas, so wrap each line + in quotes. + """ + text = self.read_text('SOURCES.txt') + return text and map('"{}"'.format, text.splitlines()) + + @property + def requires(self): + """Generated requirements specified for this Distribution""" + reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() + return reqs and list(reqs) + + def _read_dist_info_reqs(self): + return self.metadata.get_all('Requires-Dist') + + def _read_egg_info_reqs(self): + source = self.read_text('requires.txt') + return source and self._deps_from_requires_text(source) + + @classmethod + def _deps_from_requires_text(cls, source): + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) + + @staticmethod + def _convert_egg_info_reqs_to_simple_reqs(sections): + """ + Historically, setuptools would solicit and store 'extra' + requirements, including those with environment markers, + in separate sections. More modern tools expect each + dependency to be defined separately, with any relevant + extras and environment markers attached directly to that + requirement. This method converts the former to the + latter. See _test_deps_from_requires_text for an example. + """ + + def make_condition(name): + return name and 'extra == "{name}"'.format(name=name) + + def parse_condition(section): + section = section or '' + extra, sep, markers = section.partition(':') + if extra and markers: + markers = '({markers})'.format(markers=markers) + conditions = list(filter(None, [markers, make_condition(extra)])) + return '; ' + ' and '.join(conditions) if conditions else '' + + for section in sections: + yield section.value + parse_condition(section.name) + + +class DistributionFinder(MetaPathFinder): + """ + A MetaPathFinder capable of discovering installed distributions. + """ + + class Context: + """ + Keyword arguments presented by the caller to + ``distributions()`` or ``Distribution.discover()`` + to narrow the scope of a search for distributions + in all DistributionFinders. + + Each DistributionFinder may expect any parameters + and should attempt to honor the canonical + parameters defined below when appropriate. + """ + + name = None + """ + Specific name for which a distribution finder should match. + A name of ``None`` matches all distributions. + """ + + def __init__(self, **kwargs): + vars(self).update(kwargs) + + @property + def path(self): + """ + The path that a distribution finder should search. + + Typically refers to Python package paths and defaults + to ``sys.path``. + """ + return vars(self).get('path', sys.path) + + @abc.abstractmethod + def find_distributions(self, context=Context()): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching the ``context``, + a DistributionFinder.Context instance. + """ + + +class FastPath: + """ + Micro-optimized class for searching a path for + children. + """ + + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + + def __init__(self, root): + self.root = root + self.base = os.path.basename(self.root).lower() + + def joinpath(self, child): + return pathlib.Path(self.root, child) + + def children(self): + with suppress(Exception): + return os.listdir(self.root or '') + with suppress(Exception): + return self.zip_children() + return [] + + def zip_children(self): + zip_path = zipfile.Path(self.root) + names = zip_path.root.namelist() + self.joinpath = zip_path.joinpath + + return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) + + def search(self, name): + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + def __init__(self, path: FastPath): + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared): + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) + ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) + + +class Prepared: + """ + A prepared search for metadata on a possibly-named package. + """ + + normalized = None + legacy_normalized = None + + def __init__(self, name): + self.name = name + if name is None: + return + self.normalized = self.normalize(name) + self.legacy_normalized = self.legacy_normalize(name) + + @staticmethod + def normalize(name): + """ + PEP 503 normalization plus dashes as underscores. + """ + return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + + @staticmethod + def legacy_normalize(name): + """ + Normalize the package name as found in the convention in + older packaging tools versions and specs. + """ + return name.lower().replace('-', '_') + + def __bool__(self): + return bool(self.name) + + +class MetadataPathFinder(DistributionFinder): + @classmethod + def find_distributions(cls, context=DistributionFinder.Context()): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + found = cls._search_paths(context.name, context.path) + return map(PathDistribution, found) + + @classmethod + def _search_paths(cls, name, paths): + """Find metadata directories in paths heuristically.""" + prepared = Prepared(name) + return itertools.chain.from_iterable( + path.search(prepared) for path in map(FastPath, paths) + ) + + def invalidate_caches(cls): + FastPath.__new__.cache_clear() + + +class PathDistribution(Distribution): + def __init__(self, path): + """Construct a distribution from a path to the metadata directory. + + :param path: A pathlib.Path or similar object supporting + .joinpath(), __div__, .parent, and .read_text(). + """ + self._path = path + + def read_text(self, filename): + with suppress( + FileNotFoundError, + IsADirectoryError, + KeyError, + NotADirectoryError, + PermissionError, + ): + return self._path.joinpath(filename).read_text(encoding='utf-8') + + read_text.__doc__ = Distribution.read_text.__doc__ + + def locate_file(self, path): + return self._path.parent / path + + +def distribution(distribution_name): + """Get the ``Distribution`` instance for the named package. + + :param distribution_name: The name of the distribution package as a string. + :return: A ``Distribution`` instance (or subclass thereof). + """ + return Distribution.from_name(distribution_name) + + +def distributions(**kwargs): + """Get all ``Distribution`` instances in the current environment. + + :return: An iterable of ``Distribution`` instances. + """ + return Distribution.discover(**kwargs) + + +def metadata(distribution_name) -> _meta.PackageMetadata: + """Get the metadata for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: A PackageMetadata containing the parsed metadata. + """ + return Distribution.from_name(distribution_name).metadata + + +def version(distribution_name): + """Get the version string for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: The version string for the package as defined in the package's + "Version" metadata key. + """ + return distribution(distribution_name).version + + +def entry_points(**params) -> Union[EntryPoints, SelectableGroups]: + """Return EntryPoint objects for all installed packages. + + Pass selection parameters (group or name) to filter the + result to entry points matching those properties (see + EntryPoints.select()). + + For compatibility, returns ``SelectableGroups`` object unless + selection parameters are supplied. In the future, this function + will return ``EntryPoints`` instead of ``SelectableGroups`` + even when no selection parameters are supplied. + + For maximum future compatibility, pass selection parameters + or invoke ``.select`` with parameters on the result. + + :return: EntryPoints or SelectableGroups for all installed packages. + """ + unique = functools.partial(unique_everseen, key=operator.attrgetter('name')) + eps = itertools.chain.from_iterable( + dist.entry_points for dist in unique(distributions()) + ) + return SelectableGroups.load(eps).select(**params) + + +def files(distribution_name): + """Return a list of files for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: List of files composing the distribution. + """ + return distribution(distribution_name).files + + +def requires(distribution_name): + """ + Return a list of requirements for the named package. + + :return: An iterator of requirements, suitable for + packaging.requirement.Requirement. + """ + return distribution(distribution_name).requires + + +def packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. + + >>> import collections.abc + >>> pkgs = packages_distributions() + >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) + True + """ + pkg_to_dist = collections.defaultdict(list) + for dist in distributions(): + for pkg in (dist.read_text('top_level.txt') or '').split(): + pkg_to_dist[pkg].append(dist.metadata['Name']) + return dict(pkg_to_dist) diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py new file mode 100644 index 0000000..ab08618 --- /dev/null +++ b/Lib/importlib/metadata/_adapters.py @@ -0,0 +1,67 @@ +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/Lib/importlib/metadata/_collections.py b/Lib/importlib/metadata/_collections.py new file mode 100644 index 0000000..cf0954e --- /dev/null +++ b/Lib/importlib/metadata/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/Lib/importlib/metadata/_functools.py b/Lib/importlib/metadata/_functools.py new file mode 100644 index 0000000..73f50d0 --- /dev/null +++ b/Lib/importlib/metadata/_functools.py @@ -0,0 +1,85 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper diff --git a/Lib/importlib/metadata/_itertools.py b/Lib/importlib/metadata/_itertools.py new file mode 100644 index 0000000..dd45f2f --- /dev/null +++ b/Lib/importlib/metadata/_itertools.py @@ -0,0 +1,19 @@ +from itertools import filterfalse + + +def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/Lib/importlib/metadata/_meta.py b/Lib/importlib/metadata/_meta.py new file mode 100644 index 0000000..04d9a02 --- /dev/null +++ b/Lib/importlib/metadata/_meta.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: + ... # pragma: no cover + + def __contains__(self, item: str) -> bool: + ... # pragma: no cover + + def __getitem__(self, key: str) -> str: + ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: + ... # pragma: no cover + + def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ diff --git a/Lib/importlib/metadata/_text.py b/Lib/importlib/metadata/_text.py new file mode 100644 index 0000000..766979d --- /dev/null +++ b/Lib/importlib/metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super(FoldedCase, self).lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super(FoldedCase, self).lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py index 1ae70c7..12ed07d 100644 --- a/Lib/test/test_importlib/fixtures.py +++ b/Lib/test/test_importlib/fixtures.py @@ -1,5 +1,6 @@ import os import sys +import copy import shutil import pathlib import tempfile @@ -108,6 +109,16 @@ class DistInfoPkg(OnSysPath, SiteDir): super(DistInfoPkg, self).setUp() build_files(DistInfoPkg.files, self.site_dir) + def make_uppercase(self): + """ + Rewrite metadata with everything uppercase. + """ + shutil.rmtree(self.site_dir / "distinfo_pkg-1.0.0.dist-info") + files = copy.deepcopy(DistInfoPkg.files) + info = files["distinfo_pkg-1.0.0.dist-info"] + info["METADATA"] = info["METADATA"].upper() + build_files(files, self.site_dir) + class DistInfoPkgWithDot(OnSysPath, SiteDir): files: FilesDef = { diff --git a/Lib/test/test_importlib/test_main.py b/Lib/test/test_importlib/test_main.py index 08069c9..52cb637 100644 --- a/Lib/test/test_importlib/test_main.py +++ b/Lib/test/test_importlib/test_main.py @@ -125,7 +125,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): metadata_dir.mkdir() metadata = metadata_dir / 'METADATA' with metadata.open('w', encoding='utf-8') as fp: - fp.write('Description: pôrˈtend\n') + fp.write('Description: pôrˈtend') return 'portend' @staticmethod @@ -145,7 +145,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): pôrˈtend """ - ).lstrip() + ).strip() ) return 'portend' @@ -157,7 +157,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): def test_metadata_loads_egg_info(self): pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir) meta = metadata(pkg_name) - assert meta.get_payload() == 'pôrˈtend\n' + assert meta['Description'] == 'pôrˈtend' class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase): diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py index 657c166..825edc1 100644 --- a/Lib/test/test_importlib/test_metadata_api.py +++ b/Lib/test/test_importlib/test_metadata_api.py @@ -231,6 +231,29 @@ class APITests( assert deps == expected + def test_as_json(self): + md = metadata('distinfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['requires_dist']) == 2 + + def test_as_json_egg_info(self): + md = metadata('egginfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['classifier']) == 2 + + def test_as_json_odd_case(self): + self.make_uppercase() + md = metadata('distinfo-pkg').json + assert 'name' in md + assert len(md['requires_dist']) == 2 + assert md['keywords'] == ['SAMPLE', 'PACKAGE'] + class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase): def test_name_normalization(self): diff --git a/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst new file mode 100644 index 0000000..45f29a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst @@ -0,0 +1,4 @@ +In ``importlib.metadata``, provide a uniform interface to ``Description``, +allow for any field to be encoded with multiline values, remove continuation +lines from multiline values, and add a ``.json`` property for easy access to +the PEP 566 JSON-compatible form. Sync with ``importlib_metadata 4.0``. -- cgit v0.12