From 410b70d39d9d77384f8b8597560f6731530149ca Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs"
Date: Mon, 31 May 2021 11:52:29 -0400
Subject: bpo-44246: Entry points performance improvements. (GH-26467)

From importlib_metadata 4.3.1.
---
 Lib/importlib/metadata/__init__.py                 | 24 +++++++++++++++++++++-
 Lib/test/test_importlib/test_zip.py                |  4 ++++
 .../2021-05-31-11-28-03.bpo-44246.nhmt-v.rst       |  3 +++
 3 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst

diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py
index 94b8386..2e3403e 100644
--- a/Lib/importlib/metadata/__init__.py
+++ b/Lib/importlib/metadata/__init__.py
@@ -494,6 +494,11 @@ class Distribution:
         return self.metadata['Name']
 
     @property
+    def _normalized_name(self):
+        """Return a normalized version of the name."""
+        return Prepared.normalize(self.name)
+
+    @property
     def version(self):
         """Return the 'Version' metadata for the distribution package."""
         return self.metadata['Version']
@@ -795,6 +800,22 @@ class PathDistribution(Distribution):
     def locate_file(self, path):
         return self._path.parent / path
 
+    @property
+    def _normalized_name(self):
+        """
+        Performance optimization: where possible, resolve the
+        normalized name from the file system path.
+        """
+        stem = os.path.basename(str(self._path))
+        return self._name_from_stem(stem) or super()._normalized_name
+
+    def _name_from_stem(self, stem):
+        name, ext = os.path.splitext(stem)
+        if ext not in ('.dist-info', '.egg-info'):
+            return
+        name, sep, rest = name.partition('-')  # ext already stripped
+        return name
+
 
 def distribution(distribution_name):
     """Get the ``Distribution`` instance for the named package.
@@ -849,7 +870,8 @@ def entry_points(**params) -> Union[EntryPoints, SelectableGroups]:
 
     :return: EntryPoints or SelectableGroups for all installed packages.
     """
-    unique = functools.partial(unique_everseen, key=operator.attrgetter('name'))
+    norm_name = operator.attrgetter('_normalized_name')
+    unique = functools.partial(unique_everseen, key=norm_name)
     eps = itertools.chain.from_iterable(
         dist.entry_points for dist in unique(distributions())
     )
diff --git a/Lib/test/test_importlib/test_zip.py b/Lib/test/test_importlib/test_zip.py
index 83e0413..bf16a3b 100644
--- a/Lib/test/test_importlib/test_zip.py
+++ b/Lib/test/test_importlib/test_zip.py
@@ -76,3 +76,7 @@ class TestEgg(TestZip):
         for file in files('example'):
             path = str(file.dist.locate_file(file))
             assert '.egg/' in path, path
+
+    def test_normalized_name(self):
+        dist = distribution('example')
+        assert dist._normalized_name == 'example'
diff --git a/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
new file mode 100644
index 0000000..727d9fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
@@ -0,0 +1,3 @@
+In importlib.metadata.entry_points, de-duplication of distributions no
+longer requires loading the full metadata for PathDistribution objects,
+improving entry point loading performance by ~10x.
-- 
cgit v0.12