diff options
Diffstat (limited to 'Lib/packaging/database.py')
-rw-r--r-- | Lib/packaging/database.py | 650 |
1 files changed, 650 insertions, 0 deletions
diff --git a/Lib/packaging/database.py b/Lib/packaging/database.py new file mode 100644 index 0000000..b2fcb97 --- /dev/null +++ b/Lib/packaging/database.py @@ -0,0 +1,650 @@ +"""PEP 376 implementation.""" + +import os +import re +import csv +import sys +import zipimport +from io import StringIO +from hashlib import md5 + +from packaging import logger +from packaging.errors import PackagingError +from packaging.version import suggest_normalized_version, VersionPredicate +from packaging.metadata import Metadata + + +__all__ = [ + 'Distribution', 'EggInfoDistribution', 'distinfo_dirname', + 'get_distributions', 'get_distribution', 'get_file_users', + 'provides_distribution', 'obsoletes_distribution', + 'enable_cache', 'disable_cache', 'clear_cache', + 'get_file_path', 'get_file'] + + +# TODO update docs + +DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES') + +# Cache +_cache_name = {} # maps names to Distribution instances +_cache_name_egg = {} # maps names to EggInfoDistribution instances +_cache_path = {} # maps paths to Distribution instances +_cache_path_egg = {} # maps paths to EggInfoDistribution instances +_cache_generated = False # indicates if .dist-info distributions are cached +_cache_generated_egg = False # indicates if .dist-info and .egg are cached +_cache_enabled = True + + +def enable_cache(): + """ + Enables the internal cache. + + Note that this function will not clear the cache in any case, for that + functionality see :func:`clear_cache`. + """ + global _cache_enabled + + _cache_enabled = True + + +def disable_cache(): + """ + Disables the internal cache. + + Note that this function will not clear the cache in any case, for that + functionality see :func:`clear_cache`. + """ + global _cache_enabled + + _cache_enabled = False + + +def clear_cache(): + """ Clears the internal cache. """ + global _cache_generated, _cache_generated_egg + + _cache_name.clear() + _cache_name_egg.clear() + _cache_path.clear() + _cache_path_egg.clear() + _cache_generated = False + _cache_generated_egg = False + + +def _yield_distributions(include_dist, include_egg, paths): + """ + Yield .dist-info and .egg(-info) distributions, based on the arguments + + :parameter include_dist: yield .dist-info distributions + :parameter include_egg: yield .egg(-info) distributions + """ + for path in paths: + realpath = os.path.realpath(path) + if not os.path.isdir(realpath): + continue + for dir in os.listdir(realpath): + dist_path = os.path.join(realpath, dir) + if include_dist and dir.endswith('.dist-info'): + yield Distribution(dist_path) + elif include_egg and (dir.endswith('.egg-info') or + dir.endswith('.egg')): + yield EggInfoDistribution(dist_path) + + +def _generate_cache(use_egg_info, paths): + global _cache_generated, _cache_generated_egg + + if _cache_generated_egg or (_cache_generated and not use_egg_info): + return + else: + gen_dist = not _cache_generated + gen_egg = use_egg_info + + for dist in _yield_distributions(gen_dist, gen_egg, paths): + if isinstance(dist, Distribution): + _cache_path[dist.path] = dist + if dist.name not in _cache_name: + _cache_name[dist.name] = [] + _cache_name[dist.name].append(dist) + else: + _cache_path_egg[dist.path] = dist + if dist.name not in _cache_name_egg: + _cache_name_egg[dist.name] = [] + _cache_name_egg[dist.name].append(dist) + + if gen_dist: + _cache_generated = True + if gen_egg: + _cache_generated_egg = True + + +class Distribution: + """Created with the *path* of the ``.dist-info`` directory provided to the + constructor. It reads the metadata contained in ``METADATA`` when it is + instantiated.""" + + name = '' + """The name of the distribution.""" + + version = '' + """The version of the distribution.""" + + metadata = None + """A :class:`packaging.metadata.Metadata` instance loaded with + the distribution's ``METADATA`` file.""" + + requested = False + """A boolean that indicates whether the ``REQUESTED`` metadata file is + present (in other words, whether the package was installed by user + request or it was installed as a dependency).""" + + def __init__(self, path): + if _cache_enabled and path in _cache_path: + self.metadata = _cache_path[path].metadata + else: + metadata_path = os.path.join(path, 'METADATA') + self.metadata = Metadata(path=metadata_path) + + self.name = self.metadata['Name'] + self.version = self.metadata['Version'] + self.path = path + + if _cache_enabled and path not in _cache_path: + _cache_path[path] = self + + def __repr__(self): + return '<Distribution %r %s at %r>' % ( + self.name, self.version, self.path) + + def _get_records(self, local=False): + results = [] + with self.get_distinfo_file('RECORD') as record: + record_reader = csv.reader(record, delimiter=',', + lineterminator='\n') + for row in record_reader: + missing = [None for i in range(len(row), 3)] + path, checksum, size = row + missing + if local: + path = path.replace('/', os.sep) + path = os.path.join(sys.prefix, path) + results.append((path, checksum, size)) + return results + + def get_resource_path(self, relative_path): + with self.get_distinfo_file('RESOURCES') as resources_file: + resources_reader = csv.reader(resources_file, delimiter=',', + lineterminator='\n') + for relative, destination in resources_reader: + if relative == relative_path: + return destination + raise KeyError( + 'no resource file with relative path %r is installed' % + relative_path) + + def list_installed_files(self, local=False): + """ + Iterates over the ``RECORD`` entries and returns a tuple + ``(path, md5, size)`` for each line. If *local* is ``True``, + the returned path is transformed into a local absolute path. + Otherwise the raw value from RECORD is returned. + + A local absolute path is an absolute path in which occurrences of + ``'/'`` have been replaced by the system separator given by ``os.sep``. + + :parameter local: flag to say if the path should be returned as a local + absolute path + + :type local: boolean + :returns: iterator of (path, md5, size) + """ + for result in self._get_records(local): + yield result + + def uses(self, path): + """ + Returns ``True`` if path is listed in ``RECORD``. *path* can be a local + absolute path or a relative ``'/'``-separated path. + + :rtype: boolean + """ + for p, checksum, size in self._get_records(): + local_absolute = os.path.join(sys.prefix, p) + if path == p or path == local_absolute: + return True + return False + + def get_distinfo_file(self, path, binary=False): + """ + Returns a file located under the ``.dist-info`` directory. Returns a + ``file`` instance for the file pointed by *path*. + + :parameter path: a ``'/'``-separated path relative to the + ``.dist-info`` directory or an absolute path; + If *path* is an absolute path and doesn't start + with the ``.dist-info`` directory path, + a :class:`PackagingError` is raised + :type path: string + :parameter binary: If *binary* is ``True``, opens the file in read-only + binary mode (``rb``), otherwise opens it in + read-only mode (``r``). + :rtype: file object + """ + open_flags = 'r' + if binary: + open_flags += 'b' + + # Check if it is an absolute path # XXX use relpath, add tests + if path.find(os.sep) >= 0: + # it's an absolute path? + distinfo_dirname, path = path.split(os.sep)[-2:] + if distinfo_dirname != self.path.split(os.sep)[-1]: + raise PackagingError( + 'dist-info file %r does not belong to the %r %s ' + 'distribution' % (path, self.name, self.version)) + + # The file must be relative + if path not in DIST_FILES: + raise PackagingError('invalid path for a dist-info file: %r' % + path) + + path = os.path.join(self.path, path) + return open(path, open_flags) + + def list_distinfo_files(self, local=False): + """ + Iterates over the ``RECORD`` entries and returns paths for each line if + the path is pointing to a file located in the ``.dist-info`` directory + or one of its subdirectories. + + :parameter local: If *local* is ``True``, each returned path is + transformed into a local absolute path. Otherwise the + raw value from ``RECORD`` is returned. + :type local: boolean + :returns: iterator of paths + """ + for path, checksum, size in self._get_records(local): + # XXX add separator or use real relpath algo + if path.startswith(self.path): + yield path + + def __eq__(self, other): + return isinstance(other, Distribution) and self.path == other.path + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + + +class EggInfoDistribution: + """Created with the *path* of the ``.egg-info`` directory or file provided + to the constructor. It reads the metadata contained in the file itself, or + if the given path happens to be a directory, the metadata is read from the + file ``PKG-INFO`` under that directory.""" + + name = '' + """The name of the distribution.""" + + version = '' + """The version of the distribution.""" + + metadata = None + """A :class:`packaging.metadata.Metadata` instance loaded with + the distribution's ``METADATA`` file.""" + + _REQUIREMENT = re.compile( + r'(?P<name>[-A-Za-z0-9_.]+)\s*' + r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*' + r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*' + r'(?P<extras>\[.*\])?') + + def __init__(self, path): + self.path = path + if _cache_enabled and path in _cache_path_egg: + self.metadata = _cache_path_egg[path].metadata + self.name = self.metadata['Name'] + self.version = self.metadata['Version'] + return + + # reused from Distribute's pkg_resources + def yield_lines(strs): + """Yield non-empty/non-comment lines of a ``basestring`` + or sequence""" + if isinstance(strs, str): + for s in strs.splitlines(): + s = s.strip() + # skip blank lines/comments + if s and not s.startswith('#'): + yield s + else: + for ss in strs: + for s in yield_lines(ss): + yield s + + requires = None + + if path.endswith('.egg'): + if os.path.isdir(path): + meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO') + self.metadata = Metadata(path=meta_path) + try: + req_path = os.path.join(path, 'EGG-INFO', 'requires.txt') + with open(req_path, 'r') as fp: + requires = fp.read() + except IOError: + requires = None + else: + # FIXME handle the case where zipfile is not available + zipf = zipimport.zipimporter(path) + fileobj = StringIO( + zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) + self.metadata = Metadata(fileobj=fileobj) + try: + requires = zipf.get_data('EGG-INFO/requires.txt') + except IOError: + requires = None + self.name = self.metadata['Name'] + self.version = self.metadata['Version'] + + elif path.endswith('.egg-info'): + if os.path.isdir(path): + path = os.path.join(path, 'PKG-INFO') + try: + with open(os.path.join(path, 'requires.txt'), 'r') as fp: + requires = fp.read() + except IOError: + requires = None + self.metadata = Metadata(path=path) + self.name = self.metadata['Name'] + self.version = self.metadata['Version'] + + else: + raise ValueError('path must end with .egg-info or .egg, got %r' % + path) + + if requires is not None: + if self.metadata['Metadata-Version'] == '1.1': + # we can't have 1.1 metadata *and* Setuptools requires + for field in ('Obsoletes', 'Requires', 'Provides'): + del self.metadata[field] + + reqs = [] + + if requires is not None: + for line in yield_lines(requires): + if line.startswith('['): + logger.warning( + 'extensions in requires.txt are not supported ' + '(used by %r %s)', self.name, self.version) + break + else: + match = self._REQUIREMENT.match(line.strip()) + if not match: + # this happens when we encounter extras; since they + # are written at the end of the file we just exit + break + else: + if match.group('extras'): + msg = ('extra requirements are not supported ' + '(used by %r %s)', self.name, self.version) + logger.warning(msg, self.name) + name = match.group('name') + version = None + if match.group('first'): + version = match.group('first') + if match.group('rest'): + version += match.group('rest') + version = version.replace(' ', '') # trim spaces + if version is None: + reqs.append(name) + else: + reqs.append('%s (%s)' % (name, version)) + + if len(reqs) > 0: + self.metadata['Requires-Dist'] += reqs + + if _cache_enabled: + _cache_path_egg[self.path] = self + + def __repr__(self): + return '<EggInfoDistribution %r %s at %r>' % ( + self.name, self.version, self.path) + + def list_installed_files(self, local=False): + + def _md5(path): + with open(path, 'rb') as f: + content = f.read() + return md5(content).hexdigest() + + def _size(path): + return os.stat(path).st_size + + path = self.path + if local: + path = path.replace('/', os.sep) + + # XXX What about scripts and data files ? + if os.path.isfile(path): + return [(path, _md5(path), _size(path))] + else: + files = [] + for root, dir, files_ in os.walk(path): + for item in files_: + item = os.path.join(root, item) + files.append((item, _md5(item), _size(item))) + return files + + return [] + + def uses(self, path): + return False + + def __eq__(self, other): + return (isinstance(other, EggInfoDistribution) and + self.path == other.path) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + + +def distinfo_dirname(name, version): + """ + The *name* and *version* parameters are converted into their + filename-escaped form, i.e. any ``'-'`` characters are replaced + with ``'_'`` other than the one in ``'dist-info'`` and the one + separating the name from the version number. + + :parameter name: is converted to a standard distribution name by replacing + any runs of non- alphanumeric characters with a single + ``'-'``. + :type name: string + :parameter version: is converted to a standard version string. Spaces + become dots, and all other non-alphanumeric characters + (except dots) become dashes, with runs of multiple + dashes condensed to a single dash. + :type version: string + :returns: directory name + :rtype: string""" + file_extension = '.dist-info' + name = name.replace('-', '_') + normalized_version = suggest_normalized_version(version) + # Because this is a lookup procedure, something will be returned even if + # it is a version that cannot be normalized + if normalized_version is None: + # Unable to achieve normality? + normalized_version = version + return '-'.join([name, normalized_version]) + file_extension + + +def get_distributions(use_egg_info=False, paths=None): + """ + Provides an iterator that looks for ``.dist-info`` directories in + ``sys.path`` and returns :class:`Distribution` instances for each one of + them. If the parameters *use_egg_info* is ``True``, then the ``.egg-info`` + files and directores are iterated as well. + + :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution` + instances + """ + if paths is None: + paths = sys.path + + if not _cache_enabled: + for dist in _yield_distributions(True, use_egg_info, paths): + yield dist + else: + _generate_cache(use_egg_info, paths) + + for dist in _cache_path.values(): + yield dist + + if use_egg_info: + for dist in _cache_path_egg.values(): + yield dist + + +def get_distribution(name, use_egg_info=False, paths=None): + """ + Scans all elements in ``sys.path`` and looks for all directories + ending with ``.dist-info``. Returns a :class:`Distribution` + corresponding to the ``.dist-info`` directory that contains the + ``METADATA`` that matches *name* for the *name* metadata field. + If no distribution exists with the given *name* and the parameter + *use_egg_info* is set to ``True``, then all files and directories ending + with ``.egg-info`` are scanned. A :class:`EggInfoDistribution` instance is + returned if one is found that has metadata that matches *name* for the + *name* metadata field. + + This function only returns the first result found, as no more than one + value is expected. If the directory is not found, ``None`` is returned. + + :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None + """ + if paths is None: + paths = sys.path + + if not _cache_enabled: + for dist in _yield_distributions(True, use_egg_info, paths): + if dist.name == name: + return dist + else: + _generate_cache(use_egg_info, paths) + + if name in _cache_name: + return _cache_name[name][0] + elif use_egg_info and name in _cache_name_egg: + return _cache_name_egg[name][0] + else: + return None + + +def obsoletes_distribution(name, version=None, use_egg_info=False): + """ + Iterates over all distributions to find which distributions obsolete + *name*. + + If a *version* is provided, it will be used to filter the results. + If the argument *use_egg_info* is set to ``True``, then ``.egg-info`` + distributions will be considered as well. + + :type name: string + :type version: string + :parameter name: + """ + for dist in get_distributions(use_egg_info): + obsoleted = (dist.metadata['Obsoletes-Dist'] + + dist.metadata['Obsoletes']) + for obs in obsoleted: + o_components = obs.split(' ', 1) + if len(o_components) == 1 or version is None: + if name == o_components[0]: + yield dist + break + else: + try: + predicate = VersionPredicate(obs) + except ValueError: + raise PackagingError( + 'distribution %r has ill-formed obsoletes field: ' + '%r' % (dist.name, obs)) + if name == o_components[0] and predicate.match(version): + yield dist + break + + +def provides_distribution(name, version=None, use_egg_info=False): + """ + Iterates over all distributions to find which distributions provide *name*. + If a *version* is provided, it will be used to filter the results. Scans + all elements in ``sys.path`` and looks for all directories ending with + ``.dist-info``. Returns a :class:`Distribution` corresponding to the + ``.dist-info`` directory that contains a ``METADATA`` that matches *name* + for the name metadata. If the argument *use_egg_info* is set to ``True``, + then all files and directories ending with ``.egg-info`` are considered + as well and returns an :class:`EggInfoDistribution` instance. + + This function only returns the first result found, since no more than + one values are expected. If the directory is not found, returns ``None``. + + :parameter version: a version specifier that indicates the version + required, conforming to the format in ``PEP-345`` + + :type name: string + :type version: string + """ + predicate = None + if not version is None: + try: + predicate = VersionPredicate(name + ' (' + version + ')') + except ValueError: + raise PackagingError('invalid name or version: %r, %r' % + (name, version)) + + for dist in get_distributions(use_egg_info): + provided = dist.metadata['Provides-Dist'] + dist.metadata['Provides'] + + for p in provided: + p_components = p.rsplit(' ', 1) + if len(p_components) == 1 or predicate is None: + if name == p_components[0]: + yield dist + break + else: + p_name, p_ver = p_components + if len(p_ver) < 2 or p_ver[0] != '(' or p_ver[-1] != ')': + raise PackagingError( + 'distribution %r has invalid Provides field: %r' % + (dist.name, p)) + p_ver = p_ver[1:-1] # trim off the parenthesis + if p_name == name and predicate.match(p_ver): + yield dist + break + + +def get_file_users(path): + """ + Iterates over all distributions to find out which distributions use + *path*. + + :parameter path: can be a local absolute path or a relative + ``'/'``-separated path. + :type path: string + :rtype: iterator of :class:`Distribution` instances + """ + for dist in get_distributions(): + if dist.uses(path): + yield dist + + +def get_file_path(distribution_name, relative_path): + """Return the path to a resource file.""" + dist = get_distribution(distribution_name) + if dist is not None: + return dist.get_resource_path(relative_path) + raise LookupError('no distribution named %r found' % distribution_name) + + +def get_file(distribution_name, relative_path, *args, **kwargs): + """Open and return a resource file.""" + return open(get_file_path(distribution_name, relative_path), + *args, **kwargs) |