diff options
author | Éric Araujo <merwok@netwok.org> | 2012-06-24 04:07:41 (GMT) |
---|---|---|
committer | Éric Araujo <merwok@netwok.org> | 2012-06-24 04:07:41 (GMT) |
commit | 859aad6a36262383b98ddd45fe3253a882b87ce8 (patch) | |
tree | 1cc50af4fc88c650fe997a2e72f5f26d92a1986c /Lib/packaging/pypi | |
parent | dc44f55cc9dc1d016799362c344958baab328ff4 (diff) | |
download | cpython-859aad6a36262383b98ddd45fe3253a882b87ce8.zip cpython-859aad6a36262383b98ddd45fe3253a882b87ce8.tar.gz cpython-859aad6a36262383b98ddd45fe3253a882b87ce8.tar.bz2 |
Remove packaging from the standard library.
Distutils2 will live on on PyPI and be included in the stdlib when it
is ready. See discussion starting at
http://mail.python.org/pipermail/python-dev/2012-June/120430.html
Diffstat (limited to 'Lib/packaging/pypi')
-rw-r--r-- | Lib/packaging/pypi/__init__.py | 9 | ||||
-rw-r--r-- | Lib/packaging/pypi/base.py | 48 | ||||
-rw-r--r-- | Lib/packaging/pypi/dist.py | 544 | ||||
-rw-r--r-- | Lib/packaging/pypi/errors.py | 39 | ||||
-rw-r--r-- | Lib/packaging/pypi/mirrors.py | 52 | ||||
-rw-r--r-- | Lib/packaging/pypi/simple.py | 462 | ||||
-rw-r--r-- | Lib/packaging/pypi/wrapper.py | 99 | ||||
-rw-r--r-- | Lib/packaging/pypi/xmlrpc.py | 200 |
8 files changed, 0 insertions, 1453 deletions
diff --git a/Lib/packaging/pypi/__init__.py b/Lib/packaging/pypi/__init__.py deleted file mode 100644 index 5660c50..0000000 --- a/Lib/packaging/pypi/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Low-level and high-level APIs to interact with project indexes.""" - -__all__ = ['simple', - 'xmlrpc', - 'dist', - 'errors', - 'mirrors'] - -from packaging.pypi.dist import ReleaseInfo, ReleasesList, DistInfo diff --git a/Lib/packaging/pypi/base.py b/Lib/packaging/pypi/base.py deleted file mode 100644 index 305fca9..0000000 --- a/Lib/packaging/pypi/base.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Base class for index crawlers.""" - -from packaging.pypi.dist import ReleasesList - - -class BaseClient: - """Base class containing common methods for the index crawlers/clients""" - - def __init__(self, prefer_final, prefer_source): - self._prefer_final = prefer_final - self._prefer_source = prefer_source - self._index = self - - def _get_prefer_final(self, prefer_final=None): - """Return the prefer_final internal parameter or the specified one if - provided""" - if prefer_final: - return prefer_final - else: - return self._prefer_final - - def _get_prefer_source(self, prefer_source=None): - """Return the prefer_source internal parameter or the specified one if - provided""" - if prefer_source: - return prefer_source - else: - return self._prefer_source - - def _get_project(self, project_name): - """Return an project instance, create it if necessary""" - return self._projects.setdefault(project_name.lower(), - ReleasesList(project_name, index=self._index)) - - def download_distribution(self, requirements, temp_path=None, - prefer_source=None, prefer_final=None): - """Download a distribution from the last release according to the - requirements. - - If temp_path is provided, download to this path, otherwise, create a - temporary location for the download and return it. - """ - prefer_final = self._get_prefer_final(prefer_final) - prefer_source = self._get_prefer_source(prefer_source) - release = self.get_release(requirements, prefer_final) - if release: - dist = release.get_distribution(prefer_source=prefer_source) - return dist.download(temp_path) diff --git a/Lib/packaging/pypi/dist.py b/Lib/packaging/pypi/dist.py deleted file mode 100644 index 541465e..0000000 --- a/Lib/packaging/pypi/dist.py +++ /dev/null @@ -1,544 +0,0 @@ -"""Classes representing releases and distributions retrieved from indexes. - -A project (= unique name) can have several releases (= versions) and -each release can have several distributions (= sdist and bdists). - -Release objects contain metadata-related information (see PEP 376); -distribution objects contain download-related information. -""" - -import re -import hashlib -import tempfile -import urllib.request -import urllib.parse -import urllib.error -import urllib.parse -from shutil import unpack_archive - -from packaging.errors import IrrationalVersionError -from packaging.version import (suggest_normalized_version, NormalizedVersion, - get_version_predicate) -from packaging.metadata import Metadata -from packaging.pypi.errors import (HashDoesNotMatch, UnsupportedHashName, - CantParseArchiveName) - - -__all__ = ['ReleaseInfo', 'DistInfo', 'ReleasesList', 'get_infos_from_url'] - -EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz .egg".split() -MD5_HASH = re.compile(r'^.*#md5=([a-f0-9]+)$') -DIST_TYPES = ['bdist', 'sdist'] - - -class IndexReference: - """Mixin used to store the index reference""" - def set_index(self, index=None): - self._index = index - - -class ReleaseInfo(IndexReference): - """Represent a release of a project (a project with a specific version). - The release contain the _metadata informations related to this specific - version, and is also a container for distribution related informations. - - See the DistInfo class for more information about distributions. - """ - - def __init__(self, name, version, metadata=None, hidden=False, - index=None, **kwargs): - """ - :param name: the name of the distribution - :param version: the version of the distribution - :param metadata: the metadata fields of the release. - :type metadata: dict - :param kwargs: optional arguments for a new distribution. - """ - self.set_index(index) - self.name = name - self._version = None - self.version = version - if metadata: - self.metadata = Metadata(mapping=metadata) - else: - self.metadata = None - self.dists = {} - self.hidden = hidden - - if 'dist_type' in kwargs: - dist_type = kwargs.pop('dist_type') - self.add_distribution(dist_type, **kwargs) - - def set_version(self, version): - try: - self._version = NormalizedVersion(version) - except IrrationalVersionError: - suggestion = suggest_normalized_version(version) - if suggestion: - self.version = suggestion - else: - raise IrrationalVersionError(version) - - def get_version(self): - return self._version - - version = property(get_version, set_version) - - def fetch_metadata(self): - """If the metadata is not set, use the indexes to get it""" - if not self.metadata: - self._index.get_metadata(self.name, str(self.version)) - return self.metadata - - @property - def is_final(self): - """proxy to version.is_final""" - return self.version.is_final - - def fetch_distributions(self): - if self.dists is None: - self._index.get_distributions(self.name, str(self.version)) - if self.dists is None: - self.dists = {} - return self.dists - - def add_distribution(self, dist_type='sdist', python_version=None, - **params): - """Add distribution informations to this release. - If distribution information is already set for this distribution type, - add the given url paths to the distribution. This can be useful while - some of them fails to download. - - :param dist_type: the distribution type (eg. "sdist", "bdist", etc.) - :param params: the fields to be passed to the distribution object - (see the :class:DistInfo constructor). - """ - if dist_type not in DIST_TYPES: - raise ValueError(dist_type) - if dist_type in self.dists: - self.dists[dist_type].add_url(**params) - else: - self.dists[dist_type] = DistInfo(self, dist_type, - index=self._index, **params) - if python_version: - self.dists[dist_type].python_version = python_version - - def get_distribution(self, dist_type=None, prefer_source=True): - """Return a distribution. - - If dist_type is set, find first for this distribution type, and just - act as an alias of __get_item__. - - If prefer_source is True, search first for source distribution, and if - not return one existing distribution. - """ - if len(self.dists) == 0: - raise LookupError - if dist_type: - return self[dist_type] - if prefer_source: - if "sdist" in self.dists: - dist = self["sdist"] - else: - dist = next(self.dists.values()) - return dist - - def unpack(self, path=None, prefer_source=True): - """Unpack the distribution to the given path. - - If not destination is given, creates a temporary location. - - Returns the location of the extracted files (root). - """ - return self.get_distribution(prefer_source=prefer_source)\ - .unpack(path=path) - - def download(self, temp_path=None, prefer_source=True): - """Download the distribution, using the requirements. - - If more than one distribution match the requirements, use the last - version. - Download the distribution, and put it in the temp_path. If no temp_path - is given, creates and return one. - - Returns the complete absolute path to the downloaded archive. - """ - return self.get_distribution(prefer_source=prefer_source)\ - .download(path=temp_path) - - def set_metadata(self, metadata): - if not self.metadata: - self.metadata = Metadata() - self.metadata.update(metadata) - - def __getitem__(self, item): - """distributions are available using release["sdist"]""" - return self.dists[item] - - def _check_is_comparable(self, other): - if not isinstance(other, ReleaseInfo): - raise TypeError("cannot compare %s and %s" - % (type(self).__name__, type(other).__name__)) - elif self.name != other.name: - raise TypeError("cannot compare %s and %s" - % (self.name, other.name)) - - def __repr__(self): - return "<%s %s>" % (self.name, self.version) - - def __eq__(self, other): - self._check_is_comparable(other) - return self.version == other.version - - def __lt__(self, other): - self._check_is_comparable(other) - return self.version < other.version - - def __ne__(self, other): - return not self.__eq__(other) - - def __gt__(self, other): - return not (self.__lt__(other) or self.__eq__(other)) - - def __le__(self, other): - return self.__eq__(other) or self.__lt__(other) - - def __ge__(self, other): - return self.__eq__(other) or self.__gt__(other) - - # See http://docs.python.org/reference/datamodel#object.__hash__ - __hash__ = object.__hash__ - - -class DistInfo(IndexReference): - """Represents a distribution retrieved from an index (sdist, bdist, ...) - """ - - def __init__(self, release, dist_type=None, url=None, hashname=None, - hashval=None, is_external=True, python_version=None, - index=None): - """Create a new instance of DistInfo. - - :param release: a DistInfo class is relative to a release. - :param dist_type: the type of the dist (eg. source, bin-*, etc.) - :param url: URL where we found this distribution - :param hashname: the name of the hash we want to use. Refer to the - hashlib.new documentation for more information. - :param hashval: the hash value. - :param is_external: we need to know if the provided url comes from - an index browsing, or from an external resource. - - """ - self.set_index(index) - self.release = release - self.dist_type = dist_type - self.python_version = python_version - self._unpacked_dir = None - # set the downloaded path to None by default. The goal here - # is to not download distributions multiple times - self.downloaded_location = None - # We store urls in dict, because we need to have a bit more infos - # than the simple URL. It will be used later to find the good url to - # use. - # We have two _url* attributes: _url and urls. urls contains a list - # of dict for the different urls, and _url contains the choosen url, in - # order to dont make the selection process multiple times. - self.urls = [] - self._url = None - self.add_url(url, hashname, hashval, is_external) - - def add_url(self, url=None, hashname=None, hashval=None, is_external=True): - """Add a new url to the list of urls""" - if hashname is not None: - try: - hashlib.new(hashname) - except ValueError: - raise UnsupportedHashName(hashname) - if url not in [u['url'] for u in self.urls]: - self.urls.append({ - 'url': url, - 'hashname': hashname, - 'hashval': hashval, - 'is_external': is_external, - }) - # reset the url selection process - self._url = None - - @property - def url(self): - """Pick up the right url for the list of urls in self.urls""" - # We return internal urls over externals. - # If there is more than one internal or external, return the first - # one. - if self._url is None: - if len(self.urls) > 1: - internals_urls = [u for u in self.urls \ - if u['is_external'] == False] - if len(internals_urls) >= 1: - self._url = internals_urls[0] - if self._url is None: - self._url = self.urls[0] - return self._url - - @property - def is_source(self): - """return if the distribution is a source one or not""" - return self.dist_type == 'sdist' - - def download(self, path=None): - """Download the distribution to a path, and return it. - - If the path is given in path, use this, otherwise, generates a new one - Return the download location. - """ - if path is None: - path = tempfile.mkdtemp() - - # if we do not have downloaded it yet, do it. - if self.downloaded_location is None: - url = self.url['url'] - archive_name = urllib.parse.urlparse(url)[2].split('/')[-1] - filename, headers = urllib.request.urlretrieve(url, - path + "/" + archive_name) - self.downloaded_location = filename - self._check_md5(filename) - return self.downloaded_location - - def unpack(self, path=None): - """Unpack the distribution to the given path. - - If not destination is given, creates a temporary location. - - Returns the location of the extracted files (root). - """ - if not self._unpacked_dir: - if path is None: - path = tempfile.mkdtemp() - - filename = self.download(path) - unpack_archive(filename, path) - self._unpacked_dir = path - - return path - - def _check_md5(self, filename): - """Check that the md5 checksum of the given file matches the one in - url param""" - hashname = self.url['hashname'] - expected_hashval = self.url['hashval'] - if None not in (expected_hashval, hashname): - with open(filename, 'rb') as f: - hashval = hashlib.new(hashname) - hashval.update(f.read()) - - if hashval.hexdigest() != expected_hashval: - raise HashDoesNotMatch("got %s instead of %s" - % (hashval.hexdigest(), expected_hashval)) - - def __repr__(self): - if self.release is None: - return "<? ? %s>" % self.dist_type - - return "<%s %s %s>" % ( - self.release.name, self.release.version, self.dist_type or "") - - -class ReleasesList(IndexReference): - """A container of Release. - - Provides useful methods and facilities to sort and filter releases. - """ - def __init__(self, name, releases=None, contains_hidden=False, index=None): - self.set_index(index) - self.releases = [] - self.name = name - self.contains_hidden = contains_hidden - if releases: - self.add_releases(releases) - - def fetch_releases(self): - self._index.get_releases(self.name) - return self.releases - - def filter(self, predicate): - """Filter and return a subset of releases matching the given predicate. - """ - return ReleasesList(self.name, [release for release in self.releases - if predicate.match(release.version)], - index=self._index) - - def get_last(self, requirements, prefer_final=None): - """Return the "last" release, that satisfy the given predicates. - - "last" is defined by the version number of the releases, you also could - set prefer_final parameter to True or False to change the order results - """ - predicate = get_version_predicate(requirements) - releases = self.filter(predicate) - if len(releases) == 0: - return None - releases.sort_releases(prefer_final, reverse=True) - return releases[0] - - def add_releases(self, releases): - """Add releases in the release list. - - :param: releases is a list of ReleaseInfo objects. - """ - for r in releases: - self.add_release(release=r) - - def add_release(self, version=None, dist_type='sdist', release=None, - **dist_args): - """Add a release to the list. - - The release can be passed in the `release` parameter, and in this case, - it will be crawled to extract the useful informations if necessary, or - the release informations can be directly passed in the `version` and - `dist_type` arguments. - - Other keywords arguments can be provided, and will be forwarded to the - distribution creation (eg. the arguments of the DistInfo constructor). - """ - if release: - if release.name.lower() != self.name.lower(): - raise ValueError("%s is not the same project as %s" % - (release.name, self.name)) - version = str(release.version) - - if version not in self.get_versions(): - # append only if not already exists - self.releases.append(release) - for dist in release.dists.values(): - for url in dist.urls: - self.add_release(version, dist.dist_type, **url) - else: - matches = [r for r in self.releases - if str(r.version) == version and r.name == self.name] - if not matches: - release = ReleaseInfo(self.name, version, index=self._index) - self.releases.append(release) - else: - release = matches[0] - - release.add_distribution(dist_type=dist_type, **dist_args) - - def sort_releases(self, prefer_final=False, reverse=True, *args, **kwargs): - """Sort the results with the given properties. - - The `prefer_final` argument can be used to specify if final - distributions (eg. not dev, beta or alpha) would be preferred or not. - - Results can be inverted by using `reverse`. - - Any other parameter provided will be forwarded to the sorted call. You - cannot redefine the key argument of "sorted" here, as it is used - internally to sort the releases. - """ - - sort_by = [] - if prefer_final: - sort_by.append("is_final") - sort_by.append("version") - - self.releases.sort( - key=lambda i: tuple(getattr(i, arg) for arg in sort_by), - reverse=reverse, *args, **kwargs) - - def get_release(self, version): - """Return a release from its version.""" - matches = [r for r in self.releases if str(r.version) == version] - if len(matches) != 1: - raise KeyError(version) - return matches[0] - - def get_versions(self): - """Return a list of releases versions contained""" - return [str(r.version) for r in self.releases] - - def __getitem__(self, key): - return self.releases[key] - - def __len__(self): - return len(self.releases) - - def __repr__(self): - string = 'Project "%s"' % self.name - if self.get_versions(): - string += ' versions: %s' % ', '.join(self.get_versions()) - return '<%s>' % string - - -def get_infos_from_url(url, probable_dist_name=None, is_external=True): - """Get useful informations from an URL. - - Return a dict of (name, version, url, hashtype, hash, is_external) - - :param url: complete url of the distribution - :param probable_dist_name: A probable name of the project. - :param is_external: Tell if the url commes from an index or from - an external URL. - """ - # if the url contains a md5 hash, get it. - md5_hash = None - match = MD5_HASH.match(url) - if match is not None: - md5_hash = match.group(1) - # remove the hash - url = url.replace("#md5=%s" % md5_hash, "") - - # parse the archive name to find dist name and version - archive_name = urllib.parse.urlparse(url)[2].split('/')[-1] - extension_matched = False - # remove the extension from the name - for ext in EXTENSIONS: - if archive_name.endswith(ext): - archive_name = archive_name[:-len(ext)] - extension_matched = True - - name, version = split_archive_name(archive_name) - if extension_matched is True: - return {'name': name, - 'version': version, - 'url': url, - 'hashname': "md5", - 'hashval': md5_hash, - 'is_external': is_external, - 'dist_type': 'sdist'} - - -def split_archive_name(archive_name, probable_name=None): - """Split an archive name into two parts: name and version. - - Return the tuple (name, version) - """ - # Try to determine wich part is the name and wich is the version using the - # "-" separator. Take the larger part to be the version number then reduce - # if this not works. - def eager_split(str, maxsplit=2): - # split using the "-" separator - splits = str.rsplit("-", maxsplit) - name = splits[0] - version = "-".join(splits[1:]) - if version.startswith("-"): - version = version[1:] - if suggest_normalized_version(version) is None and maxsplit >= 0: - # we dont get a good version number: recurse ! - return eager_split(str, maxsplit - 1) - else: - return name, version - if probable_name is not None: - probable_name = probable_name.lower() - name = None - if probable_name is not None and probable_name in archive_name: - # we get the name from probable_name, if given. - name = probable_name - version = archive_name.lstrip(name) - else: - name, version = eager_split(archive_name) - - version = suggest_normalized_version(version) - if version is not None and name != "": - return name.lower(), version - else: - raise CantParseArchiveName(archive_name) diff --git a/Lib/packaging/pypi/errors.py b/Lib/packaging/pypi/errors.py deleted file mode 100644 index 2191ac1..0000000 --- a/Lib/packaging/pypi/errors.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Exceptions raised by packaging.pypi code.""" - -from packaging.errors import PackagingPyPIError - - -class ProjectNotFound(PackagingPyPIError): - """Project has not been found""" - - -class DistributionNotFound(PackagingPyPIError): - """The release has not been found""" - - -class ReleaseNotFound(PackagingPyPIError): - """The release has not been found""" - - -class CantParseArchiveName(PackagingPyPIError): - """An archive name can't be parsed to find distribution name and version""" - - -class DownloadError(PackagingPyPIError): - """An error has occurs while downloading""" - - -class HashDoesNotMatch(DownloadError): - """Compared hashes does not match""" - - -class UnsupportedHashName(PackagingPyPIError): - """A unsupported hashname has been used""" - - -class UnableToDownload(PackagingPyPIError): - """All mirrors have been tried, without success""" - - -class InvalidSearchField(PackagingPyPIError): - """An invalid search field has been used""" diff --git a/Lib/packaging/pypi/mirrors.py b/Lib/packaging/pypi/mirrors.py deleted file mode 100644 index a646acff..0000000 --- a/Lib/packaging/pypi/mirrors.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Utilities related to the mirror infrastructure defined in PEP 381.""" - -from string import ascii_lowercase -import socket - -DEFAULT_MIRROR_URL = "last.pypi.python.org" - - -def get_mirrors(hostname=None): - """Return the list of mirrors from the last record found on the DNS - entry:: - - >>> from packaging.pypi.mirrors import get_mirrors - >>> get_mirrors() - ['a.pypi.python.org', 'b.pypi.python.org', 'c.pypi.python.org', - 'd.pypi.python.org'] - - """ - if hostname is None: - hostname = DEFAULT_MIRROR_URL - - # return the last mirror registered on PyPI. - try: - hostname = socket.gethostbyname_ex(hostname)[0] - except socket.gaierror: - return [] - end_letter = hostname.split(".", 1) - - # determine the list from the last one. - return ["%s.%s" % (s, end_letter[1]) for s in string_range(end_letter[0])] - - -def string_range(last): - """Compute the range of string between "a" and last. - - This works for simple "a to z" lists, but also for "a to zz" lists. - """ - for k in range(len(last)): - for x in product(ascii_lowercase, repeat=(k + 1)): - result = ''.join(x) - yield result - if result == last: - return - - -def product(*args, **kwds): - pools = [tuple(arg) for arg in args] * kwds.get('repeat', 1) - result = [[]] - for pool in pools: - result = [x + [y] for x in result for y in pool] - for prod in result: - yield tuple(prod) diff --git a/Lib/packaging/pypi/simple.py b/Lib/packaging/pypi/simple.py deleted file mode 100644 index e26d55d..0000000 --- a/Lib/packaging/pypi/simple.py +++ /dev/null @@ -1,462 +0,0 @@ -"""Spider using the screen-scraping "simple" PyPI API. - -This module contains the class Crawler, a simple spider that -can be used to find and retrieve distributions from a project index -(like the Python Package Index), using its so-called simple API (see -reference implementation available at http://pypi.python.org/simple/). -""" - -import http.client -import re -import socket -import sys -import urllib.request -import urllib.parse -import urllib.error -import os - -from fnmatch import translate -from functools import wraps -from packaging import logger -from packaging.metadata import Metadata -from packaging.version import get_version_predicate -from packaging import __version__ as packaging_version -from packaging.pypi.base import BaseClient -from packaging.pypi.dist import (ReleasesList, EXTENSIONS, - get_infos_from_url, MD5_HASH) -from packaging.pypi.errors import (PackagingPyPIError, DownloadError, - UnableToDownload, CantParseArchiveName, - ReleaseNotFound, ProjectNotFound) -from packaging.pypi.mirrors import get_mirrors - -__all__ = ['Crawler', 'DEFAULT_SIMPLE_INDEX_URL'] - -# -- Constants ----------------------------------------------- -DEFAULT_SIMPLE_INDEX_URL = "http://a.pypi.python.org/simple/" -DEFAULT_HOSTS = ("*",) -SOCKET_TIMEOUT = 15 -USER_AGENT = "Python-urllib/%s.%s packaging/%s" % ( - sys.version_info[0], sys.version_info[1], packaging_version) - -# -- Regexps ------------------------------------------------- -EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$') -HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) -URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match - -# This pattern matches a character entity reference (a decimal numeric -# references, a hexadecimal numeric reference, or a named reference). -ENTITY_SUB = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub -REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I) - - -def socket_timeout(timeout=SOCKET_TIMEOUT): - """Decorator to add a socket timeout when requesting pages on PyPI. - """ - def wrapper(func): - @wraps(func) - def wrapped(self, *args, **kwargs): - old_timeout = socket.getdefaulttimeout() - if hasattr(self, "_timeout"): - timeout = self._timeout - socket.setdefaulttimeout(timeout) - try: - return func(self, *args, **kwargs) - finally: - socket.setdefaulttimeout(old_timeout) - return wrapped - return wrapper - - -def with_mirror_support(): - """Decorator that makes the mirroring support easier""" - def wrapper(func): - @wraps(func) - def wrapped(self, *args, **kwargs): - try: - return func(self, *args, **kwargs) - except DownloadError: - # if an error occurs, try with the next index_url - if self._mirrors_tries >= self._mirrors_max_tries: - try: - self._switch_to_next_mirror() - except KeyError: - raise UnableToDownload("Tried all mirrors") - else: - self._mirrors_tries += 1 - self._projects.clear() - return wrapped(self, *args, **kwargs) - return wrapped - return wrapper - - -class Crawler(BaseClient): - """Provides useful tools to request the Python Package Index simple API. - - You can specify both mirrors and mirrors_url, but mirrors_url will only be - used if mirrors is set to None. - - :param index_url: the url of the simple index to search on. - :param prefer_final: if the version is not mentioned, and the last - version is not a "final" one (alpha, beta, etc.), - pick up the last final version. - :param prefer_source: if the distribution type is not mentioned, pick up - the source one if available. - :param follow_externals: tell if following external links is needed or - not. Default is False. - :param hosts: a list of hosts allowed to be processed while using - follow_externals=True. Default behavior is to follow all - hosts. - :param follow_externals: tell if following external links is needed or - not. Default is False. - :param mirrors_url: the url to look on for DNS records giving mirror - addresses. - :param mirrors: a list of mirrors (see PEP 381). - :param timeout: time in seconds to consider a url has timeouted. - :param mirrors_max_tries": number of times to try requesting informations - on mirrors before switching. - """ - - def __init__(self, index_url=DEFAULT_SIMPLE_INDEX_URL, prefer_final=False, - prefer_source=True, hosts=DEFAULT_HOSTS, - follow_externals=False, mirrors_url=None, mirrors=None, - timeout=SOCKET_TIMEOUT, mirrors_max_tries=0): - super(Crawler, self).__init__(prefer_final, prefer_source) - self.follow_externals = follow_externals - - # mirroring attributes. - parsed = urllib.parse.urlparse(index_url) - self.scheme = parsed[0] - if self.scheme == 'file': - ender = os.path.sep - else: - ender = '/' - if not index_url.endswith(ender): - index_url += ender - # if no mirrors are defined, use the method described in PEP 381. - if mirrors is None: - mirrors = get_mirrors(mirrors_url) - self._mirrors = set(mirrors) - self._mirrors_used = set() - self.index_url = index_url - self._mirrors_max_tries = mirrors_max_tries - self._mirrors_tries = 0 - self._timeout = timeout - - # create a regexp to match all given hosts - self._allowed_hosts = re.compile('|'.join(map(translate, hosts))).match - - # we keep an index of pages we have processed, in order to avoid - # scanning them multple time (eg. if there is multiple pages pointing - # on one) - self._processed_urls = [] - self._projects = {} - - @with_mirror_support() - def search_projects(self, name=None, **kwargs): - """Search the index for projects containing the given name. - - Return a list of names. - """ - if '*' in name: - name.replace('*', '.*') - else: - name = "%s%s%s" % ('*.?', name, '*.?') - name = name.replace('*', '[^<]*') # avoid matching end tag - pattern = ('<a[^>]*>(%s)</a>' % name).encode('utf-8') - projectname = re.compile(pattern, re.I) - matching_projects = [] - - with self._open_url(self.index_url) as index: - index_content = index.read() - - for match in projectname.finditer(index_content): - project_name = match.group(1).decode('utf-8') - matching_projects.append(self._get_project(project_name)) - return matching_projects - - def get_releases(self, requirements, prefer_final=None, - force_update=False): - """Search for releases and return a ReleasesList object containing - the results. - """ - predicate = get_version_predicate(requirements) - if predicate.name.lower() in self._projects and not force_update: - return self._projects.get(predicate.name.lower()) - prefer_final = self._get_prefer_final(prefer_final) - logger.debug('Reading info on PyPI about %s', predicate.name) - self._process_index_page(predicate.name) - - if predicate.name.lower() not in self._projects: - raise ProjectNotFound - - releases = self._projects.get(predicate.name.lower()) - releases.sort_releases(prefer_final=prefer_final) - return releases - - def get_release(self, requirements, prefer_final=None): - """Return only one release that fulfill the given requirements""" - predicate = get_version_predicate(requirements) - release = self.get_releases(predicate, prefer_final)\ - .get_last(predicate) - if not release: - raise ReleaseNotFound("No release matches the given criterias") - return release - - def get_distributions(self, project_name, version): - """Return the distributions found on the index for the specific given - release""" - # as the default behavior of get_release is to return a release - # containing the distributions, just alias it. - return self.get_release("%s (%s)" % (project_name, version)) - - def get_metadata(self, project_name, version): - """Return the metadatas from the simple index. - - Currently, download one archive, extract it and use the PKG-INFO file. - """ - release = self.get_distributions(project_name, version) - if not release.metadata: - location = release.get_distribution().unpack() - pkg_info = os.path.join(location, 'PKG-INFO') - release.metadata = Metadata(pkg_info) - return release - - def _switch_to_next_mirror(self): - """Switch to the next mirror (eg. point self.index_url to the next - mirror url. - - Raise a KeyError if all mirrors have been tried. - """ - self._mirrors_used.add(self.index_url) - index_url = self._mirrors.pop() - # XXX use urllib.parse for a real check of missing scheme part - if not index_url.startswith(("http://", "https://", "file://")): - index_url = "http://%s" % index_url - - if not index_url.endswith("/simple"): - index_url = "%s/simple/" % index_url - - self.index_url = index_url - - def _is_browsable(self, url): - """Tell if the given URL can be browsed or not. - - It uses the follow_externals and the hosts list to tell if the given - url is browsable or not. - """ - # if _index_url is contained in the given URL, we are browsing the - # index, and it's always "browsable". - # local files are always considered browable resources - if self.index_url in url or urllib.parse.urlparse(url)[0] == "file": - return True - elif self.follow_externals: - if self._allowed_hosts(urllib.parse.urlparse(url)[1]): # 1 is netloc - return True - else: - return False - return False - - def _is_distribution(self, link): - """Tell if the given URL matches to a distribution name or not. - """ - #XXX find a better way to check that links are distributions - # Using a regexp ? - for ext in EXTENSIONS: - if ext in link: - return True - return False - - def _register_release(self, release=None, release_info={}): - """Register a new release. - - Both a release or a dict of release_info can be provided, the preferred - way (eg. the quicker) is the dict one. - - Return the list of existing releases for the given project. - """ - # Check if the project already has a list of releases (refering to - # the project name). If not, create a new release list. - # Then, add the release to the list. - if release: - name = release.name - else: - name = release_info['name'] - if name.lower() not in self._projects: - self._projects[name.lower()] = ReleasesList(name, index=self._index) - - if release: - self._projects[name.lower()].add_release(release=release) - else: - name = release_info.pop('name') - version = release_info.pop('version') - dist_type = release_info.pop('dist_type') - self._projects[name.lower()].add_release(version, dist_type, - **release_info) - return self._projects[name.lower()] - - def _process_url(self, url, project_name=None, follow_links=True): - """Process an url and search for distributions packages. - - For each URL found, if it's a download, creates a PyPIdistribution - object. If it's a homepage and we can follow links, process it too. - - :param url: the url to process - :param project_name: the project name we are searching for. - :param follow_links: Do not want to follow links more than from one - level. This parameter tells if we want to follow - the links we find (eg. run recursively this - method on it) - """ - with self._open_url(url) as f: - base_url = f.url - if url not in self._processed_urls: - self._processed_urls.append(url) - link_matcher = self._get_link_matcher(url) - for link, is_download in link_matcher(f.read().decode(), base_url): - if link not in self._processed_urls: - if self._is_distribution(link) or is_download: - self._processed_urls.append(link) - # it's a distribution, so create a dist object - try: - infos = get_infos_from_url(link, project_name, - is_external=self.index_url not in url) - except CantParseArchiveName as e: - logger.warning( - "version has not been parsed: %s", e) - else: - self._register_release(release_info=infos) - else: - if self._is_browsable(link) and follow_links: - self._process_url(link, project_name, - follow_links=False) - - def _get_link_matcher(self, url): - """Returns the right link matcher function of the given url - """ - if self.index_url in url: - return self._simple_link_matcher - else: - return self._default_link_matcher - - def _get_full_url(self, url, base_url): - return urllib.parse.urljoin(base_url, self._htmldecode(url)) - - def _simple_link_matcher(self, content, base_url): - """Yield all links with a rel="download" or rel="homepage". - - This matches the simple index requirements for matching links. - If follow_externals is set to False, dont yeld the external - urls. - - :param content: the content of the page we want to parse - :param base_url: the url of this page. - """ - for match in HREF.finditer(content): - url = self._get_full_url(match.group(1), base_url) - if MD5_HASH.match(url): - yield (url, True) - - for match in REL.finditer(content): - # search for rel links. - tag, rel = match.groups() - rels = [s.strip() for s in rel.lower().split(',')] - if 'homepage' in rels or 'download' in rels: - for match in HREF.finditer(tag): - url = self._get_full_url(match.group(1), base_url) - if 'download' in rels or self._is_browsable(url): - # yield a list of (url, is_download) - yield (url, 'download' in rels) - - def _default_link_matcher(self, content, base_url): - """Yield all links found on the page. - """ - for match in HREF.finditer(content): - url = self._get_full_url(match.group(1), base_url) - if self._is_browsable(url): - yield (url, False) - - @with_mirror_support() - def _process_index_page(self, name): - """Find and process a PyPI page for the given project name. - - :param name: the name of the project to find the page - """ - # Browse and index the content of the given PyPI page. - if self.scheme == 'file': - ender = os.path.sep - else: - ender = '/' - url = self.index_url + name + ender - self._process_url(url, name) - - @socket_timeout() - def _open_url(self, url): - """Open a urllib2 request, handling HTTP authentication, and local - files support. - - """ - scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url) - - # authentication stuff - if scheme in ('http', 'https'): - auth, host = urllib.parse.splituser(netloc) - else: - auth = None - - # add index.html automatically for filesystem paths - if scheme == 'file': - if url.endswith(os.path.sep): - url += "index.html" - - # add authorization headers if auth is provided - if auth: - auth = "Basic " + \ - urllib.parse.unquote(auth).encode('base64').strip() - new_url = urllib.parse.urlunparse(( - scheme, host, path, params, query, frag)) - request = urllib.request.Request(new_url) - request.add_header("Authorization", auth) - else: - request = urllib.request.Request(url) - request.add_header('User-Agent', USER_AGENT) - try: - fp = urllib.request.urlopen(request) - except (ValueError, http.client.InvalidURL) as v: - msg = ' '.join([str(arg) for arg in v.args]) - raise PackagingPyPIError('%s %s' % (url, msg)) - except urllib.error.HTTPError as v: - return v - except urllib.error.URLError as v: - raise DownloadError("Download error for %s: %s" % (url, v.reason)) - except http.client.BadStatusLine as v: - raise DownloadError('%s returned a bad status line. ' - 'The server might be down, %s' % (url, v.line)) - except http.client.HTTPException as v: - raise DownloadError("Download error for %s: %s" % (url, v)) - except socket.timeout: - raise DownloadError("The server timeouted") - - if auth: - # Put authentication info back into request URL if same host, - # so that links found on the page will work - s2, h2, path2, param2, query2, frag2 = \ - urllib.parse.urlparse(fp.url) - if s2 == scheme and h2 == host: - fp.url = urllib.parse.urlunparse( - (s2, netloc, path2, param2, query2, frag2)) - return fp - - def _decode_entity(self, match): - what = match.group(1) - if what.startswith('#x'): - what = int(what[2:], 16) - elif what.startswith('#'): - what = int(what[1:]) - else: - from html.entities import name2codepoint - what = name2codepoint.get(what, match.group(0)) - return chr(what) - - def _htmldecode(self, text): - """Decode HTML entities in the given text.""" - return ENTITY_SUB(self._decode_entity, text) diff --git a/Lib/packaging/pypi/wrapper.py b/Lib/packaging/pypi/wrapper.py deleted file mode 100644 index 945d08a..0000000 --- a/Lib/packaging/pypi/wrapper.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Convenient client for all PyPI APIs. - -This module provides a ClientWrapper class which will use the "simple" -or XML-RPC API to request information or files from an index. -""" - -from packaging.pypi import simple, xmlrpc - -_WRAPPER_MAPPINGS = {'get_release': 'simple', - 'get_releases': 'simple', - 'search_projects': 'simple', - 'get_metadata': 'xmlrpc', - 'get_distributions': 'simple'} - -_WRAPPER_INDEXES = {'xmlrpc': xmlrpc.Client, - 'simple': simple.Crawler} - - -def switch_index_if_fails(func, wrapper): - """Decorator that switch of index (for instance from xmlrpc to simple) - if the first mirror return an empty list or raises an exception. - """ - def decorator(*args, **kwargs): - retry = True - exception = None - methods = [func] - for f in wrapper._indexes.values(): - if f != func.__self__ and hasattr(f, func.__name__): - methods.append(getattr(f, func.__name__)) - for method in methods: - try: - response = method(*args, **kwargs) - retry = False - except Exception as e: - exception = e - if not retry: - break - if retry and exception: - raise exception - else: - return response - return decorator - - -class ClientWrapper: - """Wrapper around simple and xmlrpc clients, - - Choose the best implementation to use depending the needs, using the given - mappings. - If one of the indexes returns an error, tries to use others indexes. - - :param index: tell which index to rely on by default. - :param index_classes: a dict of name:class to use as indexes. - :param indexes: a dict of name:index already instantiated - :param mappings: the mappings to use for this wrapper - """ - - def __init__(self, default_index='simple', index_classes=_WRAPPER_INDEXES, - indexes={}, mappings=_WRAPPER_MAPPINGS): - self._projects = {} - self._mappings = mappings - self._indexes = indexes - self._default_index = default_index - - # instantiate the classes and set their _project attribute to the one - # of the wrapper. - for name, cls in index_classes.items(): - obj = self._indexes.setdefault(name, cls()) - obj._projects = self._projects - obj._index = self - - def __getattr__(self, method_name): - """When asking for methods of the wrapper, return the implementation of - the wrapped classes, depending the mapping. - - Decorate the methods to switch of implementation if an error occurs - """ - real_method = None - if method_name in _WRAPPER_MAPPINGS: - obj = self._indexes[_WRAPPER_MAPPINGS[method_name]] - real_method = getattr(obj, method_name) - else: - # the method is not defined in the mappings, so we try first to get - # it via the default index, and rely on others if needed. - try: - real_method = getattr(self._indexes[self._default_index], - method_name) - except AttributeError: - other_indexes = [i for i in self._indexes - if i != self._default_index] - for index in other_indexes: - real_method = getattr(self._indexes[index], method_name, - None) - if real_method: - break - if real_method: - return switch_index_if_fails(real_method, self) - else: - raise AttributeError("No index have attribute '%s'" % method_name) diff --git a/Lib/packaging/pypi/xmlrpc.py b/Lib/packaging/pypi/xmlrpc.py deleted file mode 100644 index befdf6d..0000000 --- a/Lib/packaging/pypi/xmlrpc.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Spider using the XML-RPC PyPI API. - -This module contains the class Client, a spider that can be used to find -and retrieve distributions from a project index (like the Python Package -Index), using its XML-RPC API (see documentation of the reference -implementation at http://wiki.python.org/moin/PyPiXmlRpc). -""" - -import xmlrpc.client - -from packaging import logger -from packaging.errors import IrrationalVersionError -from packaging.version import get_version_predicate -from packaging.pypi.base import BaseClient -from packaging.pypi.errors import (ProjectNotFound, InvalidSearchField, - ReleaseNotFound) -from packaging.pypi.dist import ReleaseInfo - -__all__ = ['Client', 'DEFAULT_XMLRPC_INDEX_URL'] - -DEFAULT_XMLRPC_INDEX_URL = 'http://python.org/pypi' - -_SEARCH_FIELDS = ['name', 'version', 'author', 'author_email', 'maintainer', - 'maintainer_email', 'home_page', 'license', 'summary', - 'description', 'keywords', 'platform', 'download_url'] - - -class Client(BaseClient): - """Client to query indexes using XML-RPC method calls. - - If no server_url is specified, use the default PyPI XML-RPC URL, - defined in the DEFAULT_XMLRPC_INDEX_URL constant:: - - >>> client = Client() - >>> client.server_url == DEFAULT_XMLRPC_INDEX_URL - True - - >>> client = Client("http://someurl/") - >>> client.server_url - 'http://someurl/' - """ - - def __init__(self, server_url=DEFAULT_XMLRPC_INDEX_URL, prefer_final=False, - prefer_source=True): - super(Client, self).__init__(prefer_final, prefer_source) - self.server_url = server_url - self._projects = {} - - def get_release(self, requirements, prefer_final=False): - """Return a release with all complete metadata and distribution - related informations. - """ - prefer_final = self._get_prefer_final(prefer_final) - predicate = get_version_predicate(requirements) - releases = self.get_releases(predicate.name) - release = releases.get_last(predicate, prefer_final) - self.get_metadata(release.name, str(release.version)) - self.get_distributions(release.name, str(release.version)) - return release - - def get_releases(self, requirements, prefer_final=None, show_hidden=True, - force_update=False): - """Return the list of existing releases for a specific project. - - Cache the results from one call to another. - - If show_hidden is True, return the hidden releases too. - If force_update is True, reprocess the index to update the - informations (eg. make a new XML-RPC call). - :: - - >>> client = Client() - >>> client.get_releases('Foo') - ['1.1', '1.2', '1.3'] - - If no such project exists, raise a ProjectNotFound exception:: - - >>> client.get_project_versions('UnexistingProject') - ProjectNotFound: UnexistingProject - - """ - def get_versions(project_name, show_hidden): - return self.proxy.package_releases(project_name, show_hidden) - - predicate = get_version_predicate(requirements) - prefer_final = self._get_prefer_final(prefer_final) - project_name = predicate.name - if not force_update and (project_name.lower() in self._projects): - project = self._projects[project_name.lower()] - if not project.contains_hidden and show_hidden: - # if hidden releases are requested, and have an existing - # list of releases that does not contains hidden ones - all_versions = get_versions(project_name, show_hidden) - existing_versions = project.get_versions() - hidden_versions = set(all_versions) - set(existing_versions) - for version in hidden_versions: - project.add_release(release=ReleaseInfo(project_name, - version, index=self._index)) - else: - versions = get_versions(project_name, show_hidden) - if not versions: - raise ProjectNotFound(project_name) - project = self._get_project(project_name) - project.add_releases([ReleaseInfo(project_name, version, - index=self._index) - for version in versions]) - project = project.filter(predicate) - if len(project) == 0: - raise ReleaseNotFound("%s" % predicate) - project.sort_releases(prefer_final) - return project - - - def get_distributions(self, project_name, version): - """Grab informations about distributions from XML-RPC. - - Return a ReleaseInfo object, with distribution-related informations - filled in. - """ - url_infos = self.proxy.release_urls(project_name, version) - project = self._get_project(project_name) - if version not in project.get_versions(): - project.add_release(release=ReleaseInfo(project_name, version, - index=self._index)) - release = project.get_release(version) - for info in url_infos: - packagetype = info['packagetype'] - dist_infos = {'url': info['url'], - 'hashval': info['md5_digest'], - 'hashname': 'md5', - 'is_external': False, - 'python_version': info['python_version']} - release.add_distribution(packagetype, **dist_infos) - return release - - def get_metadata(self, project_name, version): - """Retrieve project metadata. - - Return a ReleaseInfo object, with metadata informations filled in. - """ - # to be case-insensitive, get the informations from the XMLRPC API - projects = [d['name'] for d in - self.proxy.search({'name': project_name}) - if d['name'].lower() == project_name] - if len(projects) > 0: - project_name = projects[0] - - metadata = self.proxy.release_data(project_name, version) - project = self._get_project(project_name) - if version not in project.get_versions(): - project.add_release(release=ReleaseInfo(project_name, version, - index=self._index)) - release = project.get_release(version) - release.set_metadata(metadata) - return release - - def search_projects(self, name=None, operator="or", **kwargs): - """Find using the keys provided in kwargs. - - You can set operator to "and" or "or". - """ - for key in kwargs: - if key not in _SEARCH_FIELDS: - raise InvalidSearchField(key) - if name: - kwargs["name"] = name - projects = self.proxy.search(kwargs, operator) - for p in projects: - project = self._get_project(p['name']) - try: - project.add_release(release=ReleaseInfo(p['name'], - p['version'], metadata={'summary': p['summary']}, - index=self._index)) - except IrrationalVersionError as e: - logger.warning("Irrational version error found: %s", e) - return [self._projects[p['name'].lower()] for p in projects] - - def get_all_projects(self): - """Return the list of all projects registered in the package index""" - projects = self.proxy.list_packages() - for name in projects: - self.get_releases(name, show_hidden=True) - - return [self._projects[name.lower()] for name in set(projects)] - - @property - def proxy(self): - """Property used to return the XMLRPC server proxy. - - If no server proxy is defined yet, creates a new one:: - - >>> client = Client() - >>> client.proxy() - <ServerProxy for python.org/pypi> - - """ - if not hasattr(self, '_server_proxy'): - self._server_proxy = xmlrpc.client.ServerProxy(self.server_url) - - return self._server_proxy |