Diffstat (limited to 'Lib/packaging/tests/test_pypi_simple.py')
-rw-r--r--  Lib/packaging/tests/test_pypi_simple.py  353
1 file changed, 0 insertions, 353 deletions
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
deleted file mode 100644
index 59204c4..0000000
--- a/Lib/packaging/tests/test_pypi_simple.py
+++ /dev/null
@@ -1,353 +0,0 @@
-"""Tests for the packaging.pypi.simple module."""
-import re
-import os
-import sys
-import http.client
-import urllib.error
-import urllib.parse
-import urllib.request
-
-from packaging.pypi.simple import Crawler
-
-from packaging.tests import unittest
-from packaging.tests.support import (TempdirManager, LoggingCatcher,
- fake_dec)
-
-try:
- import _thread
- from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
- PYPI_DEFAULT_STATIC_PATH)
-except ImportError:
- _thread = None
- use_pypi_server = fake_dec
- PYPI_DEFAULT_STATIC_PATH = os.path.join(
- os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
-
-
-
-class SimpleCrawlerTestCase(TempdirManager,
- LoggingCatcher,
- unittest.TestCase):
-
- def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
- *args, **kwargs):
- """Build and return a SimpleIndex with the test server urls"""
- if hosts is None:
- hosts = (server.full_address.replace("http://", ""),)
- kwargs['hosts'] = hosts
- return Crawler(server.full_address + base_url, *args,
- **kwargs)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server()
- def test_bad_urls(self, server):
- crawler = Crawler()
- url = 'http://127.0.0.1:0/nonesuch/test_simple'
- try:
- v = crawler._open_url(url)
- except Exception as v:
- self.assertIn(url, str(v))
- else:
- v.close()
- self.assertIsInstance(v, urllib.error.HTTPError)
-
- # issue 16
- # easy_install inquant.contentmirror.plone breaks because of a typo
- # in its home URL
- crawler = Crawler(hosts=('example.org',))
- url = ('url:%20https://svn.plone.org/svn/collective/'
- 'inquant.contentmirror.plone/trunk')
- try:
- v = crawler._open_url(url)
- except Exception as v:
- self.assertIn(url, str(v))
- else:
- v.close()
- self.assertIsInstance(v, urllib.error.HTTPError)
-
- def _urlopen(*args):
- raise http.client.BadStatusLine('line')
-
- old_urlopen = urllib.request.urlopen
- urllib.request.urlopen = _urlopen
- url = 'http://example.org'
- try:
- v = crawler._open_url(url)
- except Exception as v:
- self.assertIn('line', str(v))
- else:
- v.close()
- # TODO use self.assertRaises
- raise AssertionError('Should have raised here!')
- finally:
- urllib.request.urlopen = old_urlopen
-
- # issue 20
- url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
- try:
- crawler._open_url(url)
- except Exception as v:
- self.assertIn('Download error', str(v))
-
- # issue #160
- url = server.full_address
- page = ('<a href="http://www.famfamfam.com]('
- 'http://www.famfamfam.com/">')
- crawler._process_url(url, page)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server("test_found_links")
- def test_found_links(self, server):
- # Browse the index, asking for a specified release version
- # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
- crawler = self._get_simple_crawler(server)
- last_release = crawler.get_release("foobar")
-
- # we have scanned the index page
- self.assertIn(server.full_address + "/simple/foobar/",
- crawler._processed_urls)
-
- # we have found 4 releases in this page
- self.assertEqual(len(crawler._projects["foobar"]), 4)
-
- # and returned the most recent one
- self.assertEqual("%s" % last_release.version, '2.0.1')
-
- def test_is_browsable(self):
- crawler = Crawler(follow_externals=False)
- self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
-
- # Now, when following externals, we can have a list of hosts to trust
- # and not follow external links other than the ones described here.
- crawler = Crawler(hosts=["pypi.python.org", "example.org"],
- follow_externals=True)
- good_urls = (
- "http://pypi.python.org/foo/bar",
- "http://pypi.python.org/simple/foobar",
- "http://example.org",
- "http://example.org/",
- "http://example.org/simple/",
- )
- bad_urls = (
- "http://python.org",
- "http://example.tld",
- )
-
- for url in good_urls:
- self.assertTrue(crawler._is_browsable(url))
-
- for url in bad_urls:
- self.assertFalse(crawler._is_browsable(url))
-
- # allow all hosts
- crawler = Crawler(follow_externals=True, hosts=("*",))
- self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
- self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
-
- # specify a list of hosts we want to allow
- crawler = Crawler(follow_externals=True,
- hosts=("*.example.org",))
- self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
- self.assertTrue(
- crawler._is_browsable("http://pypi.example.org/a/path"))
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server("with_externals")
- def test_follow_externals(self, server):
- # Include external pages
- # Try to request the package index, which contains links to "external"
- # resources. They have to be scanned too.
- crawler = self._get_simple_crawler(server, follow_externals=True)
- crawler.get_release("foobar")
- self.assertIn(server.full_address + "/external/external.html",
- crawler._processed_urls)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server("with_real_externals")
- def test_restrict_hosts(self, server):
- # Restricting the crawler to a list of allowed hosts is possible.
- # Test that telling the simple PyPI client not to retrieve external
- # resources works.
- crawler = self._get_simple_crawler(server, follow_externals=False)
- crawler.get_release("foobar")
- self.assertNotIn(server.full_address + "/external/external.html",
- crawler._processed_urls)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server(static_filesystem_paths=["with_externals"],
- static_uri_paths=["simple", "external"])
- def test_links_priority(self, server):
- # Download links from the pypi simple index should be used before
- # external download links.
- # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
- #
- # Use case:
- # - someone uploads a package on pypi, an md5 is generated
- # - someone manually indexes this link (with the md5 in the url) on
- # an external page accessible from the package page.
- # - someone re-uploads the package (with a different md5)
- # - while easy_installing, an MD5 error occurs because the external
- # link is used
- # -> The index should use the link from pypi, not the external one.
-
- # start an index server
- index_url = server.full_address + '/simple/'
-
- # scan a test index
- crawler = Crawler(index_url, follow_externals=True)
- releases = crawler.get_releases("foobar")
- server.stop()
-
- # we have only one link, because links are compared without md5
- self.assertEqual(1, len(releases))
- self.assertEqual(1, len(releases[0].dists))
- # the link should be from the index
- self.assertEqual(2, len(releases[0].dists['sdist'].urls))
- self.assertEqual('12345678901234567',
- releases[0].dists['sdist'].url['hashval'])
- self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server(static_filesystem_paths=["with_norel_links"],
- static_uri_paths=["simple", "external"])
- def test_not_scan_all_links(self, server):
- # Do not follow all index page links.
- # Links not tagged with rel="download" or rel="homepage" must not
- # be processed by the package index while processing "pages".
-
- # process the pages
- crawler = self._get_simple_crawler(server, follow_externals=True)
- crawler.get_releases("foobar")
- # now it should have processed only pages with links rel="download"
- # and rel="homepage"
- self.assertIn("%s/simple/foobar/" % server.full_address,
- crawler._processed_urls) # it's the simple index page
- self.assertIn("%s/external/homepage.html" % server.full_address,
- crawler._processed_urls) # the external homepage is rel="homepage"
- self.assertNotIn("%s/external/nonrel.html" % server.full_address,
- crawler._processed_urls) # this link contains no rel=*
- self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
- crawler._processed_urls) # linked from simple index (no rel)
- self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
- crawler._processed_urls) # linked from simple index (rel)
- self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
- crawler._processed_urls) # linked from external homepage (rel)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- def test_uses_mirrors(self):
- # When the main repository seems down, try using the given mirrors
- server = PyPIServer("foo_bar_baz")
- mirror = PyPIServer("foo_bar_baz")
- mirror.start() # we deliberately do not start the main server here
-
- try:
- # create the index using both servers
- crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
- # set the timeout to 1s for the tests
- timeout=1, mirrors=[mirror.full_address])
-
- # this should not raise a timeout
- self.assertEqual(4, len(crawler.get_releases("foo")))
- finally:
- mirror.stop()
- server.stop()
-
- def test_simple_link_matcher(self):
- # Test that the simple link matcher finds the right links
- crawler = Crawler(follow_externals=False)
-
- # Here, we define:
- # 1. one link that must be followed, because it's a download link
- # 2. one link that must *not* be followed, because is_browsable
- # returns false for it.
- # 3. one link that must be followed because it's a homepage that is
- # browsable
- # 4. one link that must be followed, because it contains an md5 hash
- self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
- self.assertFalse(crawler._is_browsable("http://dl-link2"))
- content = """
- <a href="http://dl-link1" rel="download">download_link1</a>
- <a href="http://dl-link2" rel="homepage">homepage_link1</a>
- <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
- <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
- """ % {'index_url': crawler.index_url}
-
- # Test that the simple link matcher yields the expected links.
- generator = crawler._simple_link_matcher(content, crawler.index_url)
- self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
- crawler.index_url, True), next(generator))
- self.assertEqual(('http://dl-link1', True), next(generator))
- self.assertEqual(('%stest' % crawler.index_url, False),
- next(generator))
- self.assertRaises(StopIteration, generator.__next__)
-
- # Following external links is possible (e.g. homepages)
- crawler.follow_externals = True
- generator = crawler._simple_link_matcher(content, crawler.index_url)
- self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
- crawler.index_url, True), next(generator))
- self.assertEqual(('http://dl-link1', True), next(generator))
- self.assertEqual(('http://dl-link2', False), next(generator))
- self.assertEqual(('%stest' % crawler.index_url, False),
- next(generator))
- self.assertRaises(StopIteration, generator.__next__)
-
- def test_browse_local_files(self):
- # Test that we can browse local files
- index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
- if sys.platform == 'win32':
- # under windows the correct syntax is:
- # file:///C|\the\path\here
- # instead of
- # file://C:\the\path\here
- fix = re.compile(r'^(file://)([A-Za-z])(:)')
- index_url = fix.sub('\\1/\\2|', index_url)
-
- index_path = os.sep.join([index_url, "test_found_links", "simple"])
- crawler = Crawler(index_path)
- dists = crawler.get_releases("foobar")
- self.assertEqual(4, len(dists))
-
- def test_get_link_matcher(self):
- crawler = Crawler("http://example.org")
- self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
- "http://example.org/some/file").__name__)
- self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
- "http://other-url").__name__)
-
- def test_default_link_matcher(self):
- crawler = Crawler("http://example.org", mirrors=[])
- crawler.follow_externals = True
- crawler._is_browsable = lambda *args: True
- base_url = "http://example.org/some/file/"
- content = """
-<a href="../homepage" rel="homepage">link</a>
-<a href="../download" rel="download">link2</a>
-<a href="../simpleurl">link2</a>
- """
- found_links = set(uri for uri, _ in
- crawler._default_link_matcher(content, base_url))
- self.assertIn('http://example.org/some/homepage', found_links)
- self.assertIn('http://example.org/some/simpleurl', found_links)
- self.assertIn('http://example.org/some/download', found_links)
-
- @unittest.skipIf(_thread is None, 'needs threads')
- @use_pypi_server("project_list")
- def test_search_projects(self, server):
- # we can search the index for projects by name;
- # the case used does not matter here
- crawler = self._get_simple_crawler(server)
- tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
- ('foobar*', ['FooBar-bar', 'Foobar-baz']),
- ('*foobar', ['Baz-FooBar']))
-
- for search, expected in tests:
- projects = [p.name for p in crawler.search_projects(search)]
- self.assertListEqual(expected, projects)
-
-
-def test_suite():
- return unittest.makeSuite(SimpleCrawlerTestCase)
-
-if __name__ == '__main__':
- unittest.main(defaultTest="test_suite")
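
The deleted tests above outline the public surface of the crawler they exercised. The following is a rough usage sketch based only on those tests, assuming an environment where packaging.pypi.simple is still importable (the package was removed from CPython together with this test); the index URL, host name, and project name are placeholders, not values from any real setup.

    # Sketch derived from the deleted tests; URLs and names are placeholders.
    from packaging.pypi.simple import Crawler

    # Restrict crawling to trusted hosts, as test_is_browsable does; pass
    # follow_externals=True and hosts=("*",) to allow every external link.
    crawler = Crawler("http://localhost:8000/simple/",
                      hosts=("localhost",),
                      follow_externals=False,
                      timeout=1)

    # Fetch all releases of a project and the most recent one, mirroring
    # test_found_links.
    releases = crawler.get_releases("foobar")
    latest = crawler.get_release("foobar")
    print(len(releases), "releases; latest is", latest.version)

    # Case-insensitive, wildcard-aware name search, as in test_search_projects.
    for project in crawler.search_projects("foobar*"):
        print(project.name)

As test_uses_mirrors shows, passing mirrors=[...] to the constructor makes the crawler fall back to those addresses when the primary index does not respond within the timeout.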