Diffstat (limited to 'Lib/packaging/tests/test_pypi_simple.py')
-rw-r--r-- | Lib/packaging/tests/test_pypi_simple.py | 353
1 file changed, 0 insertions, 353 deletions
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
deleted file mode 100644
index 59204c4..0000000
--- a/Lib/packaging/tests/test_pypi_simple.py
+++ /dev/null
@@ -1,353 +0,0 @@
-"""Tests for the packaging.pypi.simple module."""
-import re
-import os
-import sys
-import http.client
-import urllib.error
-import urllib.parse
-import urllib.request
-
-from packaging.pypi.simple import Crawler
-
-from packaging.tests import unittest
-from packaging.tests.support import (TempdirManager, LoggingCatcher,
-                                     fake_dec)
-
-try:
-    import _thread
-    from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
-                                             PYPI_DEFAULT_STATIC_PATH)
-except ImportError:
-    _thread = None
-    use_pypi_server = fake_dec
-    PYPI_DEFAULT_STATIC_PATH = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
-
-
-class SimpleCrawlerTestCase(TempdirManager,
-                            LoggingCatcher,
-                            unittest.TestCase):
-
-    def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
-                            *args, **kwargs):
-        """Build and return a Crawler using the test server's URLs."""
-        if hosts is None:
-            hosts = (server.full_address.replace("http://", ""),)
-        kwargs['hosts'] = hosts
-        return Crawler(server.full_address + base_url, *args,
-                       **kwargs)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server()
-    def test_bad_urls(self, server):
-        crawler = Crawler()
-        url = 'http://127.0.0.1:0/nonesuch/test_simple'
-        try:
-            v = crawler._open_url(url)
-        except Exception as v:
-            self.assertIn(url, str(v))
-        else:
-            v.close()
-            self.assertIsInstance(v, urllib.error.HTTPError)
-
-        # issue 16
-        # easy_install inquant.contentmirror.plone breaks because of a typo
-        # in its home URL
-        crawler = Crawler(hosts=('example.org',))
-        url = ('url:%20https://svn.plone.org/svn/collective/'
-               'inquant.contentmirror.plone/trunk')
-        try:
-            v = crawler._open_url(url)
-        except Exception as v:
-            self.assertIn(url, str(v))
-        else:
-            v.close()
-            self.assertIsInstance(v, urllib.error.HTTPError)
-
-        def _urlopen(*args):
-            raise http.client.BadStatusLine('line')
-
-        old_urlopen = urllib.request.urlopen
-        urllib.request.urlopen = _urlopen
-        url = 'http://example.org'
-        try:
-            v = crawler._open_url(url)
-        except Exception as v:
-            self.assertIn('line', str(v))
-        else:
-            v.close()
-            # TODO use self.assertRaises
-            raise AssertionError('Should have raised here!')
-        finally:
-            urllib.request.urlopen = old_urlopen
-
-        # issue 20
-        url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
-        try:
-            crawler._open_url(url)
-        except Exception as v:
-            self.assertIn('Download error', str(v))
-
-        # issue #160
-        url = server.full_address
-        page = ('<a href="http://www.famfamfam.com]('
-                'http://www.famfamfam.com/">')
-        crawler._process_url(url, page)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server("test_found_links")
-    def test_found_links(self, server):
-        # Browse the index, asking for a specified release version.
-        # The PyPI index contains links for versions 1.0, 1.1, 2.0 and 2.0.1.
-        crawler = self._get_simple_crawler(server)
-        last_release = crawler.get_release("foobar")
-
-        # we have scanned the index page
-        self.assertIn(server.full_address + "/simple/foobar/",
-                      crawler._processed_urls)
-
-        # we have found 4 releases in this page
-        self.assertEqual(len(crawler._projects["foobar"]), 4)
-
-        # and returned the most recent one
-        self.assertEqual("%s" % last_release.version, '2.0.1')
-
-    def test_is_browsable(self):
-        crawler = Crawler(follow_externals=False)
-        self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
-
-        # When following externals, we can give a list of hosts to trust;
-        # external links to hosts outside this list are not followed.
-        crawler = Crawler(hosts=["pypi.python.org", "example.org"],
-                          follow_externals=True)
-        good_urls = (
-            "http://pypi.python.org/foo/bar",
-            "http://pypi.python.org/simple/foobar",
-            "http://example.org",
-            "http://example.org/",
-            "http://example.org/simple/",
-        )
-        bad_urls = (
-            "http://python.org",
-            "http://example.tld",
-        )
-
-        for url in good_urls:
-            self.assertTrue(crawler._is_browsable(url))
-
-        for url in bad_urls:
-            self.assertFalse(crawler._is_browsable(url))
-
-        # allow all hosts
-        crawler = Crawler(follow_externals=True, hosts=("*",))
-        self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
-        self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
-
-        # specify a list of hosts we want to allow
-        crawler = Crawler(follow_externals=True,
-                          hosts=("*.example.org",))
-        self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
-        self.assertTrue(
-            crawler._is_browsable("http://pypi.example.org/a/path"))
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server("with_externals")
-    def test_follow_externals(self, server):
-        # Include external pages.
-        # Request the package index, which contains links to "external"
-        # resources; they have to be scanned too.
-        crawler = self._get_simple_crawler(server, follow_externals=True)
-        crawler.get_release("foobar")
-        self.assertIn(server.full_address + "/external/external.html",
-                      crawler._processed_urls)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server("with_real_externals")
-    def test_restrict_hosts(self, server):
-        # Restricting the crawl to a list of allowed hosts is possible.
-        # Test that telling the simple PyPI client not to retrieve
-        # external resources works.
-        crawler = self._get_simple_crawler(server, follow_externals=False)
-        crawler.get_release("foobar")
-        self.assertNotIn(server.full_address + "/external/external.html",
-                         crawler._processed_urls)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server(static_filesystem_paths=["with_externals"],
-                     static_uri_paths=["simple", "external"])
-    def test_links_priority(self, server):
-        # Download links from the PyPI simple index should be used before
-        # external download links.
-        # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
-        #
-        # Use case:
-        # - someone uploads a package on PyPI, an md5 is generated
-        # - someone manually indexes this link (with the md5 in the url) on
-        #   an external page accessible from the package page
-        # - someone re-uploads the package (with a different md5)
-        # - while easy_installing, an MD5 error occurs because the external
-        #   link is used
-        # -> The index should use the link from PyPI, not the external one.
-
-        # start an index server
-        index_url = server.full_address + '/simple/'
-
-        # scan a test index
-        crawler = Crawler(index_url, follow_externals=True)
-        releases = crawler.get_releases("foobar")
-        server.stop()
-
-        # we have only one release, because links are compared without md5
-        self.assertEqual(1, len(releases))
-        self.assertEqual(1, len(releases[0].dists))
-        # the preferred link should be the one from the index
-        self.assertEqual(2, len(releases[0].dists['sdist'].urls))
-        self.assertEqual('12345678901234567',
-                         releases[0].dists['sdist'].url['hashval'])
-        self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server(static_filesystem_paths=["with_norel_links"],
-                     static_uri_paths=["simple", "external"])
-    def test_not_scan_all_links(self, server):
-        # Do not follow all index page links.
-        # Links not tagged with rel="download" or rel="homepage" must not
-        # be processed by the package index while processing "pages".
-
-        # process the pages
-        crawler = self._get_simple_crawler(server, follow_externals=True)
-        crawler.get_releases("foobar")
-        # now it should have processed only pages with links rel="download"
-        # and rel="homepage"
-        self.assertIn("%s/simple/foobar/" % server.full_address,
-                      crawler._processed_urls)  # the simple index page
-        self.assertIn("%s/external/homepage.html" % server.full_address,
-                      crawler._processed_urls)  # external homepage, rel="homepage"
-        self.assertNotIn("%s/external/nonrel.html" % server.full_address,
-                         crawler._processed_urls)  # this link has no rel=*
-        self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
-                         crawler._processed_urls)  # linked from simple index (no rel)
-        self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
-                      crawler._processed_urls)  # linked from simple index (rel)
-        self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
-                      crawler._processed_urls)  # linked from external homepage (rel)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    def test_uses_mirrors(self):
-        # When the main repository seems down, try using the given mirrors.
-        server = PyPIServer("foo_bar_baz")
-        mirror = PyPIServer("foo_bar_baz")
-        mirror.start()  # the main server is deliberately left down, so the
-                        # crawler has to fall back to the mirror
-
-        try:
-            # create the index using both servers
-            crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
-                              # set the timeout to 1s for the tests
-                              timeout=1, mirrors=[mirror.full_address])
-
-            # this should not raise a timeout
-            self.assertEqual(4, len(crawler.get_releases("foo")))
-        finally:
-            mirror.stop()
-            server.stop()
-
-    def test_simple_link_matcher(self):
-        # Test that the simple link matcher finds the right links.
-        crawler = Crawler(follow_externals=False)
-
-        # Here, we define:
-        # 1. one link that must be followed, because it's a download one
-        # 2. one link that must *not* be followed, because _is_browsable
-        #    returns false for it
-        # 3. one link that must be followed, because it's a browsable
-        #    homepage
-        # 4. one link that must be followed, because it contains an md5 hash
-        self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
-        self.assertFalse(crawler._is_browsable("http://dl-link2"))
-        content = """
-        <a href="http://dl-link1" rel="download">download_link1</a>
-        <a href="http://dl-link2" rel="homepage">homepage_link1</a>
-        <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
-        <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef">download_link2</a>
-        """ % {'index_url': crawler.index_url}
-
-        # Test that the simple link matcher yields the right links.
-        generator = crawler._simple_link_matcher(content, crawler.index_url)
-        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
-                          crawler.index_url, True), next(generator))
-        self.assertEqual(('http://dl-link1', True), next(generator))
-        self.assertEqual(('%stest' % crawler.index_url, False),
-                         next(generator))
-        self.assertRaises(StopIteration, generator.__next__)
-
-        # Following external links is possible (e.g. homepages).
-        crawler.follow_externals = True
-        generator = crawler._simple_link_matcher(content, crawler.index_url)
-        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
-                          crawler.index_url, True), next(generator))
-        self.assertEqual(('http://dl-link1', True), next(generator))
-        self.assertEqual(('http://dl-link2', False), next(generator))
-        self.assertEqual(('%stest' % crawler.index_url, False),
-                         next(generator))
-        self.assertRaises(StopIteration, generator.__next__)
-
-    def test_browse_local_files(self):
-        # Test that we can browse local files.
-        index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
-        if sys.platform == 'win32':
-            # under windows the correct syntax is:
-            #   file:///C|\the\path\here
-            # instead of
-            #   file://C:\the\path\here
-            fix = re.compile(r'^(file://)([A-Za-z])(:)')
-            index_url = fix.sub('\\1/\\2|', index_url)
-
-        index_path = os.sep.join([index_url, "test_found_links", "simple"])
-        crawler = Crawler(index_path)
-        dists = crawler.get_releases("foobar")
-        self.assertEqual(4, len(dists))
-
-    def test_get_link_matcher(self):
-        crawler = Crawler("http://example.org")
-        self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
-                         "http://example.org/some/file").__name__)
-        self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
-                         "http://other-url").__name__)
-
-    def test_default_link_matcher(self):
-        crawler = Crawler("http://example.org", mirrors=[])
-        crawler.follow_externals = True
-        crawler._is_browsable = lambda *args: True
-        base_url = "http://example.org/some/file/"
-        content = """
-<a href="../homepage" rel="homepage">link</a>
-<a href="../download" rel="download">link2</a>
-<a href="../simpleurl">link2</a>
-        """
-        found_links = set(uri for uri, _ in
-                          crawler._default_link_matcher(content, base_url))
-        self.assertIn('http://example.org/some/homepage', found_links)
-        self.assertIn('http://example.org/some/simpleurl', found_links)
-        self.assertIn('http://example.org/some/download', found_links)
-
-    @unittest.skipIf(_thread is None, 'needs threads')
-    @use_pypi_server("project_list")
-    def test_search_projects(self, server):
-        # We can search the index for projects by name; the case used in
-        # the query does not matter.
-        crawler = self._get_simple_crawler(server)
-        tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
-                 ('foobar*', ['FooBar-bar', 'Foobar-baz']),
-                 ('*foobar', ['Baz-FooBar']))
-
-        for search, expected in tests:
-            projects = [p.name for p in crawler.search_projects(search)]
-            self.assertListEqual(expected, projects)
-
-
-def test_suite():
-    return unittest.makeSuite(SimpleCrawlerTestCase)
-
-if __name__ == '__main__':
-    unittest.main(defaultTest="test_suite")
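
For reference, the API these tests exercised looked roughly like the following. This is a minimal sketch, assuming the now-removed packaging.pypi.simple module (or its distutils2 predecessor) is importable; it uses only the constructor arguments and methods seen in the deleted tests above and will not run on current CPython, where Lib/packaging no longer exists.

    # Minimal sketch, not runnable on modern CPython: assumes the removed
    # packaging.pypi.simple module is on the path. Every name below
    # appears in the deleted tests above.
    from packaging.pypi.simple import Crawler

    # Restrict crawling to trusted hosts; glob patterns such as
    # "*.example.org" and the "*" wildcard were accepted.
    crawler = Crawler("http://pypi.python.org/simple/",
                      hosts=("pypi.python.org", "*.example.org"),
                      follow_externals=True,  # also follow rel="download"
                                              # and rel="homepage" links
                      mirrors=[],             # fallbacks when the index is down
                      timeout=5)

    release = crawler.get_release("foobar")    # most recent release
    releases = crawler.get_releases("foobar")  # all releases
    print(release.version, len(releases))

    # Glob-style, case-insensitive search over project names.
    for project in crawler.search_projects("foobar*"):
        print(project.name)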