diff options
author | Berker Peksag <berker.peksag@gmail.com> | 2016-09-18 08:21:57 (GMT) |
---|---|---|
committer | Berker Peksag <berker.peksag@gmail.com> | 2016-09-18 08:21:57 (GMT) |
commit | 2a8d7f1c4727a3d69074d35ebb9c022d3c417601 (patch) | |
tree | 15b887b2b2c3d99bc949b171dc46a7489874bcc8 /Lib/test/test_robotparser.py | |
parent | a2365c1de2e94b9709f8d776252a9849666614c8 (diff) | |
download | cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.zip cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.tar.gz cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.tar.bz2 |
Issue #28151: Use pythontest.net in test_robotparser
Diffstat (limited to 'Lib/test/test_robotparser.py')
-rw-r--r-- | Lib/test/test_robotparser.py | 43 |
1 files changed, 36 insertions, 7 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index d4bf453..51b48ce 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -1,4 +1,5 @@ import io +import os import unittest import urllib.robotparser from collections import namedtuple @@ -272,14 +273,42 @@ class PasswordProtectedSiteTestCase(unittest.TestCase): class NetworkTestCase(unittest.TestCase): - def testPythonOrg(self): + base_url = 'http://www.pythontest.net/' + robots_txt = '{}elsewhere/robots.txt'.format(base_url) + + @classmethod + def setUpClass(cls): support.requires('network') - with support.transient_internet('www.python.org'): - parser = urllib.robotparser.RobotFileParser( - "http://www.python.org/robots.txt") - parser.read() - self.assertTrue( - parser.can_fetch("*", "http://www.python.org/robots.txt")) + with support.transient_internet(cls.base_url): + cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt) + cls.parser.read() + + def url(self, path): + return '{}{}{}'.format( + self.base_url, path, '/' if not os.path.splitext(path)[1] else '' + ) + + def test_basic(self): + self.assertFalse(self.parser.disallow_all) + self.assertFalse(self.parser.allow_all) + self.assertGreater(self.parser.mtime(), 0) + self.assertFalse(self.parser.crawl_delay('*')) + self.assertFalse(self.parser.request_rate('*')) + + def test_can_fetch(self): + self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere'))) + self.assertFalse(self.parser.can_fetch('Nutch', self.base_url)) + self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian'))) + self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats'))) + self.assertFalse(self.parser.can_fetch('*', self.url('webstats'))) + self.assertTrue(self.parser.can_fetch('*', self.base_url)) + + def test_read_404(self): + parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt')) + parser.read() + self.assertTrue(parser.allow_all) + self.assertFalse(parser.disallow_all) + self.assertEqual(parser.mtime(), 0) if __name__=='__main__': unittest.main() |