summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_robotparser.py
diff options
context:
space:
mode:
author: Berker Peksag <berker.peksag@gmail.com>, 2016-09-18 08:21:57 (GMT)
committer: Berker Peksag <berker.peksag@gmail.com>, 2016-09-18 08:21:57 (GMT)
commit: 2a8d7f1c4727a3d69074d35ebb9c022d3c417601 (patch)
tree: 15b887b2b2c3d99bc949b171dc46a7489874bcc8 /Lib/test/test_robotparser.py
parent: a2365c1de2e94b9709f8d776252a9849666614c8 (diff)
download: cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.zip
cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.tar.gz
cpython-2a8d7f1c4727a3d69074d35ebb9c022d3c417601.tar.bz2
Issue #28151: Use pythontest.net in test_robotparser
Diffstat (limited to 'Lib/test/test_robotparser.py')
-rw-r--r-- Lib/test/test_robotparser.py 43
1 files changed, 36 insertions, 7 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index d4bf453..51b48ce 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -1,4 +1,5 @@
import io
+import os
import unittest
import urllib.robotparser
from collections import namedtuple
@@ -272,14 +273,42 @@ class PasswordProtectedSiteTestCase(unittest.TestCase):
class NetworkTestCase(unittest.TestCase):
- def testPythonOrg(self):
+ base_url = 'http://www.pythontest.net/'
+ robots_txt = '{}elsewhere/robots.txt'.format(base_url)
+
+ @classmethod
+ def setUpClass(cls):
support.requires('network')
- with support.transient_internet('www.python.org'):
- parser = urllib.robotparser.RobotFileParser(
- "http://www.python.org/robots.txt")
- parser.read()
- self.assertTrue(
- parser.can_fetch("*", "http://www.python.org/robots.txt"))
+ with support.transient_internet(cls.base_url):
+ cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
+ cls.parser.read()
+
+ def url(self, path):
+ return '{}{}{}'.format(
+ self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
+ )
+
+ def test_basic(self):
+ self.assertFalse(self.parser.disallow_all)
+ self.assertFalse(self.parser.allow_all)
+ self.assertGreater(self.parser.mtime(), 0)
+ self.assertFalse(self.parser.crawl_delay('*'))
+ self.assertFalse(self.parser.request_rate('*'))
+
+ def test_can_fetch(self):
+ self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
+ self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
+ self.assertTrue(self.parser.can_fetch('*', self.base_url))
+
+ def test_read_404(self):
+ parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
+ parser.read()
+ self.assertTrue(parser.allow_all)
+ self.assertFalse(parser.disallow_all)
+ self.assertEqual(parser.mtime(), 0)
if __name__=='__main__':
unittest.main()