diff options
author | Christopher Beacham <mcscope@gmail.com> | 2018-05-16 14:52:07 (GMT) |
---|---|---|
committer | Ned Deily <nad@python.org> | 2018-05-16 14:52:07 (GMT) |
commit | 5db5c0669e624767375593cc1a01f32092c91c58 (patch) | |
tree | 0172f5c0c9ae6879ca21c82c175be1c2b829b4c9 /Lib/test/test_robotparser.py | |
parent | 7a1c02750171d9895754da5d560700aaba93da56 (diff) | |
download | cpython-5db5c0669e624767375593cc1a01f32092c91c58.zip cpython-5db5c0669e624767375593cc1a01f32092c91c58.tar.gz cpython-5db5c0669e624767375593cc1a01f32092c91c58.tar.bz2 |
bpo-21475: Support the Sitemap extension in robotparser (GH-6883)
Diffstat (limited to 'Lib/test/test_robotparser.py')
-rw-r--r-- | Lib/test/test_robotparser.py | 21 |
1 files changed, 21 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index bee8d23..84a267a 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -12,6 +12,7 @@ class BaseRobotTest:
     agent = 'test_robotparser'
     good = []
     bad = []
+    site_maps = None
 
     def setUp(self):
         lines = io.StringIO(self.robots_txt).readlines()
@@ -36,6 +37,9 @@ class BaseRobotTest:
             with self.subTest(url=url, agent=agent):
                 self.assertFalse(self.parser.can_fetch(agent, url))
 
+    def test_site_maps(self):
+        self.assertEqual(self.parser.site_maps(), self.site_maps)
+
 
 class UserAgentWildcardTest(BaseRobotTest, unittest.TestCase):
     robots_txt = """\
@@ -65,6 +69,23 @@ Disallow:
     bad = ['/cyberworld/map/index.html']
 
 
+class SitemapTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+# robots.txt for http://www.example.com/
+
+User-agent: *
+Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
+Sitemap: http://www.google.com/hostednews/sitemap_index.xml
+Request-rate: 3/15
+Disallow: /cyberworld/map/ # This is an infinite virtual URL space
+
+    """
+    good = ['/', '/test.html']
+    bad = ['/cyberworld/map/index.html']
+    site_maps = ['http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml',
+                 'http://www.google.com/hostednews/sitemap_index.xml']
+
+
 class RejectAllRobotsTest(BaseRobotTest, unittest.TestCase):
     robots_txt = """\
 # go away
```