diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 2008-07-18 20:59:44 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 2008-07-18 20:59:44 (GMT) |
commit | 73fd46d24e45c34f0fb87261e5471584a7c273df (patch) | |
tree | cf36eca08149e5fe933a90b71e7b3b3a1521305a /Lib/test/test_robotparser.py | |
parent | 48577d1944c6b03be12bd7b144eb22db6bd6d296 (diff) | |
download | cpython-73fd46d24e45c34f0fb87261e5471584a7c273df.zip cpython-73fd46d24e45c34f0fb87261e5471584a7c273df.tar.gz cpython-73fd46d24e45c34f0fb87261e5471584a7c273df.tar.bz2 |
Bug 3347: robotparser failed because it didn't convert bytes to string.
The solution is to convert bytes to text via utf-8. I'm not entirely
sure if this is safe, but it looks like robots.txt is expected to be
ascii.
Diffstat (limited to 'Lib/test/test_robotparser.py')
-rw-r--r-- | Lib/test/test_robotparser.py | 15 |
1 file changed, 12 insertions, 3 deletions
```diff
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index fbb02bc..f02f986 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -136,8 +136,9 @@ bad = [] # Bug report says "/" should be denied, but that is not in the RFC
 
 RobotTest(7, doc, good, bad)
 
-class TestCase(unittest.TestCase):
-    def runTest(self):
+class NetworkTestCase(unittest.TestCase):
+
+    def testPasswordProtectedSite(self):
         support.requires('network')
         # whole site is password-protected.
         url = 'http://mueblesmoraleda.com'
@@ -146,9 +147,17 @@ class TestCase(unittest.TestCase):
         parser.read()
         self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
 
+    def testPythonOrg(self):
+        support.requires('network')
+        parser = urllib.robotparser.RobotFileParser(
+            "http://www.python.org/robots.txt")
+        parser.read()
+        self.assertTrue(parser.can_fetch("*",
+                                         "http://www.python.org/robots.txt"))
+
 def test_main():
+    support.run_unittest(NetworkTestCase)
     support.run_unittest(tests)
-    TestCase().run()
 
 if __name__=='__main__':
     support.Verbose = 1
```