From a4f79f97db7920387d6c7704a2b212d6b1503d9d Mon Sep 17 00:00:00 2001
From: Senthil Kumaran
Date: Wed, 28 Jul 2010 16:35:35 +0000
Subject: Merged revisions 83209 via svnmerge from
 svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines

  Fix Issue6325 - robotparse to honor urls with query strings.
........
---
 Lib/robotparser.py           |  7 ++++++-
 Lib/test/test_robotparser.py | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 447563f..730426f 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -131,7 +131,12 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
+        parsed_url = urlparse.urlparse(urllib.unquote(url))
+        url = urlparse.urlunparse(('', '', parsed_url.path,
+            parsed_url.params, parsed_url.query, parsed_url.fragment))
+        url = urllib.quote(url)
+        if not url:
+            url = "/"
         for entry in self.entries:
             if entry.applies_to(useragent):
                 return entry.allowance(url)

diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 405d517..0415884 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -202,6 +202,17 @@ bad = ['/folder1/anotherfile.html']
 
 RobotTest(13, doc, good, bad, agent="googlebot")
 
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+
+good = ['/some/path']
+bad = ['/some/path?name=value']
+
+RobotTest(14, doc, good, bad)
+
 
 
 class NetworkTestCase(unittest.TestCase):
-- 
cgit v0.12
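
For context, a minimal sketch of the behaviour this patch changes, not part of the patch itself. It drives the stdlib robotparser module directly; the robots.txt rules and URLs mirror the new test case (the example.com host is only illustrative), and feeding parse() a list of lines avoids any network access:

    import robotparser

    # robots.txt rules taken from the test case added in this patch
    rules = """
    User-agent: *
    Disallow: /some/path?name=value
    """.splitlines()

    rp = robotparser.RobotFileParser()
    rp.parse(rules)

    # Before the fix, can_fetch() kept only the path component, so both URLs
    # were normalised to "/some/path" and both were reported as allowed.
    print rp.can_fetch("*", "http://example.com/some/path")             # True
    print rp.can_fetch("*", "http://example.com/some/path?name=value")  # False after the fix

The key change is that can_fetch() now rebuilds the URL from its path, params, query and fragment via urlparse.urlunparse() before quoting, instead of discarding everything but the path, so a Disallow rule that includes a query string can actually match.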