diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:30:46 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:30:46 (GMT) |
commit | 42b0c2f74068d74694187d229f65fcce0ae19643 (patch) | |
tree | c2a02aa73ab05d8516352e5e8f4c81810fd2d28b | |
parent | b8f96c16036f34d8e913cca95c5ad07a592b161d (diff) | |
download | cpython-42b0c2f74068d74694187d229f65fcce0ae19643.zip cpython-42b0c2f74068d74694187d229f65fcce0ae19643.tar.gz cpython-42b0c2f74068d74694187d229f65fcce0ae19643.tar.bz2 |
Merged revisions 83209 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines
Fix Issue #6325 - robotparser to honor URLs with query strings.
........
-rw-r--r-- | Lib/test/test_robotparser.py | 11 | ||||
-rw-r--r-- | Lib/urllib/robotparser.py | 6 |
2 files changed, 15 insertions, 2 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 4c3b536..9d30405 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -205,6 +205,17 @@ bad = ['/folder1/anotherfile.html']
 RobotTest(13, doc, good, bad, agent="googlebot")
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+
+good = ['/some/path']
+bad = ['/some/path?name=value']
+
+RobotTest(14, doc, good, bad)
+
 class NetworkTestCase(unittest.TestCase):
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index bafb611..30baa05 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -129,8 +129,10 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = urllib.parse.quote(
-            urllib.parse.urlparse(urllib.parse.unquote(url))[2])
+        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
+        url = urllib.parse.urlunparse(('','',parsed_url.path,
+            parsed_url.params,parsed_url.query, parsed_url.fragment))
+        url = urllib.parse.quote(url)
         if not url:
             url = "/"
         for entry in self.entries: