diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:27:56 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:27:56 (GMT) |
commit | 3f8ab965f722b3bda679c9271fb8907e2bbcdc64 (patch) | |
tree | 851810d60ce71d36a5d67bc1e404da7bc36e9ddd /Lib/urllib/robotparser.py | |
parent | 96a60ae90c291d94c058c80351fa38b6d73eda92 (diff) | |
download | cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.zip cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.tar.gz cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.tar.bz2 |
Fix Issue6325 - robotparser to honor URLs with query strings.
Diffstat (limited to 'Lib/urllib/robotparser.py')
-rw-r--r-- | Lib/urllib/robotparser.py | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index bafb611..30baa05 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -129,8 +129,10 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = urllib.parse.quote(
-            urllib.parse.urlparse(urllib.parse.unquote(url))[2])
+        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
+        url = urllib.parse.urlunparse(('','',parsed_url.path,
+            parsed_url.params,parsed_url.query, parsed_url.fragment))
+        url = urllib.parse.quote(url)
         if not url:
             url = "/"
         for entry in self.entries:
```