summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Senthil Kumaran <orsenthil@gmail.com> 2010-07-28 16:27:56 (GMT)
committer: Senthil Kumaran <orsenthil@gmail.com> 2010-07-28 16:27:56 (GMT)
commit: 3f8ab965f722b3bda679c9271fb8907e2bbcdc64 (patch)
tree: 851810d60ce71d36a5d67bc1e404da7bc36e9ddd /Lib
parent: 96a60ae90c291d94c058c80351fa38b6d73eda92 (diff)
download: cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.zip
cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.tar.gz
cpython-3f8ab965f722b3bda679c9271fb8907e2bbcdc64.tar.bz2
Fix Issue #6325 - robotparser to honor URLs with query strings.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_robotparser.py 11
-rw-r--r-- Lib/urllib/robotparser.py 6
2 files changed, 15 insertions, 2 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 4c3b536..9d30405 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -205,6 +205,17 @@ bad = ['/folder1/anotherfile.html']
RobotTest(13, doc, good, bad, agent="googlebot")
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+
+good = ['/some/path']
+bad = ['/some/path?name=value']
+
+RobotTest(14, doc, good, bad)
+
class NetworkTestCase(unittest.TestCase):
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index bafb611..30baa05 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -129,8 +129,10 @@ class RobotFileParser:
return True
# search for given user agent matches
# the first match counts
- url = urllib.parse.quote(
- urllib.parse.urlparse(urllib.parse.unquote(url))[2])
+ parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
+ url = urllib.parse.urlunparse(('','',parsed_url.path,
+ parsed_url.params,parsed_url.query, parsed_url.fragment))
+ url = urllib.parse.quote(url)
if not url:
url = "/"
for entry in self.entries: