summaryrefslogtreecommitdiffstats
path: root/Lib/urllib/robotparser.py
diff options
context:
space:
mode:
authorSenthil Kumaran <senthil@uthcode.com>2013-05-29 12:54:31 (GMT)
committerSenthil Kumaran <senthil@uthcode.com>2013-05-29 12:54:31 (GMT)
commitc70a6ae49bd162af06130e48a45579d445e058a8 (patch)
treececb299ec31a93cda7d9256a7092cd601be0c0b4 /Lib/urllib/robotparser.py
parenteb4c9c77b8257c05b40467651bdc7764295926e8 (diff)
downloadcpython-c70a6ae49bd162af06130e48a45579d445e058a8.zip
cpython-c70a6ae49bd162af06130e48a45579d445e058a8.tar.gz
cpython-c70a6ae49bd162af06130e48a45579d445e058a8.tar.bz2
#17403: urllib.robotparser normalizes the URLs before adding them to the RuleLine.
This helps in handling certain types of invalid URLs in a conservative manner.
Diffstat (limited to 'Lib/urllib/robotparser.py')
-rw-r--r--Lib/urllib/robotparser.py1
1 files changed, 1 insertions, 0 deletions
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 75be4af..978ba58 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -157,6 +157,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
+ path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
self.path = urllib.parse.quote(path)
self.allowance = allowance