diff options
author | Senthil Kumaran <senthil@uthcode.com> | 2013-05-29 12:57:21 (GMT) |
---|---|---|
committer | Senthil Kumaran <senthil@uthcode.com> | 2013-05-29 12:57:21 (GMT) |
commit | 6b3026ce7299056a3f10b25426cda34d771857ac (patch) | |
tree | 4aab00c18f16f3a9e9f9dc02e616417836caaac5 /Lib/urllib | |
parent | 7b503965a0c19c1a19d244e15b74cb0964f08c3c (diff) | |
parent | c70a6ae49bd162af06130e48a45579d445e058a8 (diff) | |
download | cpython-6b3026ce7299056a3f10b25426cda34d771857ac.zip cpython-6b3026ce7299056a3f10b25426cda34d771857ac.tar.gz cpython-6b3026ce7299056a3f10b25426cda34d771857ac.tar.bz2 |
merge from 3.3
#17403: urllib.robotparser normalizes the URLs before adding them to a
RuleLine. This helps in handling certain types of invalid URLs in a conservative
manner. Patch contributed by Mher Movsisyan.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/robotparser.py | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py index 75be4af..978ba58 100644 --- a/Lib/urllib/robotparser.py +++ b/Lib/urllib/robotparser.py @@ -157,6 +157,7 @@ class RuleLine: if path == '' and not allowance: # an empty value means allow all allowance = True + path = urllib.parse.urlunparse(urllib.parse.urlparse(path)) self.path = urllib.parse.quote(path) self.allowance = allowance |