diff options
author | Senthil Kumaran <senthil@uthcode.com> | 2013-05-29 12:58:47 (GMT) |
---|---|---|
committer | Senthil Kumaran <senthil@uthcode.com> | 2013-05-29 12:58:47 (GMT) |
commit | 2c4810efa2421c1a3e0042888b71193a917b39c5 (patch) | |
tree | 462411035677ad245fc32db3b0fd2eabab22b2c0 /Lib/robotparser.py | |
parent | 1ab29e78f9e61c86879fa8884a543022ea43112d (diff) | |
download | cpython-2c4810efa2421c1a3e0042888b71193a917b39c5.zip cpython-2c4810efa2421c1a3e0042888b71193a917b39c5.tar.gz cpython-2c4810efa2421c1a3e0042888b71193a917b39c5.tar.bz2 |
#17403: urllib.parse.robotparser normalizes the URLs before adding them to the RuleLine.
This helps in handling certain types of invalid URLs in a conservative manner.
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r-- | Lib/robotparser.py | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 1722863..ad3be94 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -160,6 +160,7 @@ class RuleLine:
         if path == '' and not allowance:
             # an empty value means allow all
             allowance = True
+        path = urlparse.urlunparse(urlparse.urlparse(path))
         self.path = urllib.quote(path)
         self.allowance = allowance
 