summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSenthil Kumaran <orsenthil@gmail.com>2010-11-22 04:48:26 (GMT)
committerSenthil Kumaran <orsenthil@gmail.com>2010-11-22 04:48:26 (GMT)
commitc295862ce04c6bf2b1d2ba5b8218f6198c62a241 (patch)
treeceaa20b7dc94b3ecf4ee5bc0446011e6d32ac8a6
parent1e600dc01fa294deb05243378e7419df1b6750ba (diff)
downloadcpython-c295862ce04c6bf2b1d2ba5b8218f6198c62a241.zip
cpython-c295862ce04c6bf2b1d2ba5b8218f6198c62a241.tar.gz
cpython-c295862ce04c6bf2b1d2ba5b8218f6198c62a241.tar.bz2
Fix Issue4493 - urllib2 adds '/' to the path component of a URL when it does not
start with one. This behavior is exhibited by browsers and other clients.
-rw-r--r--Lib/test/test_urllib2.py19
-rw-r--r--Lib/urllib/parse.py7
-rw-r--r--Lib/urllib/request.py2
-rw-r--r--Misc/NEWS3
4 files changed, 29 insertions, 2 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index e5e3c39..9cc9697 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -848,6 +848,25 @@ class HandlerTests(unittest.TestCase):
p_ds_req = h.do_request_(ds_req)
self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
+ def test_fixpath_in_weirdurls(self):
+ # Issue4493: urllib2 should supply '/' for URLs whose path does not
+ # start with '/'
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ weird_url = 'http://www.python.org?getspam'
+ req = Request(weird_url)
+ newreq = h.do_request_(req)
+ self.assertEqual(newreq.host,'www.python.org')
+ self.assertEqual(newreq.selector,'/?getspam')
+
+ url_without_path = 'http://www.python.org'
+ req = Request(url_without_path)
+ newreq = h.do_request_(req)
+ self.assertEqual(newreq.host,'www.python.org')
+ self.assertEqual(newreq.selector,'')
+
def test_errors(self):
h = urllib.request.HTTPErrorProcessor()
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 2ddd281..78f3084 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -699,7 +699,12 @@ def splithost(url):
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
- if match: return match.group(1, 2)
+ if match:
+ host_port = match.group(1)
+ path = match.group(2)
+ if path and not path.startswith('/'):
+ path = '/' + path
+ return host_port, path
return None, url
_userprog = None
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index f3fb7be..fe66a67 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -105,7 +105,7 @@ from urllib.response import addinfourl, addclosehook
# check for SSL
try:
import ssl
-except:
+except ImportError:
_have_ssl = False
else:
_have_ssl = True
diff --git a/Misc/NEWS b/Misc/NEWS
index b51a644..e10c856 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,9 @@ Core and Builtins
Library
-------
+- Issue #4493: urllib2 adds '/' in front of path components which do not
+ start with '/'. Common behavior exhibited by browsers and other clients.
+
- Issue #6378: idle.bat now runs with the appropriate Python version rather than
the system default. Patch by Sridhar Ratnakumar.