diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-08-08 11:27:53 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-08-08 11:27:53 (GMT) |
commit | d95cc754836762be6571aa44bcad1a01cf6def1d (patch) | |
tree | b93a7d02f984ab5820ed90705395ac4d1a2e9fc4 | |
parent | ad537f23e2e60b92c67fe01855ec1a0a7479df46 (diff) | |
download | cpython-d95cc754836762be6571aa44bcad1a01cf6def1d.zip cpython-d95cc754836762be6571aa44bcad1a01cf6def1d.tar.gz cpython-d95cc754836762be6571aa44bcad1a01cf6def1d.tar.bz2 |
Fix Issue8280 - urllib2's Request method will remove fragments in the URL.
This is how it should work; wget and curl work this way too. The old behavior was wrong.
-rw-r--r-- | Lib/test/test_urllib2.py | 10 | ||||
-rw-r--r-- | Lib/test/test_urllib2net.py | 7 | ||||
-rw-r--r-- | Lib/urllib/request.py | 3 |
3 files changed, 19 insertions, 1 deletion
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index b2f7ea8..02dc83c 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1249,6 +1249,16 @@ class RequestTests(unittest.TestCase): self.assertEqual("www.python.org", self.get.get_origin_req_host()) self.assertEqual("www.perl.org", self.get.get_host()) + def test_wrapped_url(self): + req = Request("<URL:http://www.python.org>") + self.assertEqual("www.python.org", req.get_host()) + + def test_urlwith_fragment(self): + req = Request("http://www.python.org/?qs=query#fragment=true") + self.assertEqual("/?qs=query", req.get_selector()) + req = Request("http://www.python.org/#fun=true") + self.assertEqual("/", req.get_selector()) + def test_main(verbose=None): from test import test_urllib2 diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index ff7c7bf..8b9435a 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -149,6 +149,13 @@ class OtherNetworkTests(unittest.TestCase): ## self._test_urls(urls, self._extra_handlers()+[bauth, dauth]) + def test_urlwithfrag(self): + urlwith_frag = "http://docs.python.org/glossary.html#glossary" + req = urllib.request.Request(urlwith_frag) + res = urllib.request.urlopen(req) + self.assertEqual(res.geturl(), + "http://docs.python.org/glossary.html") + def _test_urls(self, urls, handlers, retry=True): import time import logging diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 012814c..0a083b8 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -99,7 +99,7 @@ from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, splittype, splithost, splitport, splituser, splitpasswd, - splitattr, splitquery, splitvalue, to_bytes, urlunparse) + splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook # check for SSL @@ -163,6 +163,7 @@ class 
Request: origin_req_host=None, unverifiable=False): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.full_url = unwrap(url) + self.full_url, fragment = splittag(self.full_url) self.data = data self.headers = {} self._tunnel_host = None |