summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2006-04-02 20:45:34 (GMT)
committer Georg Brandl <georg@python.org> 2006-04-02 20:45:34 (GMT)
commit 720096a6bffe00e05aa3811c0f7490249903bd3f (patch)
tree e8d62adfdf5396a28d3bb2be89417babee92aa09
parent 4eb521e595216a406ad1d3175056dc8cd8be157b (diff)
download cpython-720096a6bffe00e05aa3811c0f7490249903bd3f.zip
cpython-720096a6bffe00e05aa3811c0f7490249903bd3f.tar.gz
cpython-720096a6bffe00e05aa3811c0f7490249903bd3f.tar.bz2
Patch #1462790: fix urllib2 ProxyHandler for host:port proxies
-rw-r--r-- Lib/test/test_urllib2.py 24
-rw-r--r-- Lib/urllib2.py 108
2 files changed, 110 insertions, 22 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 7e0bbf0..c79a733 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -13,8 +13,7 @@ from urllib2 import Request, OpenerDirector
# parse_keqv_list, parse_http_list (I'm leaving this for Anthony Baxter
# and Greg Stein, since they're doing Digest Authentication)
# Authentication stuff (ditto)
-# ProxyHandler, CustomProxy, CustomProxyHandler (I don't use a proxy)
-# GopherHandler (haven't used gopher for a decade or so...)
+# CustomProxy, CustomProxyHandler
class TrivialTests(unittest.TestCase):
def test_trivial(self):
@@ -90,6 +89,7 @@ class FakeMethod:
return self.handle(self.meth_name, self.action, *args)
class MockHandler:
+ handler_order = 500
def __init__(self, methods):
self._define_methods(methods)
def _define_methods(self, methods):
@@ -154,7 +154,7 @@ def add_ordered_mock_handlers(opener, meth_spec):
for meths in meth_spec:
class MockHandlerSubclass(MockHandler): pass
h = MockHandlerSubclass(meths)
- h.handler_order = count
+ h.handler_order += count
h.add_parent(opener)
count = count + 1
handlers.append(h)
@@ -642,6 +642,23 @@ class HandlerTests(unittest.TestCase):
o.open("http://www.example.com/")
self.assert_(not hh.req.has_header("Cookie"))
+ def test_proxy(self):
+ o = OpenerDirector()
+ ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
+ o.add_handler(ph)
+ meth_spec = [
+ [("http_open", "return response")]
+ ]
+ handlers = add_ordered_mock_handlers(o, meth_spec)
+
+ req = Request("http://acme.example.com/")
+ self.assertEqual(req.get_host(), "acme.example.com")
+ r = o.open(req)
+ self.assertEqual(req.get_host(), "proxy.example.com:3128")
+
+ self.assertEqual([(handlers[0], "http_open")],
+ [tup[0:2] for tup in o.calls])
+
class MiscTests(unittest.TestCase):
@@ -827,6 +844,7 @@ class NetworkTests(unittest.TestCase):
def test_main(verbose=None):
+ test_support.run_doctest(urllib2, verbose)
tests = (TrivialTests,
OpenerDirectorTests,
HandlerTests,
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index 21f916c..91bcc2b 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -119,7 +119,8 @@ from urllib import (unwrap, unquote, splittype, splithost, quote,
# support for FileHandler, proxies via environment variables
from urllib import localhost, url2pathname, getproxies
-__version__ = "2.5"
+# used in User-Agent header sent
+__version__ = sys.version[:3]
_opener = None
def urlopen(url, data=None):
@@ -563,6 +564,80 @@ class HTTPRedirectHandler(BaseHandler):
"lead to an infinite loop.\n" \
"The last 30x error message was:\n"
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must
+ have two slashes after the scheme:
+
+ >>> _parse_proxy('file:/ftp.example.com/')
+ Traceback (most recent call last):
+ ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+ The first three items of the returned tuple may be None.
+
+ Examples of authority parsing:
+
+ >>> _parse_proxy('proxy.example.com')
+ (None, None, None, 'proxy.example.com')
+ >>> _parse_proxy('proxy.example.com:3128')
+ (None, None, None, 'proxy.example.com:3128')
+
+ The authority component may optionally include userinfo (assumed to be
+ username:password):
+
+ >>> _parse_proxy('joe:password@proxy.example.com')
+ (None, 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('joe:password@proxy.example.com:3128')
+ (None, 'joe', 'password', 'proxy.example.com:3128')
+
+ Same examples, but with URLs instead:
+
+ >>> _parse_proxy('http://proxy.example.com/')
+ ('http', None, None, 'proxy.example.com')
+ >>> _parse_proxy('http://proxy.example.com:3128/')
+ ('http', None, None, 'proxy.example.com:3128')
+ >>> _parse_proxy('http://joe:password@proxy.example.com/')
+ ('http', 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+ ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+ Everything after the authority is ignored:
+
+ >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+ ('ftp', 'joe', 'password', 'proxy.example.com')
+
+ Test for no trailing '/' case:
+
+ >>> _parse_proxy('http://joe:password@proxy.example.com')
+ ('http', 'joe', 'password', 'proxy.example.com')
+
+ """
+ from urlparse import _splitnetloc
+ scheme, r_scheme = splittype(proxy)
+ if not r_scheme.startswith("/"):
+ # authority
+ scheme = None
+ authority = proxy
+ else:
+ # URL
+ if not r_scheme.startswith("//"):
+ raise ValueError("proxy URL with no authority: %r" % proxy)
+ # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
+ # and 3.3.), path is empty or starts with '/'
+ end = r_scheme.find("/", 2)
+ if end == -1:
+ end = None
+ authority = r_scheme[2:end]
+ userinfo, hostport = splituser(authority)
+ if userinfo is not None:
+ user, password = splitpasswd(userinfo)
+ else:
+ user = password = None
+ return scheme, user, password, hostport
+
class ProxyHandler(BaseHandler):
# Proxies must be in front
handler_order = 100
@@ -579,30 +654,25 @@ class ProxyHandler(BaseHandler):
def proxy_open(self, req, proxy, type):
orig_type = req.get_type()
- type, r_type = splittype(proxy)
- if not type or r_type.isdigit():
- # proxy is specified without protocol
- type = orig_type
- host = proxy
- else:
- host, r_host = splithost(r_type)
- user_pass, host = splituser(host)
- user, password = splitpasswd(user_pass)
+ proxy_type, user, password, hostport = _parse_proxy(proxy)
+ if proxy_type is None:
+ proxy_type = orig_type
if user and password:
- user, password = user_pass.split(':', 1)
- user_pass = base64.encodestring('%s:%s' % (unquote(user),
- unquote(password))).strip()
- req.add_header('Proxy-authorization', 'Basic ' + user_pass)
- host = unquote(host)
- req.set_proxy(host, type)
- if orig_type == type:
+ user_pass = '%s:%s' % (unquote(user), unquote(password))
+ creds = base64.encodestring(user_pass).strip()
+ req.add_header('Proxy-authorization', 'Basic ' + creds)
+ hostport = unquote(hostport)
+ req.set_proxy(hostport, proxy_type)
+ if orig_type == proxy_type:
# let other handlers take care of it
- # XXX this only makes sense if the proxy is before the
- # other handlers
return None
else:
# need to start over, because the other handlers don't
# grok the proxy's URL type
+ # e.g. if we have a constructor arg proxies like so:
+ # {'http': 'ftp://proxy.example.com'}, we may end up turning
+ # a request for http://acme.example.com/a into one for
+ # ftp://proxy.example.com/a
return self.parent.open(req)
# feature suggested by Duncan Booth