summaryrefslogtreecommitdiffstats
path: root/Lib/urllib2.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib2.py')
-rw-r--r--Lib/urllib2.py79
1 files changed, 52 insertions, 27 deletions
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index ec01c8f..cdb3a22 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -85,23 +85,18 @@ f = urllib2.urlopen('http://www.python.org/')
# abstract factory for opener
import base64
-import ftplib
+import hashlib
import httplib
-import inspect
-import md5
-import mimetypes
import mimetools
import os
import posixpath
import random
import re
-import sha
import socket
import sys
import time
import urlparse
import bisect
-import cookielib
try:
from cStringIO import StringIO
@@ -169,6 +164,23 @@ class HTTPError(URLError, addinfourl):
class GopherError(URLError):
pass
+# copied from cookielib.py
+_cut_port_re = re.compile(r":\d+$")
+def request_host(request):
+ """Return request-host, as defined by RFC 2965.
+
+ Variation from RFC: returned value is lowercased, for convenient
+ comparison.
+
+ """
+ url = request.get_full_url()
+ host = urlparse.urlparse(url)[1]
+ if host == "":
+ host = request.get_header("Host", "")
+
+ # remove port, if present
+ host = _cut_port_re.sub("", host, 1)
+ return host.lower()
class Request:
@@ -186,7 +198,7 @@ class Request:
self.add_header(key, value)
self.unredirected_hdrs = {}
if origin_req_host is None:
- origin_req_host = cookielib.request_host(self)
+ origin_req_host = request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
@@ -414,6 +426,9 @@ def build_opener(*handlers):
If any of the handlers passed as arguments are subclasses of the
default handlers, the default handlers will not be used.
"""
+ import types
+ def isclass(obj):
+ return isinstance(obj, types.ClassType) or hasattr(obj, "__bases__")
opener = OpenerDirector()
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
@@ -424,7 +439,7 @@ def build_opener(*handlers):
skip = []
for klass in default_classes:
for check in handlers:
- if inspect.isclass(check):
+ if isclass(check):
if issubclass(check, klass):
skip.append(klass)
elif isinstance(check, klass):
@@ -436,7 +451,7 @@ def build_opener(*handlers):
opener.add_handler(klass())
for h in handlers:
- if inspect.isclass(h):
+ if isclass(h):
h = h()
opener.add_handler(h)
return opener
@@ -612,7 +627,6 @@ def _parse_proxy(proxy):
('http', 'joe', 'password', 'proxy.example.com')
"""
- from urlparse import _splitnetloc
scheme, r_scheme = splittype(proxy)
if not r_scheme.startswith("/"):
# authority
@@ -673,6 +687,7 @@ class ProxyHandler(BaseHandler):
return self.parent.open(req)
class HTTPPasswordMgr:
+
def __init__(self):
self.passwd = {}
@@ -696,10 +711,15 @@ class HTTPPasswordMgr:
def reduce_uri(self, uri):
"""Accept netloc or URI and extract only the netloc and path"""
- parts = urlparse.urlparse(uri)
+ parts = urlparse.urlsplit(uri)
if parts[1]:
+ # URI
return parts[1], parts[2] or '/'
+ elif parts[0]:
+ # host:port
+ return uri, '/'
else:
+ # host
return parts[2], '/'
def is_suburi(self, base, test):
@@ -742,6 +762,8 @@ class AbstractBasicAuthHandler:
self.add_password = self.passwd.add_password
def http_error_auth_reqed(self, authreq, host, req, headers):
+ # host may be an authority (without userinfo) or a URL with an
+ # authority
# XXX could be multiple headers
authreq = headers.get(authreq, None)
if authreq:
@@ -752,10 +774,7 @@ class AbstractBasicAuthHandler:
return self.retry_http_basic_auth(host, req, realm)
def retry_http_basic_auth(self, host, req, realm):
- # TODO(jhylton): Remove the host argument? It depends on whether
- # retry_http_basic_auth() is consider part of the public API.
- # It probably is.
- user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+ user, pw = self.passwd.find_user_password(realm, host)
if pw is not None:
raw = "%s:%s" % (user, pw)
auth = 'Basic %s' % base64.encodestring(raw).strip()
@@ -766,14 +785,15 @@ class AbstractBasicAuthHandler:
else:
return None
+
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
auth_header = 'Authorization'
def http_error_401(self, req, fp, code, msg, headers):
- host = urlparse.urlparse(req.get_full_url())[1]
+ url = req.get_full_url()
return self.http_error_auth_reqed('www-authenticate',
- host, req, headers)
+ url, req, headers)
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
@@ -781,9 +801,13 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
auth_header = 'Proxy-authorization'
def http_error_407(self, req, fp, code, msg, headers):
- host = req.get_host()
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib2 does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.get_host()
return self.http_error_auth_reqed('proxy-authenticate',
- host, req, headers)
+ authority, req, headers)
def randombytes(n):
@@ -838,9 +862,6 @@ class AbstractDigestAuthHandler:
scheme = authreq.split()[0]
if scheme.lower() == 'digest':
return self.retry_http_digest_auth(req, authreq)
- else:
- raise ValueError("AbstractDigestAuthHandler doesn't know "
- "about %s"%(scheme))
def retry_http_digest_auth(self, req, auth):
token, challenge = auth.split(' ', 1)
@@ -850,7 +871,7 @@ class AbstractDigestAuthHandler:
auth_val = 'Digest %s' % auth
if req.headers.get(self.auth_header, None) == auth_val:
return None
- req.add_header(self.auth_header, auth_val)
+ req.add_unredirected_header(self.auth_header, auth_val)
resp = self.parent.open(req)
return resp
@@ -860,8 +881,8 @@ class AbstractDigestAuthHandler:
# and server to avoid chosen plaintext attacks, to provide mutual
# authentication, and to provide some message integrity protection.
# This isn't a fabulous effort, but it's probably Good Enough.
- dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
- randombytes(8))).hexdigest()
+ dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
+ randombytes(8))).hexdigest()
return dig[:16]
def get_authorization(self, req, chal):
@@ -923,9 +944,9 @@ class AbstractDigestAuthHandler:
def get_algorithm_impls(self, algorithm):
# lambdas assume digest modules are imported at the top level
if algorithm == 'MD5':
- H = lambda x: md5.new(x).hexdigest()
+ H = lambda x: hashlib.md5(x).hexdigest()
elif algorithm == 'SHA':
- H = lambda x: sha.new(x).hexdigest()
+ H = lambda x: hashlib.sha1(x).hexdigest()
# XXX MD5-sess
KD = lambda s, d: H("%s:%s" % (s, d))
return H, KD
@@ -1066,6 +1087,7 @@ if hasattr(httplib, 'HTTPS'):
class HTTPCookieProcessor(BaseHandler):
def __init__(self, cookiejar=None):
+ import cookielib
if cookiejar is None:
cookiejar = cookielib.CookieJar()
self.cookiejar = cookiejar
@@ -1163,6 +1185,7 @@ class FileHandler(BaseHandler):
# not entirely sure what the rules are here
def open_local_file(self, req):
import email.Utils
+ import mimetypes
host = req.get_host()
file = req.get_selector()
localfile = url2pathname(file)
@@ -1183,6 +1206,8 @@ class FileHandler(BaseHandler):
class FTPHandler(BaseHandler):
def ftp_open(self, req):
+ import ftplib
+ import mimetypes
host = req.get_host()
if not host:
raise IOError, ('ftp error', 'no host given')