summary refs log tree commit diff stats
path: root/Lib/urllib2.py
diff options
context:
space:
mode:
author Jeremy Hylton <jeremy@alum.mit.edu> 2003-05-04 23:44:49 (GMT)
committer Jeremy Hylton <jeremy@alum.mit.edu> 2003-05-04 23:44:49 (GMT)
commit 828023b6b5d4d84a3fc01e9907142662a33d45a6 (patch)
tree 1f7797a585fb80a1e618fd115d14e827190586fa /Lib/urllib2.py
parent 258dfebfb4286afe176cce1d6f6245d53112fc1a (diff)
download cpython-828023b6b5d4d84a3fc01e9907142662a33d45a6.zip
cpython-828023b6b5d4d84a3fc01e9907142662a33d45a6.tar.gz
cpython-828023b6b5d4d84a3fc01e9907142662a33d45a6.tar.bz2
Repair redirect handling and raise URLError on host-not-found.
The latest changes to the redirect handler couldn't possibly have been tested, because they did not compute a newurl and failed with a NameError. The __name__ == "__main__": block has a test for redirects. Also, fix SF bug 723831. A urlopen() that failed because the host was not found raised a socket.gaierror unlike earlier versions of urllib2. The problem is that httplib actually establishes the connection at a different point starting with Python 2.2. Move the try/except to endheaders(), which is where the connection gets established.
Diffstat (limited to 'Lib/urllib2.py')
-rw-r--r-- Lib/urllib2.py 58
1 files changed, 36 insertions, 22 deletions
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index b6b2ac6..75583e3 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -416,14 +416,25 @@ class HTTPRedirectHandler(BaseHandler):
Request to allow http_error_30x to perform the redirect. Otherwise,
raise HTTPError if no-one else should try to handle this url. Return
None if you can't but another Handler might.
-
"""
- if (code in (301, 302, 303, 307) and req.method() in ("GET", "HEAD") or
- code in (302, 303) and req.method() == "POST"):
- # Strictly (according to RFC 2616), 302 in response to a POST
- # MUST NOT cause a redirection without confirmation from the user
- # (of urllib2, in this case). In practice, essentially all clients
- # do redirect in this case, so we do the same.
+ # XXX 301 and 302 errors must have a location or uri header.
+ # Not sure about the other error codes.
+ if "location" in headers:
+ newurl = headers["location"]
+ elif "uri" in headers:
+ newurl = headers["uri"]
+ else:
+ return
+ newurl = urlparse.urljoin(req.get_full_url(), newurl)
+
+ m = req.get_method()
+ if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+ or code in (302, 303) and m == "POST"):
+ # Strictly (according to RFC 2616), 302 in response to a
+ # POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib2, in this case). In practice,
+ # essentially all clients do redirect in this case, so we
+ # do the same.
return Request(newurl, headers=req.headers)
else:
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
@@ -777,25 +788,25 @@ def encode_digest(digest):
class AbstractHTTPHandler(BaseHandler):
+ # XXX Should rewrite do_open() to use the new httplib interface,
+ # which would be a little simpler.
+
def do_open(self, http_class, req):
host = req.get_host()
if not host:
raise URLError('no host given')
- try:
- h = http_class(host) # will parse host:port
- if req.has_data():
- data = req.get_data()
- h.putrequest('POST', req.get_selector())
- if not 'Content-type' in req.headers:
- h.putheader('Content-type',
- 'application/x-www-form-urlencoded')
- if not 'Content-length' in req.headers:
- h.putheader('Content-length', '%d' % len(data))
- else:
- h.putrequest('GET', req.get_selector())
- except socket.error, err:
- raise URLError(err)
+ h = http_class(host) # will parse host:port
+ if req.has_data():
+ data = req.get_data()
+ h.putrequest('POST', req.get_selector())
+ if not 'Content-type' in req.headers:
+ h.putheader('Content-type',
+ 'application/x-www-form-urlencoded')
+ if not 'Content-length' in req.headers:
+ h.putheader('Content-length', '%d' % len(data))
+ else:
+ h.putrequest('GET', req.get_selector())
scheme, sel = splittype(req.get_selector())
sel_host, sel_path = splithost(sel)
@@ -806,7 +817,10 @@ class AbstractHTTPHandler(BaseHandler):
h.putheader(*args)
for k, v in req.headers.items():
h.putheader(k, v)
- h.endheaders()
+ try:
+ h.endheaders()
+ except socket.error, err:
+ raise URLError(err)
if req.has_data():
h.send(data)