summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2003-04-24 15:32:12 (GMT)
committer Raymond Hettinger <python@rcn.com> 2003-04-24 15:32:12 (GMT)
commit 024aaa1bfe27cbc47c2a7f88c21c9bb1b6605191 (patch)
tree 4281503f96d6e650cd0338bfbbf58c5479f9dc51
parent 1d5854fdc49073a3e49799192bc762ae0a68b64f (diff)
downloadcpython-024aaa1bfe27cbc47c2a7f88c21c9bb1b6605191.zip
cpython-024aaa1bfe27cbc47c2a7f88c21c9bb1b6605191.tar.gz
cpython-024aaa1bfe27cbc47c2a7f88c21c9bb1b6605191.tar.bz2
SF Patch 549151: urllib2 POSTs on redirect
(contributed by John J Lee)
-rw-r--r--Doc/lib/liburllib.tex18
-rw-r--r--Doc/lib/liburllib2.tex40
-rw-r--r--Lib/urllib.py4
-rw-r--r--Lib/urllib2.py38
4 files changed, 85 insertions, 15 deletions
diff --git a/Doc/lib/liburllib.tex b/Doc/lib/liburllib.tex
index c458616..a36df6b 100644
--- a/Doc/lib/liburllib.tex
+++ b/Doc/lib/liburllib.tex
@@ -254,12 +254,18 @@ actually retrieve a resource at an \file{https:} URL.
\begin{classdesc}{FancyURLopener}{...}
\class{FancyURLopener} subclasses \class{URLopener} providing default
-handling for the following HTTP response codes: 301, 302 or 401. For
-301 and 302 response codes, the \mailheader{Location} header is used to
-fetch the actual URL. For 401 response codes (authentication
-required), basic HTTP authentication is performed. For 301 and 302 response
-codes, recursion is bounded by the value of the \var{maxtries} attribute,
-which defaults 10.
+handling for the following HTTP response codes: 301, 302, 303 and 401.
+For 301, 302 and 303 response codes, the \mailheader{Location} header
+is used to fetch the actual URL. For 401 response codes
+(authentication required), basic HTTP authentication is performed.
+For 301, 302 and 303 response codes, recursion is bounded by the value
+of the \var{maxtries} attribute, which defaults to 10.
+
+\note{According to the letter of \rfc{2616}, 301 and 302 responses to
+ POST requests must not be automatically redirected without
+ confirmation by the user. In reality, browsers do allow automatic
+ redirection of these responses, changing the POST to a GET, and
+ \module{urllib} reproduces this behaviour.}
The parameters to the constructor are the same as those for
\class{URLopener}.
diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex
index 02a3cc3..9f279df 100644
--- a/Doc/lib/liburllib2.tex
+++ b/Doc/lib/liburllib2.tex
@@ -217,6 +217,12 @@ by all handlers except HTTP handlers --- and there it should be an
request to be \code{POST} rather than \code{GET}.
\end{methoddesc}
+\begin{methoddesc}[Request]{get_method}{}
+Return a string indicating the HTTP request method. This is only
+meaningful for HTTP requests, and currently always takes one of the
+values ("GET", "POST").
+\end{methoddesc}
+
\begin{methoddesc}[Request]{has_data}{}
Return whether the instance has a non-\code{None} data.
\end{methoddesc}
@@ -394,25 +400,49 @@ Arguments, return values and exceptions raised should be the same as
for \method{http_error_default()}.
\end{methoddesc}
-
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
-\note{303 redirection is not supported by this version of
-\module{urllib2}.}
+\note{Some HTTP redirections require action from this module's client
+ code. If this is the case, \exception{HTTPError} is raised. See
+ \rfc{2616} for details of the precise meanings of the various
+ redirection codes.}
+
+\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
+ fp, code, msg, hdrs}
+Return a \class{Request} or \code{None} in response to a redirect.
+This is called by the default implementations of the
+\code{http_error_30x()} methods when a redirection is received from
+the server. If a redirection should take place, return a new
+\class{Request} to allow \code{http_error_30x()} to perform the
+redirect. Otherwise, raise \exception{HTTPError} if no other
+\class{Handler} should try to handle this URL, or return \code{None}
+if this handler cannot handle the redirect but another \class{Handler} might.
+
+\note{The default implementation of this method does not strictly
+ follow \rfc{2616}: it allows automatic 302 redirection of POST
+ requests, because essentially all HTTP clients do this.}
+
+\end{methoddesc}
+
\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
fp, code, msg, hdrs}
Redirect to the \code{Location:} URL. This method is called by
the parent \class{OpenerDirector} when getting an HTTP
-permanent-redirect response.
+`moved permanently' response.
\end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
fp, code, msg, hdrs}
The same as \method{http_error_301()}, but called for the
-temporary-redirect response.
+`found' response.
\end{methoddesc}
+\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
+ fp, code, msg, hdrs}
+The same as \method{http_error_301()}, but called for the
+`see other' redirect response.
+\end{methoddesc}
\subsection{ProxyHandler Objects \label{proxy-handler}}
diff --git a/Lib/urllib.py b/Lib/urllib.py
index bd8347e..42851ee 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -586,6 +586,10 @@ class FancyURLopener(URLopener):
"""Error 301 -- also relocated (permanently)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
+ def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 303 -- also relocated (essentially identical to 302).
+
+ Delegates to http_error_302() with the same arguments and returns
+ whatever that method returns.
+ """
+ return self.http_error_302(url, fp, errcode, errmsg, headers, data)
+
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 401 -- authentication required.
See this URL for a description of the basic authentication scheme:
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index f189b39..b6b2ac6 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -11,8 +11,8 @@ option. The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL. For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns. The HTTPRedirectHandler automatically deals with
-HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
-with digest authentication.
+HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
+deals with digest authentication.
urlopen(url, data=None) -- basic usage is the same as original
urllib. Pass the url and optionally data to post to an HTTP URL, and
@@ -207,6 +207,12 @@ class Request:
return getattr(self, attr)
raise AttributeError, attr
+ def get_method(self):
+ """Return "POST" if has_data() is true for this request, else "GET"."""
+ if self.has_data():
+ return "POST"
+ else:
+ return "GET"
+
def add_data(self, data):
self.data = data
@@ -402,6 +408,26 @@ class HTTPDefaultErrorHandler(BaseHandler):
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler):
+ def redirect_request(self, req, fp, code, msg, headers):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a redirection
+ response is received. If a redirection should take place, return a new
+ Request to allow http_error_30x to perform the redirect. Otherwise,
+ raise HTTPError if no-one else should try to handle this url. Return
+ None if you can't but another Handler might.
+
+ """
+ if (code in (301, 302, 303, 307) and req.method() in ("GET", "HEAD") or
+ code in (302, 303) and req.method() == "POST"):
+ # Strictly (according to RFC 2616), 302 in response to a POST
+ # MUST NOT cause a redirection without confirmation from the user
+ # (of urllib2, in this case). In practice, essentially all clients
+ # do redirect in this case, so we do the same.
+ return Request(newurl, headers=req.headers)
+ else:
+ raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+
# Implementation note: To avoid the server sending us into an
# infinite loop, the request object needs to track what URLs we
# have already seen. Do this by adding a handler-specific
@@ -418,7 +444,11 @@ class HTTPRedirectHandler(BaseHandler):
# XXX Probably want to forget about the state of the current
# request, although that might interact poorly with other
# handlers that also use handler-specific request attributes
- new = Request(newurl, req.get_data(), req.headers)
+ new = self.redirect_request(req, fp, code, msg, headers)
+ if new is None:
+ return
+
+ # loop detection
new.error_302_dict = {}
if hasattr(req, 'error_302_dict'):
if len(req.error_302_dict)>10 or \
@@ -435,7 +465,7 @@ class HTTPRedirectHandler(BaseHandler):
return self.parent.open(new)
- http_error_301 = http_error_302
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
inf_msg = "The HTTP server returned a redirect error that would" \
"lead to an infinite loop.\n" \