From 024aaa1bfe27cbc47c2a7f88c21c9bb1b6605191 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 24 Apr 2003 15:32:12 +0000 Subject: SF Patch 549151: urllib2 POSTs on redirect (contributed by John J Lee) --- Doc/lib/liburllib.tex | 18 ++++++++++++------ Doc/lib/liburllib2.tex | 40 +++++++++++++++++++++++++++++++++++----- Lib/urllib.py | 4 ++++ Lib/urllib2.py | 38 ++++++++++++++++++++++++++++++++++---- 4 files changed, 85 insertions(+), 15 deletions(-) diff --git a/Doc/lib/liburllib.tex b/Doc/lib/liburllib.tex index c458616..a36df6b 100644 --- a/Doc/lib/liburllib.tex +++ b/Doc/lib/liburllib.tex @@ -254,12 +254,18 @@ actually retrieve a resource at an \file{https:} URL. \begin{classdesc}{FancyURLopener}{...} \class{FancyURLopener} subclasses \class{URLopener} providing default -handling for the following HTTP response codes: 301, 302 or 401. For -301 and 302 response codes, the \mailheader{Location} header is used to -fetch the actual URL. For 401 response codes (authentication -required), basic HTTP authentication is performed. For 301 and 302 response -codes, recursion is bounded by the value of the \var{maxtries} attribute, -which defaults 10. +handling for the following HTTP response codes: 301, 302, 303 and 401. +For 301, 302 and 303 response codes, the \mailheader{Location} header +is used to fetch the actual URL. For 401 response codes +(authentication required), basic HTTP authentication is performed. +For 301, 302 and 303 response codes, recursion is bounded by the value +of the \var{maxtries} attribute, which defaults to 10. + +\note{According to the letter of \rfc{2616}, 301 and 302 responses to + POST requests must not be automatically redirected without + confirmation by the user. In reality, browsers do allow automatic + redirection of these responses, changing the POST to a GET, and + \module{urllib} reproduces this behaviour.} The parameters to the constructor are the same as those for \class{URLopener}. 
diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex index 02a3cc3..9f279df 100644 --- a/Doc/lib/liburllib2.tex +++ b/Doc/lib/liburllib2.tex @@ -217,6 +217,12 @@ by all handlers except HTTP handlers --- and there it should be an request to be \code{POST} rather than \code{GET}. \end{methoddesc} +\begin{methoddesc}[Request]{get_method}{} +Return a string indicating the HTTP request method. This is only +meaningful for HTTP requests, and currently always takes one of the +values ("GET", "POST"). +\end{methoddesc} + \begin{methoddesc}[Request]{has_data}{} Return whether the instance has a non-\code{None} data. \end{methoddesc} @@ -394,25 +400,49 @@ Arguments, return values and exceptions raised should be the same as for \method{http_error_default()}. \end{methoddesc} - \subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}} -\note{303 redirection is not supported by this version of -\module{urllib2}.} +\note{Some HTTP redirections require action from this module's client + code. If this is the case, \exception{HTTPError} is raised. See + \rfc{2616} for details of the precise meanings of the various + redirection codes.} + +\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req, + fp, code, msg, hdrs, newurl} +Return a \class{Request} or \code{None} in response to a redirect. +This is called by the default implementations of the +\code{http_error_30x()} methods when a redirection is received from +the server. If a redirection should take place, return a new +\class{Request} for \var{newurl} to allow \code{http_error_30x()} to perform the +redirect. Otherwise, raise \exception{HTTPError} if no other +\class{Handler} should try to handle this URL, or return \code{None} +if you can't but another \class{Handler} might. 
+ +\note{The default implementation of this method does not strictly + follow \rfc{2616}: it allows automatic 302 redirection of POST + requests, because essentially all HTTP clients do this.} + +\end{methoddesc} + \begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req, fp, code, msg, hdrs} Redirect to the \code{Location:} URL. This method is called by the parent \class{OpenerDirector} when getting an HTTP -permanent-redirect response. +`moved permanently' response. \end{methoddesc} \begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req, fp, code, msg, hdrs} The same as \method{http_error_301()}, but called for the -temporary-redirect response. +`found' response. \end{methoddesc} +\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req, + fp, code, msg, hdrs} +The same as \method{http_error_301()}, but called for the +`see other' redirect response. +\end{methoddesc} \subsection{ProxyHandler Objects \label{proxy-handler}} diff --git a/Lib/urllib.py b/Lib/urllib.py index bd8347e..42851ee 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -586,6 +586,10 @@ class FancyURLopener(URLopener): """Error 301 -- also relocated (permanently).""" return self.http_error_302(url, fp, errcode, errmsg, headers, data) + def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): + """Error 303 -- also relocated (essentially identical to 302).""" + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): """Error 401 -- authentication required. See this URL for a description of the basic authentication scheme: diff --git a/Lib/urllib2.py b/Lib/urllib2.py index f189b39..b6b2ac6 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -11,8 +11,8 @@ option. The OpenerDirector is a composite object that invokes the Handlers needed to open the requested URL. For example, the HTTPHandler performs HTTP GET and POST requests and deals with non-error returns. 
The HTTPRedirectHandler automatically deals with -HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals -with digest authentication. +HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler +deals with digest authentication. urlopen(url, data=None) -- basic usage is that same as original urllib. pass the url and optionally data to post to an HTTP URL, and @@ -207,6 +207,12 @@ class Request: return getattr(self, attr) raise AttributeError, attr + def get_method(self): + if self.has_data(): + return "POST" + else: + return "GET" + def add_data(self, data): self.data = data @@ -402,6 +408,26 @@ class HTTPDefaultErrorHandler(BaseHandler): raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) class HTTPRedirectHandler(BaseHandler): + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a redirection + response is received. If a redirection should take place, return a new + Request for newurl so http_error_30x can redirect. Otherwise, raise + HTTPError if no-one else should try to handle this url. Return None + if you can't but another Handler might. + """ + m = req.get_method() + if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (302, 303) and m == "POST"): + # Strictly (according to RFC 2616), 302 in response to a POST + # MUST NOT cause a redirection without confirmation from the user + # (of urllib2, in this case). In practice, essentially all clients + # do redirect in this case, so we do the same. + return Request(newurl, headers=req.headers) + else: + raise HTTPError(req.get_full_url(), code, msg, headers, fp) + # Implementation note: To avoid the server sending us into an # infinite loop, the request object needs to track what URLs we # have already seen. 
Do this by adding a handler-specific @@ -418,7 +444,11 @@ class HTTPRedirectHandler(BaseHandler): # XXX Probably want to forget about the state of the current # request, although that might interact poorly with other # handlers that also use handler-specific request attributes - new = Request(newurl, req.get_data(), req.headers) + new = self.redirect_request(req, fp, code, msg, headers, newurl) + if new is None: + return + + # loop detection new.error_302_dict = {} if hasattr(req, 'error_302_dict'): if len(req.error_302_dict)>10 or \ @@ -435,7 +465,7 @@ class HTTPRedirectHandler(BaseHandler): return self.parent.open(new) - http_error_301 = http_error_303 = http_error_307 = http_error_302 inf_msg = "The HTTP server returned a redirect error that would" \ "lead to an infinite loop.\n" \ -- cgit v0.12