diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 2002-07-06 18:48:07 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 2002-07-06 18:48:07 (GMT) |
commit | d46aa37d35811a37397104f02074c8a44e7dbec1 (patch) | |
tree | c2b6e2bc6c2c1f011f769a59d6687b8d2d8b0859 /Lib/httplib.py | |
parent | 889f8bf259eee088d2d81e3978fbdf34585fc9ae (diff) | |
download | cpython-d46aa37d35811a37397104f02074c8a44e7dbec1.zip cpython-d46aa37d35811a37397104f02074c8a44e7dbec1.tar.gz cpython-d46aa37d35811a37397104f02074c8a44e7dbec1.tar.bz2 |
Handle HTTP/0.9 responses.
Section 19.6 of RFC 2616 (HTTP/1.1):
It is beyond the scope of a protocol specification to mandate
compliance with previous versions. HTTP/1.1 was deliberately
designed, however, to make supporting previous versions easy....
And we would expect HTTP/1.1 clients to:
- recognize the format of the Status-Line for HTTP/1.0 and 1.1
responses;
- understand any valid response in the format of HTTP/0.9, 1.0, or
1.1.
The changes to the code do handle responses in the format of HTTP/0.9.
Some users may consider this a bug because all responses with a
sufficiently corrupted status line will look like an HTTP/0.9
response. These users can pass strict=1 to the HTTP constructors to
get a BadStatusLine exception instead.
While this is a new feature of sorts, it enhances the robustness of
the code (be tolerant in what you accept). Thus, I consider it a bug
fix candidate.
XXX The new `strict` argument needs to be documented.
Diffstat (limited to 'Lib/httplib.py')
-rw-r--r-- | Lib/httplib.py | 151 |
1 files changed, 132 insertions, 19 deletions
diff --git a/Lib/httplib.py b/Lib/httplib.py index 722a8c6..b3b9915 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -95,9 +95,19 @@ _CS_REQ_SENT = 'Request-sent' class HTTPResponse: - def __init__(self, sock, debuglevel=0): + + # strict: If true, raise BadStatusLine if the status line can't be + # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is + # false because it prvents clients from talking to HTTP/0.9 + # servers. Note that a response with a sufficiently corrupted + # status line will look like an HTTP/0.9 response. + + # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. + + def __init__(self, sock, debuglevel=0, strict=0): self.fp = sock.makefile('rb', 0) self.debuglevel = debuglevel + self.strict = strict self.msg = None @@ -112,6 +122,7 @@ class HTTPResponse: self.will_close = _UNKNOWN # conn will close at end of response def _read_status(self): + # Initialize with Simple-Response defaults line = self.fp.readline() if self.debuglevel > 0: print "reply:", repr(line) @@ -122,12 +133,17 @@ class HTTPResponse: [version, status] = line.split(None, 1) reason = "" except ValueError: - version = "HTTP/0.9" - status = "200" - reason = "" - if version[:5] != 'HTTP/': - self.close() - raise BadStatusLine(line) + # empty version will cause next test to fail and status + # will be treated as 0.9 response. 
+ version = "" + if not version.startswith('HTTP/'): + if self.strict: + self.close() + raise BadStatusLine(line) + else: + # assume it's a Simple-Response from an 0.9 server + self.fp = LineAndFileWrapper(line, self.fp) + return "HTTP/0.9", 200, "" # The status code is a three-digit number try: @@ -169,6 +185,7 @@ class HTTPResponse: if self.version == 9: self.chunked = 0 + self.will_close = 1 self.msg = mimetools.Message(StringIO()) return @@ -353,13 +370,16 @@ class HTTPConnection: default_port = HTTP_PORT auto_open = 1 debuglevel = 0 + strict = 0 - def __init__(self, host, port=None): + def __init__(self, host, port=None, strict=None): self.sock = None self.__response = None self.__state = _CS_IDLE - + self._set_hostport(host, port) + if strict is not None: + self.strict = strict def _set_hostport(self, host, port): if port is None: @@ -610,9 +630,10 @@ class HTTPConnection: raise ResponseNotReady() if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel) + response = self.response_class(self.sock, self.debuglevel, + strict=self.strict) else: - response = self.response_class(self.sock) + response = self.response_class(self.sock, strict=self.strict) response._begin() assert response.will_close != _UNKNOWN @@ -733,8 +754,9 @@ class HTTPSConnection(HTTPConnection): default_port = HTTPS_PORT - def __init__(self, host, port=None, key_file=None, cert_file=None): - HTTPConnection.__init__(self, host, port) + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None): + HTTPConnection.__init__(self, host, port, strict) self.key_file = key_file self.cert_file = cert_file @@ -760,7 +782,7 @@ class HTTP: _connection_class = HTTPConnection - def __init__(self, host='', port=None): + def __init__(self, host='', port=None, strict=None): "Provide a default host, since the superclass requires one." 
# some joker passed 0 explicitly, meaning default port @@ -770,7 +792,7 @@ class HTTP: # Note that we may pass an empty string as the host; this will throw # an error when we attempt to connect. Presumably, the client code # will call connect before then, with a proper host. - self._setup(self._connection_class(host, port)) + self._setup(self._connection_class(host, port, strict)) def _setup(self, conn): self._conn = conn @@ -850,18 +872,20 @@ if hasattr(socket, 'ssl'): _connection_class = HTTPSConnection - def __init__(self, host='', port=None, **x509): + def __init__(self, host='', port=None, key_file=None, cert_file=None, + strict=None): # provide a default host, pass the X509 cert info # urf. compensate for bad input. if port == 0: port = None - self._setup(self._connection_class(host, port, **x509)) + self._setup(self._connection_class(host, port, key_file, + cert_file, strict)) # we never actually use these for anything, but we keep them # here for compatibility with post-1.5.2 CVS. - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') + self.key_file = key_file + self.cert_file = cert_file class HTTPException(Exception): @@ -906,6 +930,65 @@ class BadStatusLine(HTTPException): # for backwards compatibility error = HTTPException +class LineAndFileWrapper: + """A limited file-like object for HTTP/0.9 responses.""" + + # The status-line parsing code calls readline(), which normally + # get the HTTP status line. For a 0.9 response, however, this is + # actually the first line of the body! Clients need to get a + # readable file object that contains that line. + + def __init__(self, line, file): + self._line = line + self._file = file + self._line_consumed = 0 + self._line_offset = 0 + self._line_left = len(line) + + def __getattr__(self, attr): + return getattr(self._file, attr) + + def _done(self): + # called when the last byte is read from the line. After the + # call, all read methods are delegated to the underlying file + # obhect. 
+ self._line_consumed = 1 + self.read = self._file.read + self.readline = self._file.readline + self.readlines = self._file.readlines + + def read(self, amt=None): + assert not self._line_consumed and self._line_left + if amt is None or amt > self._line_left: + s = self._line[self._line_offset:] + self._done() + if amt is None: + return s + self._file.read() + else: + return s + self._file.read(amt - len(s)) + else: + assert amt <= self._line_left + i = self._line_offset + j = i + amt + s = self._line[i:j] + self._line_offset = j + self._line_left -= amt + if self._line_left == 0: + self._done() + return s + + def readline(self): + s = self._line[self._line_offset:] + self._done() + return s + + def readlines(self, size=None): + L = [self._line[self._line_offset:]] + self._done() + if size is None: + return L + self._file.readlines() + else: + return L + self._file.readlines(size) # # snarfed from httplib.py for now... @@ -971,5 +1054,35 @@ def test(): print "read", len(hs.getfile().read()) + # Test a buggy server -- returns garbled status line. + # http://www.yahoo.com/promotions/mom_com97/supermom.html + c = HTTPConnection("promotions.yahoo.com") + c.set_debuglevel(1) + c.connect() + c.request("GET", "/promotions/mom_com97/supermom.html") + r = c.getresponse() + print r.status, r.version + lines = r.read().split("\n") + print "\n".join(lines[:5]) + + c = HTTPConnection("promotions.yahoo.com", strict=1) + c.set_debuglevel(1) + c.connect() + c.request("GET", "/promotions/mom_com97/supermom.html") + try: + r = c.getresponse() + except BadStatusLine, err: + print "strict mode failed as expected" + else: + print "XXX strict mode should have failed" + + for strict in 0, 1: + h = HTTP(strict=strict) + h.connect("promotions.yahoo.com") + h.putrequest('GET', "/promotions/mom_com97/supermom.html") + h.endheaders() + status, reason, headers = h.getreply() + assert (strict and status == -1) or status == 200, (strict, status) + if __name__ == '__main__': test() |