From 236654b82df30b2df5edf72cd808f792cdde9d3a Mon Sep 17 00:00:00 2001 From: Jeremy Hylton Date: Fri, 27 Mar 2009 20:24:34 +0000 Subject: Fix some string encoding issues with entity bodies in HTTP requests. RFC 2616 says that iso-8859-1 is the default charset for HTTP entity bodies, but we encoded strings using ascii. See http://bugs.python.org/issue5314. Changed docs and code to use iso-8859-1. Also fix some brokenness with passing a file as the body instead of a string. Add tests to show that some of this behavior actually works. --- Doc/library/http.client.rst | 24 ++++++++++----- Lib/http/client.py | 30 ++++++++++++------- Lib/test/test_httplib.py | 73 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index 26d919d..5fcc0f0 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -351,14 +351,22 @@ HTTPConnection Objects .. method:: HTTPConnection.request(method, url[, body[, headers]]) - This will send a request to the server using the HTTP request method *method* - and the selector *url*. If the *body* argument is present, it should be a - string of data to send after the headers are finished. Alternatively, it may - be an open file object, in which case the contents of the file is sent; this - file object should support ``fileno()`` and ``read()`` methods. The header - Content-Length is automatically set to the correct value. The *headers* - argument should be a mapping of extra HTTP headers to send with the request. - + This will send a request to the server using the HTTP request + method *method* and the selector *url*. If the *body* argument is + present, it should be a string or bytes object of data to send after + the headers are finished. Strings are encoded as ISO-8859-1, the + default charset for HTTP. To use other encodings, pass a bytes + object. The Content-Length header is set to the length of the + string. 
+ + The *body* may also be an open file object, in which case the + contents of the file is sent; this file object should support + ``fileno()`` and ``read()`` methods. The header Content-Length is + automatically set to the length of the file as reported by + stat. + + The *headers* argument should be a mapping of extra HTTP + headers to send with the request. .. method:: HTTPConnection.getresponse() diff --git a/Lib/http/client.py b/Lib/http/client.py index 5e091b8..0ea15ab 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage): if line in (b'\r\n', b'\n', b''): break hstring = b''.join(headers).decode('iso-8859-1') - return email.parser.Parser(_class=_class).parsestr(hstring) class HTTPResponse(io.RawIOBase): @@ -675,13 +674,22 @@ class HTTPConnection: if self.debuglevel > 0: print("send:", repr(str)) try: - blocksize=8192 - if hasattr(str,'read') : - if self.debuglevel > 0: print("sendIng a read()able") - data=str.read(blocksize) - while data: + blocksize = 8192 + if hasattr(str, "read") : + if self.debuglevel > 0: + print("sendIng a read()able") + encode = False + if "b" not in str.mode: + encode = True + if self.debuglevel > 0: + print("encoding file using iso-8859-1") + while 1: + data = str.read(blocksize) + if not data: + break + if encode: + data = data.encode("iso-8859-1") self.sock.sendall(data) - data=str.read(blocksize) else: self.sock.sendall(str) except socket.error as v: @@ -713,8 +721,8 @@ class HTTPConnection: message_body = None self.send(msg) if message_body is not None: - #message_body was not a string (i.e. it is a file) and - #we must run the risk of Nagle + # message_body was not a string (i.e. it is a file), and + # we must run the risk of Nagle. 
self.send(message_body) def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): @@ -904,7 +912,9 @@ class HTTPConnection: for hdr, value in headers.items(): self.putheader(hdr, value) if isinstance(body, str): - body = body.encode('ascii') + # RFC 2616 Section 3.7.1 says that the text media type has a + # default charset of iso-8859-1. + body = body.encode('iso-8859-1') self.endheaders(body) def getresponse(self): diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index a433474..fd5c123 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -272,9 +272,80 @@ class HTTPSTimeoutTest(TestCase): h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) self.assertEqual(h.timeout, 30) +class RequestBodyTest(TestCase): + """Test cases where a request includes a message body.""" + + def setUp(self): + self.conn = httplib.HTTPConnection('example.com') + self.sock = FakeSocket("") + self.conn.sock = self.sock + + def get_headers_and_fp(self): + f = io.BytesIO(self.sock.data) + f.readline() # read the request line + message = httplib.parse_headers(f) + return message, f + + def test_manual_content_length(self): + # Set an incorrect content-length so that we can verify that + # it will not be over-ridden by the library. 
+ self.conn.request("PUT", "/url", "body", + {"Content-Length": "42"}) + message, f = self.get_headers_and_fp() + self.assertEqual("42", message.get("content-length")) + self.assertEqual(4, len(f.read())) + + def test_ascii_body(self): + self.conn.request("PUT", "/url", "body") + message, f = self.get_headers_and_fp() + self.assertEqual("text/plain", message.get_content_type()) + self.assertEqual(None, message.get_charset()) + self.assertEqual("4", message.get("content-length")) + self.assertEqual(b'body', f.read()) + + def test_latin1_body(self): + self.conn.request("PUT", "/url", "body\xc1") + message, f = self.get_headers_and_fp() + self.assertEqual("text/plain", message.get_content_type()) + self.assertEqual(None, message.get_charset()) + self.assertEqual("5", message.get("content-length")) + self.assertEqual(b'body\xc1', f.read()) + + def test_bytes_body(self): + self.conn.request("PUT", "/url", b"body\xc1") + message, f = self.get_headers_and_fp() + self.assertEqual("text/plain", message.get_content_type()) + self.assertEqual(None, message.get_charset()) + self.assertEqual("5", message.get("content-length")) + self.assertEqual(b'body\xc1', f.read()) + + def test_file_body(self): + f = open(support.TESTFN, "w") + f.write("body") + f.close() + f = open(support.TESTFN) + self.conn.request("PUT", "/url", f) + message, f = self.get_headers_and_fp() + self.assertEqual("text/plain", message.get_content_type()) + self.assertEqual(None, message.get_charset()) + self.assertEqual("4", message.get("content-length")) + self.assertEqual(b'body', f.read()) + + def test_binary_file_body(self): + f = open(support.TESTFN, "wb") + f.write(b"body\xc1") + f.close() + f = open(support.TESTFN, "rb") + self.conn.request("PUT", "/url", f) + message, f = self.get_headers_and_fp() + self.assertEqual("text/plain", message.get_content_type()) + self.assertEqual(None, message.get_charset()) + self.assertEqual("5", message.get("content-length")) + self.assertEqual(b'body\xc1', f.read()) + 
def test_main(verbose=None): support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, - HTTPSTimeoutTest) + HTTPSTimeoutTest, RequestBodyTest) if __name__ == '__main__': test_main() -- cgit v0.12