summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Hylton <jeremy@alum.mit.edu>2009-03-27 20:24:34 (GMT)
committerJeremy Hylton <jeremy@alum.mit.edu>2009-03-27 20:24:34 (GMT)
commit236654b82df30b2df5edf72cd808f792cdde9d3a (patch)
treebf699a151050ff282f72d178273daf8a5b3fa1f4
parent98eb6c283881168adbedf37bb34300c39954397a (diff)
downloadcpython-236654b82df30b2df5edf72cd808f792cdde9d3a.zip
cpython-236654b82df30b2df5edf72cd808f792cdde9d3a.tar.gz
cpython-236654b82df30b2df5edf72cd808f792cdde9d3a.tar.bz2
Fix some string encoding issues with entity bodies in HTTP requests.
RFC 2616 says that iso-8859-1 is the default charset for HTTP entity bodies, but we encoded strings using ascii. See http://bugs.python.org/issue5314. Changed docs and code to use iso-8859-1. Also fix some brokenness with passing a file as the body instead of a string. Add tests to show that some of this behavior actually works.
-rw-r--r--Doc/library/http.client.rst24
-rw-r--r--Lib/http/client.py30
-rw-r--r--Lib/test/test_httplib.py73
3 files changed, 108 insertions, 19 deletions
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index 26d919d..5fcc0f0 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -351,14 +351,22 @@ HTTPConnection Objects
.. method:: HTTPConnection.request(method, url[, body[, headers]])
- This will send a request to the server using the HTTP request method *method*
- and the selector *url*. If the *body* argument is present, it should be a
- string of data to send after the headers are finished. Alternatively, it may
- be an open file object, in which case the contents of the file is sent; this
- file object should support ``fileno()`` and ``read()`` methods. The header
- Content-Length is automatically set to the correct value. The *headers*
- argument should be a mapping of extra HTTP headers to send with the request.
-
+ This will send a request to the server using the HTTP request
+ method *method* and the selector *url*. If the *body* argument is
+ present, it should be a string or bytes object of data to send after
+ the headers are finished. Strings are encoded as ISO-8859-1, the
+ default charset for HTTP. To use other encodings, pass a bytes
+ object. The Content-Length header is set to the length of the
+ string.
+
+ The *body* may also be an open file object, in which case the
+ contents of the file are sent; this file object should support
+ ``fileno()`` and ``read()`` methods. The header Content-Length is
+ automatically set to the length of the file as reported by
+ stat.
+
+ The *headers* argument should be a mapping of extra HTTP
+ headers to send with the request.
.. method:: HTTPConnection.getresponse()
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 5e091b8..0ea15ab 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
if line in (b'\r\n', b'\n', b''):
break
hstring = b''.join(headers).decode('iso-8859-1')
-
return email.parser.Parser(_class=_class).parsestr(hstring)
class HTTPResponse(io.RawIOBase):
@@ -675,13 +674,22 @@ class HTTPConnection:
if self.debuglevel > 0:
print("send:", repr(str))
try:
- blocksize=8192
- if hasattr(str,'read') :
- if self.debuglevel > 0: print("sendIng a read()able")
- data=str.read(blocksize)
- while data:
+ blocksize = 8192
+ if hasattr(str, "read") :
+ if self.debuglevel > 0:
+ print("sendIng a read()able")
+ encode = False
+ if "b" not in str.mode:
+ encode = True
+ if self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
+ while 1:
+ data = str.read(blocksize)
+ if not data:
+ break
+ if encode:
+ data = data.encode("iso-8859-1")
self.sock.sendall(data)
- data=str.read(blocksize)
else:
self.sock.sendall(str)
except socket.error as v:
@@ -713,8 +721,8 @@ class HTTPConnection:
message_body = None
self.send(msg)
if message_body is not None:
- #message_body was not a string (i.e. it is a file) and
- #we must run the risk of Nagle
+ # message_body was not a string (i.e. it is a file), and
+ # we must run the risk of Nagle.
self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -904,7 +912,9 @@ class HTTPConnection:
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
- body = body.encode('ascii')
+ # RFC 2616 Section 3.7.1 says that text media types have a
+ # default charset of iso-8859-1.
+ body = body.encode('iso-8859-1')
self.endheaders(body)
def getresponse(self):
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index a433474..fd5c123 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -272,9 +272,80 @@ class HTTPSTimeoutTest(TestCase):
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
self.assertEqual(h.timeout, 30)
+class RequestBodyTest(TestCase):
+ """Test cases where a request includes a message body."""
+
+ def setUp(self):
+ self.conn = httplib.HTTPConnection('example.com')
+ self.sock = FakeSocket("")
+ self.conn.sock = self.sock
+
+ def get_headers_and_fp(self):
+ f = io.BytesIO(self.sock.data)
+ f.readline() # read the request line
+ message = httplib.parse_headers(f)
+ return message, f
+
+ def test_manual_content_length(self):
+ # Set an incorrect content-length so that we can verify that
+ # it will not be over-ridden by the library.
+ self.conn.request("PUT", "/url", "body",
+ {"Content-Length": "42"})
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("42", message.get("content-length"))
+ self.assertEqual(4, len(f.read()))
+
+ def test_ascii_body(self):
+ self.conn.request("PUT", "/url", "body")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("4", message.get("content-length"))
+ self.assertEqual(b'body', f.read())
+
+ def test_latin1_body(self):
+ self.conn.request("PUT", "/url", "body\xc1")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
+ def test_bytes_body(self):
+ self.conn.request("PUT", "/url", b"body\xc1")
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
+ def test_file_body(self):
+ f = open(support.TESTFN, "w")
+ f.write("body")
+ f.close()
+ f = open(support.TESTFN)
+ self.conn.request("PUT", "/url", f)
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("4", message.get("content-length"))
+ self.assertEqual(b'body', f.read())
+
+ def test_binary_file_body(self):
+ f = open(support.TESTFN, "wb")
+ f.write(b"body\xc1")
+ f.close()
+ f = open(support.TESTFN, "rb")
+ self.conn.request("PUT", "/url", f)
+ message, f = self.get_headers_and_fp()
+ self.assertEqual("text/plain", message.get_content_type())
+ self.assertEqual(None, message.get_charset())
+ self.assertEqual("5", message.get("content-length"))
+ self.assertEqual(b'body\xc1', f.read())
+
def test_main(verbose=None):
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
- HTTPSTimeoutTest)
+ HTTPSTimeoutTest, RequestBodyTest)
if __name__ == '__main__':
test_main()