summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/http.client.rst98
-rw-r--r--Doc/library/urllib.request.rst60
-rw-r--r--Doc/whatsnew/3.6.rst19
-rw-r--r--Lib/http/client.py199
-rw-r--r--Lib/test/test_httplib.py151
-rw-r--r--Lib/test/test_urllib2.py103
-rw-r--r--Lib/urllib/request.py42
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS8
9 files changed, 531 insertions, 150 deletions
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index a9ca4b0..9429fb6 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -219,39 +219,62 @@ HTTPConnection Objects
:class:`HTTPConnection` instances have the following methods:
-.. method:: HTTPConnection.request(method, url, body=None, headers={})
+.. method:: HTTPConnection.request(method, url, body=None, headers={}, *, \
+ encode_chunked=False)
This will send a request to the server using the HTTP request
method *method* and the selector *url*.
If *body* is specified, the specified data is sent after the headers are
- finished. It may be a string, a :term:`bytes-like object`, an open
- :term:`file object`, or an iterable of :term:`bytes-like object`\s. If
- *body* is a string, it is encoded as ISO-8859-1, the default for HTTP. If
- it is a bytes-like object the bytes are sent as is. If it is a :term:`file
- object`, the contents of the file is sent; this file object should support
- at least the ``read()`` method. If the file object has a ``mode``
- attribute, the data returned by the ``read()`` method will be encoded as
- ISO-8859-1 unless the ``mode`` attribute contains the substring ``b``,
- otherwise the data returned by ``read()`` is sent as is. If *body* is an
- iterable, the elements of the iterable are sent as is until the iterable is
- exhausted.
-
- The *headers* argument should be a mapping of extra HTTP
- headers to send with the request.
-
- If *headers* does not contain a Content-Length item, one is added
- automatically if possible. If *body* is ``None``, the Content-Length header
- is set to ``0`` for methods that expect a body (``PUT``, ``POST``, and
- ``PATCH``). If *body* is a string or bytes object, the Content-Length
- header is set to its length. If *body* is a :term:`file object` and it
- works to call :func:`~os.fstat` on the result of its ``fileno()`` method,
- then the Content-Length header is set to the ``st_size`` reported by the
- ``fstat`` call. Otherwise no Content-Length header is added.
+ finished. It may be a :class:`str`, a :term:`bytes-like object`, an
+ open :term:`file object`, or an iterable of :class:`bytes`. If *body*
+ is a string, it is encoded as ISO-8859-1, the default for HTTP. If it
+ is a bytes-like object, the bytes are sent as is. If it is a :term:`file
+ object`, the contents of the file are sent; this file object should
+ support at least the ``read()`` method. If the file object is an
+ instance of :class:`io.TextIOBase`, the data returned by the ``read()``
+ method will be encoded as ISO-8859-1, otherwise the data returned by
+ ``read()`` is sent as is. If *body* is an iterable, the elements of the
+ iterable are sent as is until the iterable is exhausted.
+
+ The *headers* argument should be a mapping of extra HTTP headers to send
+ with the request.
+
+ If *headers* contains neither Content-Length nor Transfer-Encoding, a
+ Content-Length header will be added automatically if possible. If
+ *body* is ``None``, the Content-Length header is set to ``0`` for
+ methods that expect a body (``PUT``, ``POST``, and ``PATCH``). If
+ *body* is a string or bytes-like object, the Content-Length header is
+ set to its length. If *body* is a binary :term:`file object`
+ supporting :meth:`~io.IOBase.seek`, this will be used to determine
+ its size. Otherwise, the Content-Length header is not added
+ automatically. In cases where determining the Content-Length up
+ front is not possible, the body will be chunk-encoded and the
+ Transfer-Encoding header will automatically be set.
+
+ The *encode_chunked* argument is only relevant if Transfer-Encoding is
+ specified in *headers*. If *encode_chunked* is ``False``, the
+ HTTPConnection object assumes that all encoding is handled by the
+ calling code. If it is ``True``, the body will be chunk-encoded.
+
+ .. note::
+ Chunked transfer encoding was introduced in version 1.1 of the HTTP
+ protocol. Unless the HTTP server is known to handle HTTP 1.1,
+ the caller must either specify the Content-Length or must use a
+ body representation whose length can be determined automatically.
.. versionadded:: 3.2
*body* can now be an iterable.
+ .. versionchanged:: 3.6
+ If neither Content-Length nor Transfer-Encoding is set in
+ *headers* and Content-Length cannot be determined, *body* will now
+ be automatically chunk-encoded. The *encode_chunked* argument
+ was added.
+ The Content-Length for binary file objects is determined with seek.
+ No attempt is made to determine the Content-Length for text file
+ objects.
+
.. method:: HTTPConnection.getresponse()
Should be called after a request is sent to get the response from the server.
@@ -336,13 +359,32 @@ also send your request step by step, by using the four functions below.
an argument.
-.. method:: HTTPConnection.endheaders(message_body=None)
+.. method:: HTTPConnection.endheaders(message_body=None, *, encode_chunked=False)
Send a blank line to the server, signalling the end of the headers. The
optional *message_body* argument can be used to pass a message body
- associated with the request. The message body will be sent in the same
- packet as the message headers if it is string, otherwise it is sent in a
- separate packet.
+ associated with the request.
+
+ If *encode_chunked* is ``True``, the result of each iteration of
+ *message_body* will be chunk-encoded as specified in :rfc:`7230`,
+ Section 3.3.1. How the data is encoded is dependent on the type of
+ *message_body*. If *message_body* implements the :ref:`buffer interface
+ <bufferobjects>` the encoding will result in a single chunk.
+ If *message_body* is a :class:`collections.abc.Iterable`, each iteration
+ of *message_body* will result in a chunk. If *message_body* is a
+ :term:`file object`, each call to ``.read()`` will result in a chunk.
+ The method automatically signals the end of the chunk-encoded data
+ immediately after *message_body*.
+
+ .. note:: Due to the chunked encoding specification, empty chunks
+ yielded by an iterator body will be ignored by the chunk-encoder.
+ This is to avoid premature termination of the read of the request by
+ the target server due to malformed encoding.
+
+ .. versionadded:: 3.6
+ Chunked encoding support. The *encode_chunked* parameter was
+ added.
+
.. method:: HTTPConnection.send(data)
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 1291aeb..e619cc1 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -30,18 +30,9 @@ The :mod:`urllib.request` module defines the following functions:
Open the URL *url*, which can be either a string or a
:class:`Request` object.
- *data* must be a bytes object specifying additional data to be sent to the
- server, or ``None`` if no such data is needed. *data* may also be an
- iterable object and in that case Content-Length value must be specified in
- the headers. Currently HTTP requests are the only ones that use *data*; the
- HTTP request will be a POST instead of a GET when the *data* parameter is
- provided.
-
- *data* should be a buffer in the standard
- :mimetype:`application/x-www-form-urlencoded` format. The
- :func:`urllib.parse.urlencode` function takes a mapping or sequence of
- 2-tuples and returns an ASCII text string in this format. It should
- be encoded to bytes before being used as the *data* parameter.
+ *data* must be an object specifying additional data to be sent to the
+ server, or ``None`` if no such data is needed. See :class:`Request`
+ for details.
urllib.request module uses HTTP/1.1 and includes ``Connection:close`` header
in its HTTP requests.
@@ -192,14 +183,22 @@ The following classes are provided:
*url* should be a string containing a valid URL.
- *data* must be a bytes object specifying additional data to send to the
- server, or ``None`` if no such data is needed. Currently HTTP requests are
- the only ones that use *data*; the HTTP request will be a POST instead of a
- GET when the *data* parameter is provided. *data* should be a buffer in the
- standard :mimetype:`application/x-www-form-urlencoded` format.
- The :func:`urllib.parse.urlencode` function takes a mapping or sequence of
- 2-tuples and returns an ASCII string in this format. It should be
- encoded to bytes before being used as the *data* parameter.
+ *data* must be an object specifying additional data to send to the
+ server, or ``None`` if no such data is needed. Currently HTTP
+ requests are the only ones that use *data*. The supported object
+ types include bytes, file-like objects, and iterables. If no
+ ``Content-Length`` header has been provided, :class:`HTTPHandler` will
+ try to determine the length of *data* and set this header accordingly.
+ If this fails, ``Transfer-Encoding: chunked`` as specified in
+ :rfc:`7230`, Section 3.3.1 will be used to send the data. See
+ :meth:`http.client.HTTPConnection.request` for details on the
+ supported object types and on how the content length is determined.
+
+ For an HTTP POST request method, *data* should be a buffer in the
+ standard :mimetype:`application/x-www-form-urlencoded` format. The
+ :func:`urllib.parse.urlencode` function takes a mapping or sequence
+ of 2-tuples and returns an ASCII string in this format. It should
+ be encoded to bytes before being used as the *data* parameter.
*headers* should be a dictionary, and will be treated as if
:meth:`add_header` was called with each key and value as arguments.
@@ -211,8 +210,10 @@ The following classes are provided:
:mod:`urllib`'s default user agent string is
``"Python-urllib/2.6"`` (on Python 2.6).
- An example of using ``Content-Type`` header with *data* argument would be
- sending a dictionary like ``{"Content-Type": "application/x-www-form-urlencoded"}``.
+ An appropriate ``Content-Type`` header should be included if the *data*
+ argument is present. If this header has not been provided and *data*
+ is not ``None``, ``Content-Type: application/x-www-form-urlencoded`` will
+ be added as a default.
The final two arguments are only of interest for correct handling
of third-party HTTP cookies:
@@ -235,15 +236,28 @@ The following classes are provided:
*method* should be a string that indicates the HTTP request method that
will be used (e.g. ``'HEAD'``). If provided, its value is stored in the
:attr:`~Request.method` attribute and is used by :meth:`get_method()`.
- Subclasses may indicate a default method by setting the
+ The default is ``'GET'`` if *data* is ``None`` or ``'POST'`` otherwise.
+ Subclasses may indicate a different default method by setting the
:attr:`~Request.method` attribute in the class itself.
+ .. note::
+ The request will not work as expected if the data object is unable
+ to deliver its content more than once (e.g. a file or an iterable
+ that can produce the content only once) and the request is retried
+ for HTTP redirects or authentication. The *data* is sent to the
+ HTTP server right away after the headers. There is no support for
+ a 100-continue expectation in the library.
+
.. versionchanged:: 3.3
:attr:`Request.method` argument is added to the Request class.
.. versionchanged:: 3.4
Default :attr:`Request.method` may be indicated at the class level.
+ .. versionchanged:: 3.6
+ Do not raise an error if the ``Content-Length`` has not been
+ provided and could not be determined. Fall back to using chunked
+ transfer encoding instead.
.. class:: OpenerDirector()
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 8b85b22..6d5bbc0 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -324,6 +324,15 @@ exceptions: see :func:`faulthandler.enable`. (Contributed by Victor Stinner in
:issue:`23848`.)
+http.client
+-----------
+
+:meth:`HTTPConnection.request() <http.client.HTTPConnection.request>` and
+:meth:`~http.client.HTTPConnection.endheaders` both now support
+chunked encoding request bodies.
+(Contributed by Demian Brecht and Rolf Krahl in :issue:`12319`.)
+
+
idlelib and IDLE
----------------
@@ -500,6 +509,16 @@ The :class:`~unittest.mock.Mock` class has the following improvements:
(Contributed by Amit Saha in :issue:`26323`.)
+urllib.request
+--------------
+
+If an HTTP request has a non-empty body but no Content-Length header
+and the content length cannot be determined up front, rather than
+throwing an error, :class:`~urllib.request.AbstractHTTPHandler` now
+falls back to using chunked transfer encoding.
+(Contributed by Demian Brecht and Rolf Krahl in :issue:`12319`.)
+
+
urllib.robotparser
------------------
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 763e1ef..b242ba6 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -795,6 +795,58 @@ class HTTPConnection:
auto_open = 1
debuglevel = 0
+ @staticmethod
+ def _is_textIO(stream):
+ """Test whether a file-like object is a text or a binary stream.
+ """
+ return isinstance(stream, io.TextIOBase)
+
+ @staticmethod
+ def _get_content_length(body, method):
+ """Get the content-length based on the body.
+
+ If the body is "empty", we set Content-Length: 0 for methods
+ that expect a body (RFC 7230, Section 3.3.2). If the body is
+ set for other methods, we set the header provided we can
+ figure out what the length is.
+ """
+ if not body:
+ # do an explicit check for not None here to distinguish
+ # between unset and set but empty
+ if method.upper() in _METHODS_EXPECTING_BODY or body is not None:
+ return 0
+ else:
+ return None
+
+ if hasattr(body, 'read'):
+ # file-like object.
+ if HTTPConnection._is_textIO(body):
+ # text streams are unpredictable because it depends on
+ # character encoding and line ending translation.
+ return None
+ else:
+ # Is it seekable?
+ try:
+ curpos = body.tell()
+ sz = body.seek(0, io.SEEK_END)
+ except (TypeError, AttributeError, OSError):
+ return None
+ else:
+ body.seek(curpos)
+ return sz - curpos
+
+ try:
+ # does it implement the buffer protocol (bytes, bytearray, array)?
+ mv = memoryview(body)
+ return mv.nbytes
+ except TypeError:
+ pass
+
+ if isinstance(body, str):
+ return len(body)
+
+ return None
+
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
self.timeout = timeout
@@ -933,18 +985,9 @@ class HTTPConnection:
if hasattr(data, "read") :
if self.debuglevel > 0:
print("sendIng a read()able")
- encode = False
- try:
- mode = data.mode
- except AttributeError:
- # io.BytesIO and other file-like objects don't have a `mode`
- # attribute.
- pass
- else:
- if "b" not in mode:
- encode = True
- if self.debuglevel > 0:
- print("encoding file using iso-8859-1")
+ encode = self._is_textIO(data)
+ if encode and self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
while 1:
datablock = data.read(blocksize)
if not datablock:
@@ -970,7 +1013,22 @@ class HTTPConnection:
"""
self._buffer.append(s)
- def _send_output(self, message_body=None):
+ def _read_readable(self, readable):
+ blocksize = 8192
+ if self.debuglevel > 0:
+ print("sendIng a read()able")
+ encode = self._is_textIO(readable)
+ if encode and self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
+ while True:
+ datablock = readable.read(blocksize)
+ if not datablock:
+ break
+ if encode:
+ datablock = datablock.encode("iso-8859-1")
+ yield datablock
+
+ def _send_output(self, message_body=None, encode_chunked=False):
"""Send the currently buffered request and clear the buffer.
Appends an extra \\r\\n to the buffer.
@@ -979,10 +1037,50 @@ class HTTPConnection:
self._buffer.extend((b"", b""))
msg = b"\r\n".join(self._buffer)
del self._buffer[:]
-
self.send(msg)
+
if message_body is not None:
- self.send(message_body)
+
+ # create a consistent interface to message_body
+ if hasattr(message_body, 'read'):
+ # Let file-like take precedence over byte-like. This
+ # is needed to allow the current position of mmap'ed
+ # files to be taken into account.
+ chunks = self._read_readable(message_body)
+ else:
+ try:
+ # this is solely to check to see if message_body
+ # implements the buffer API. it /would/ be easier
+ # to capture if PyObject_CheckBuffer was exposed
+ # to Python.
+ memoryview(message_body)
+ except TypeError:
+ try:
+ chunks = iter(message_body)
+ except TypeError:
+ raise TypeError("message_body should be a bytes-like "
+ "object or an iterable, got %r"
+ % type(message_body))
+ else:
+ # the object implements the buffer interface and
+ # can be passed directly into socket methods
+ chunks = (message_body,)
+
+ for chunk in chunks:
+ if not chunk:
+ if self.debuglevel > 0:
+ print('Zero length chunk ignored')
+ continue
+
+ if encode_chunked and self._http_vsn == 11:
+ # chunked encoding
+ chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
+ + b'\r\n'
+ self.send(chunk)
+
+ if encode_chunked and self._http_vsn == 11:
+ # end chunked transfer
+ self.send(b'0\r\n\r\n')
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
"""Send a request to the server.
@@ -1135,52 +1233,27 @@ class HTTPConnection:
header = header + b': ' + value
self._output(header)
- def endheaders(self, message_body=None):
+ def endheaders(self, message_body=None, *, encode_chunked=False):
"""Indicate that the last header line has been sent to the server.
This method sends the request to the server. The optional message_body
argument can be used to pass a message body associated with the
- request. The message body will be sent in the same packet as the
- message headers if it is a string, otherwise it is sent as a separate
- packet.
+ request.
"""
if self.__state == _CS_REQ_STARTED:
self.__state = _CS_REQ_SENT
else:
raise CannotSendHeader()
- self._send_output(message_body)
+ self._send_output(message_body, encode_chunked=encode_chunked)
- def request(self, method, url, body=None, headers={}):
+ def request(self, method, url, body=None, headers={}, *,
+ encode_chunked=False):
"""Send a complete request to the server."""
- self._send_request(method, url, body, headers)
-
- def _set_content_length(self, body, method):
- # Set the content-length based on the body. If the body is "empty", we
- # set Content-Length: 0 for methods that expect a body (RFC 7230,
- # Section 3.3.2). If the body is set for other methods, we set the
- # header provided we can figure out what the length is.
- thelen = None
- method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
- if body is None and method_expects_body:
- thelen = '0'
- elif body is not None:
- try:
- thelen = str(len(body))
- except TypeError:
- # If this is a file-like object, try to
- # fstat its file descriptor
- try:
- thelen = str(os.fstat(body.fileno()).st_size)
- except (AttributeError, OSError):
- # Don't send a length if this failed
- if self.debuglevel > 0: print("Cannot stat!!")
+ self._send_request(method, url, body, headers, encode_chunked)
- if thelen is not None:
- self.putheader('Content-Length', thelen)
-
- def _send_request(self, method, url, body, headers):
+ def _send_request(self, method, url, body, headers, encode_chunked):
# Honor explicitly requested Host: and Accept-Encoding: headers.
- header_names = dict.fromkeys([k.lower() for k in headers])
+ header_names = frozenset(k.lower() for k in headers)
skips = {}
if 'host' in header_names:
skips['skip_host'] = 1
@@ -1189,15 +1262,41 @@ class HTTPConnection:
self.putrequest(method, url, **skips)
+ # chunked encoding will happen if HTTP/1.1 is used and either
+ # the caller passes encode_chunked=True or the following
+ # conditions hold:
+ # 1. content-length has not been explicitly set
+ # 2. the length of the body cannot be determined
+ # (e.g. it is a generator or unseekable file)
+ # 3. Transfer-Encoding has NOT been explicitly set by the caller
+
if 'content-length' not in header_names:
- self._set_content_length(body, method)
+ # only chunk body if not explicitly set for backwards
+ # compatibility, assuming the client code is already handling the
+ # chunking
+ if 'transfer-encoding' not in header_names:
+ # if content-length cannot be automatically determined, fall
+ # back to chunked encoding
+ encode_chunked = False
+ content_length = self._get_content_length(body, method)
+ if content_length is None:
+ if body:
+ if self.debuglevel > 0:
+ print('Unable to determine size of %r' % body)
+ encode_chunked = True
+ self.putheader('Transfer-Encoding', 'chunked')
+ else:
+ self.putheader('Content-Length', str(content_length))
+ else:
+ encode_chunked = False
+
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = _encode(body, 'body')
- self.endheaders(body)
+ self.endheaders(body, encode_chunked=encode_chunked)
def getresponse(self):
"""Get the response from the server.
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 1768a34..a179612 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -314,6 +314,124 @@ class HeaderTests(TestCase):
conn.putheader(name, value)
+class TransferEncodingTest(TestCase):
+ expected_body = b"It's just a flesh wound"
+
+ def test_endheaders_chunked(self):
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(b'')
+ conn.putrequest('POST', '/')
+ conn.endheaders(self._make_body(), encode_chunked=True)
+
+ _, _, body = self._parse_request(conn.sock.data)
+ body = self._parse_chunked(body)
+ self.assertEqual(body, self.expected_body)
+
+ def test_explicit_headers(self):
+ # explicit chunked
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(b'')
+ # this shouldn't actually be automatically chunk-encoded because the
+ # calling code has explicitly stated that it's taking care of it
+ conn.request(
+ 'POST', '/', self._make_body(), {'Transfer-Encoding': 'chunked'})
+
+ _, headers, body = self._parse_request(conn.sock.data)
+ self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+ self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+ self.assertEqual(body, self.expected_body)
+
+ # explicit chunked, string body
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(b'')
+ conn.request(
+ 'POST', '/', self.expected_body.decode('latin-1'),
+ {'Transfer-Encoding': 'chunked'})
+
+ _, headers, body = self._parse_request(conn.sock.data)
+ self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+ self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+ self.assertEqual(body, self.expected_body)
+
+ # User-specified TE, but request() does the chunk encoding
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(b'')
+ conn.request('POST', '/',
+ headers={'Transfer-Encoding': 'gzip, chunked'},
+ encode_chunked=True,
+ body=self._make_body())
+ _, headers, body = self._parse_request(conn.sock.data)
+ self.assertNotIn('content-length', [k.lower() for k in headers])
+ self.assertEqual(headers['Transfer-Encoding'], 'gzip, chunked')
+ self.assertEqual(self._parse_chunked(body), self.expected_body)
+
+ def test_request(self):
+ for empty_lines in (False, True,):
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(b'')
+ conn.request(
+ 'POST', '/', self._make_body(empty_lines=empty_lines))
+
+ _, headers, body = self._parse_request(conn.sock.data)
+ body = self._parse_chunked(body)
+ self.assertEqual(body, self.expected_body)
+ self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+
+ # Content-Length and Transfer-Encoding SHOULD not be sent in the
+ # same request
+ self.assertNotIn('content-length', [k.lower() for k in headers])
+
+ def _make_body(self, empty_lines=False):
+ lines = self.expected_body.split(b' ')
+ for idx, line in enumerate(lines):
+ # for testing handling empty lines
+ if empty_lines and idx % 2:
+ yield b''
+ if idx < len(lines) - 1:
+ yield line + b' '
+ else:
+ yield line
+
+ def _parse_request(self, data):
+ lines = data.split(b'\r\n')
+ request = lines[0]
+ headers = {}
+ n = 1
+ while n < len(lines) and len(lines[n]) > 0:
+ key, val = lines[n].split(b':')
+ key = key.decode('latin-1').strip()
+ headers[key] = val.decode('latin-1').strip()
+ n += 1
+
+ return request, headers, b'\r\n'.join(lines[n + 1:])
+
+ def _parse_chunked(self, data):
+ body = []
+ trailers = {}
+ n = 0
+ lines = data.split(b'\r\n')
+ # parse body
+ while True:
+ size, chunk = lines[n:n+2]
+ size = int(size, 16)
+
+ if size == 0:
+ n += 1
+ break
+
+ self.assertEqual(size, len(chunk))
+ body.append(chunk)
+
+ n += 2
+ # we /should/ hit the end chunk, but check against the size of
+ # lines so we're not stuck in an infinite loop should we get
+ # malformed data
+ if n > len(lines):
+ break
+
+ return b''.join(body)
+
+
class BasicTest(TestCase):
def test_status_lines(self):
# Test HTTP status lines
@@ -564,11 +682,11 @@ class BasicTest(TestCase):
yield None
yield 'data_two'
- class UpdatingFile():
+ class UpdatingFile(io.TextIOBase):
mode = 'r'
d = data()
def read(self, blocksize=-1):
- return self.d.__next__()
+ return next(self.d)
expected = b'data'
@@ -1546,6 +1664,26 @@ class RequestBodyTest(TestCase):
message = client.parse_headers(f)
return message, f
+ def test_list_body(self):
+ # Note that no content-length is automatically calculated for
+ # an iterable. The request will fall back to send chunked
+ # transfer encoding.
+ cases = (
+ ([b'foo', b'bar'], b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'),
+ ((b'foo', b'bar'), b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'),
+ )
+ for body, expected in cases:
+ with self.subTest(body):
+ self.conn = client.HTTPConnection('example.com')
+ self.conn.sock = self.sock = FakeSocket('')
+
+ self.conn.request('PUT', '/url', body)
+ msg, f = self.get_headers_and_fp()
+ self.assertNotIn('Content-Type', msg)
+ self.assertNotIn('Content-Length', msg)
+ self.assertEqual(msg.get('Transfer-Encoding'), 'chunked')
+ self.assertEqual(expected, f.read())
+
def test_manual_content_length(self):
# Set an incorrect content-length so that we can verify that
# it will not be over-ridden by the library.
@@ -1588,8 +1726,13 @@ class RequestBodyTest(TestCase):
message, f = self.get_headers_and_fp()
self.assertEqual("text/plain", message.get_content_type())
self.assertIsNone(message.get_charset())
- self.assertEqual("4", message.get("content-length"))
- self.assertEqual(b'body', f.read())
+ # Note that the length of text files is unpredictable
+ # because it depends on character encoding and line ending
+ # translation. No content-length will be set, the body
+ # will be sent using chunked transfer encoding.
+ self.assertIsNone(message.get("content-length"))
+ self.assertEqual("chunked", message.get("transfer-encoding"))
+ self.assertEqual(b'4\r\nbody\r\n0\r\n\r\n', f.read())
def test_binary_file_body(self):
self.addCleanup(support.unlink, support.TESTFN)
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index eda7ccc..0eea0c7 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -7,6 +7,8 @@ import io
import socket
import array
import sys
+import tempfile
+import subprocess
import urllib.request
# The proxy bypass method imported below has logic specific to the OSX
@@ -335,7 +337,8 @@ class MockHTTPClass:
else:
self._tunnel_headers.clear()
- def request(self, method, url, body=None, headers=None):
+ def request(self, method, url, body=None, headers=None, *,
+ encode_chunked=False):
self.method = method
self.selector = url
if headers is not None:
@@ -343,6 +346,7 @@ class MockHTTPClass:
self.req_headers.sort()
if body:
self.data = body
+ self.encode_chunked = encode_chunked
if self.raise_on_endheaders:
raise OSError()
@@ -908,41 +912,96 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.unredirected_hdrs["Host"], "baz")
self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
- # Check iterable body support
- def iterable_body():
- yield b"one"
- yield b"two"
- yield b"three"
+ def test_http_body_file(self):
+ # A regular file - Content Length is calculated unless already set.
- for headers in {}, {"Content-Length": 11}:
- req = Request("http://example.com/", iterable_body(), headers)
- if not headers:
- # Having an iterable body without a Content-Length should
- # raise an exception
- self.assertRaises(ValueError, h.do_request_, req)
- else:
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ file_obj = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
+ file_path = file_obj.name
+ file_obj.write(b"Something\nSomething\nSomething\n")
+ file_obj.close()
+
+ for headers in {}, {"Content-Length": 30}:
+ with open(file_path, "rb") as f:
+ req = Request("http://example.com/", f, headers)
newreq = h.do_request_(req)
+ self.assertEqual(int(newreq.get_header('Content-length')), 30)
- # A file object.
- # Test only Content-Length attribute of request.
+ os.unlink(file_path)
+
+ def test_http_body_fileobj(self):
+ # A file object - Content Length is calculated unless already set.
+ # (Note that there are some subtle differences to a regular
+ # file, that is why we are testing both cases.)
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
file_obj = io.BytesIO()
file_obj.write(b"Something\nSomething\nSomething\n")
for headers in {}, {"Content-Length": 30}:
+ file_obj.seek(0)
req = Request("http://example.com/", file_obj, headers)
- if not headers:
- # Having an iterable body without a Content-Length should
- # raise an exception
- self.assertRaises(ValueError, h.do_request_, req)
- else:
- newreq = h.do_request_(req)
- self.assertEqual(int(newreq.get_header('Content-length')), 30)
+ newreq = h.do_request_(req)
+ self.assertEqual(int(newreq.get_header('Content-length')), 30)
file_obj.close()
+ def test_http_body_pipe(self):
+ # A file reading from a pipe.
+ # A pipe cannot be seek'ed. There is no way to determine the
+ # content length up front. Thus, do_request_() should fall
+ # back to Transfer-encoding chunked.
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ cmd = [sys.executable, "-c",
+ r"import sys; "
+ r"sys.stdout.buffer.write(b'Something\nSomething\nSomething\n')"]
+ for headers in {}, {"Content-Length": 30}:
+ with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
+ req = Request("http://example.com/", proc.stdout, headers)
+ newreq = h.do_request_(req)
+ if not headers:
+ self.assertEqual(newreq.get_header('Content-length'), None)
+ self.assertEqual(newreq.get_header('Transfer-encoding'),
+ 'chunked')
+ else:
+ self.assertEqual(int(newreq.get_header('Content-length')),
+ 30)
+
+ def test_http_body_iterable(self):
+ # Generic iterable. There is no way to determine the content
+ # length up front. Fall back to Transfer-encoding chunked.
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ def iterable_body():
+ yield b"one"
+ yield b"two"
+ yield b"three"
+
+ for headers in {}, {"Content-Length": 11}:
+ req = Request("http://example.com/", iterable_body(), headers)
+ newreq = h.do_request_(req)
+ if not headers:
+ self.assertEqual(newreq.get_header('Content-length'), None)
+ self.assertEqual(newreq.get_header('Transfer-encoding'),
+ 'chunked')
+ else:
+ self.assertEqual(int(newreq.get_header('Content-length')), 11)
+
+ def test_http_body_array(self):
# array.array Iterable - Content Length is calculated
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
iterable_array = array.array("I",[1,2,3,4])
for headers in {}, {"Content-Length": 16}:
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index dc436bc..30bf6e0 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -141,17 +141,9 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
*, cafile=None, capath=None, cadefault=False, context=None):
'''Open the URL url, which can be either a string or a Request object.
- *data* must be a bytes object specifying additional data to be sent to the
- server, or None if no such data is needed. data may also be an iterable
- object and in that case Content-Length value must be specified in the
- headers. Currently HTTP requests are the only ones that use data; the HTTP
- request will be a POST instead of a GET when the data parameter is
- provided.
-
- *data* should be a buffer in the standard application/x-www-form-urlencoded
- format. The urllib.parse.urlencode() function takes a mapping or sequence
- of 2-tuples and returns an ASCII text string in this format. It should be
- encoded to bytes before being used as the data parameter.
+ *data* must be an object specifying additional data to be sent to
+ the server, or None if no such data is needed. See Request for
+ details.
urllib.request module uses HTTP/1.1 and includes a "Connection:close"
header in its HTTP requests.
@@ -1235,6 +1227,11 @@ class AbstractHTTPHandler(BaseHandler):
def set_http_debuglevel(self, level):
self._debuglevel = level
+ def _get_content_length(self, request):
+ return http.client.HTTPConnection._get_content_length(
+ request.data,
+ request.get_method())
+
def do_request_(self, request):
host = request.host
if not host:
@@ -1243,24 +1240,22 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
if isinstance(data, str):
- msg = "POST data should be bytes or an iterable of bytes. " \
- "It cannot be of type str."
+ msg = "POST data should be bytes, an iterable of bytes, " \
+ "or a file object. It cannot be of type str."
raise TypeError(msg)
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
- try:
- mv = memoryview(data)
- except TypeError:
- if isinstance(data, collections.Iterable):
- raise ValueError("Content-Length should be specified "
- "for iterable data of type %r %r" % (type(data),
- data))
+ if (not request.has_header('Content-length')
+ and not request.has_header('Transfer-encoding')):
+ content_length = self._get_content_length(request)
+ if content_length is not None:
+ request.add_unredirected_header(
+ 'Content-length', str(content_length))
else:
request.add_unredirected_header(
- 'Content-length', '%d' % (len(mv) * mv.itemsize))
+ 'Transfer-encoding', 'chunked')
sel_host = host
if request.has_proxy():
@@ -1316,7 +1311,8 @@ class AbstractHTTPHandler(BaseHandler):
try:
try:
- h.request(req.get_method(), req.selector, req.data, headers)
+ h.request(req.get_method(), req.selector, req.data, headers,
+ encode_chunked=req.has_header('Transfer-encoding'))
except OSError as err: # timeout error
raise URLError(err)
r = h.getresponse()
diff --git a/Misc/ACKS b/Misc/ACKS
index 9cc1c1e..46f4ae7 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -795,6 +795,7 @@ Daniel Kozan
Jerzy Kozera
Maksim Kozyarchuk
Stefan Krah
+Rolf Krahl
Bob Kras
Sebastian Kreft
Holger Krekel
diff --git a/Misc/NEWS b/Misc/NEWS
index 5d98cdd..e0cd715 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -160,6 +160,14 @@ Library
then further affects other traceback display operations in the module). Patch
by Emanuel Barry.
+- Issue #12319: Chunked transfer encoding support added to
+ http.client.HTTPConnection requests. The
+ urllib.request.AbstractHTTPHandler class does not enforce a Content-Length
+ header any more. If an HTTP request has a non-empty body, but no
+ Content-Length header, and the content length cannot be determined
+ up front, rather than throwing an error, the library now falls back
+ to using chunked transfer encoding.
+
- Issue #27664: Add to concurrent.futures.thread.ThreadPoolExecutor()
the ability to specify a thread name prefix.