summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/http/client.py 199
-rw-r--r-- Lib/test/test_httplib.py 151
-rw-r--r-- Lib/test/test_urllib2.py 103
-rw-r--r-- Lib/urllib/request.py 42
4 files changed, 396 insertions, 99 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 763e1ef..b242ba6 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -795,6 +795,58 @@ class HTTPConnection:
auto_open = 1
debuglevel = 0
+    @staticmethod
+    def _is_textIO(stream):
+        """Test whether a file-like object is a text or a binary stream.
+
+        Return True if *stream* is an instance of io.TextIOBase, False
+        otherwise.
+        """
+        return isinstance(stream, io.TextIOBase)
+
+    @staticmethod
+    def _get_content_length(body, method):
+        """Get the content-length based on the body.
+
+        If the body is "empty", we set Content-Length: 0 for methods
+        that expect a body (RFC 7230, Section 3.3.2).  If the body is
+        set for other methods, we set the header provided we can
+        figure out what the length is.
+
+        Return the length as an int, or None when no length can (or
+        should) be reported: an unset body for a method that does not
+        expect one, a text stream, a file-like object that cannot be
+        seeked, or any other object whose size is unknown.
+        """
+        if not body:
+            # do an explicit check for not None here to distinguish
+            # between unset and set but empty
+            if method.upper() in _METHODS_EXPECTING_BODY or body is not None:
+                return 0
+            else:
+                return None
+
+        if hasattr(body, 'read'):
+            # file-like object.
+            if HTTPConnection._is_textIO(body):
+                # text streams are unpredictable because it depends on
+                # character encoding and line ending translation.
+                return None
+            else:
+                # Is it seekable?
+                try:
+                    curpos = body.tell()
+                    sz = body.seek(0, io.SEEK_END)
+                except (TypeError, AttributeError, OSError):
+                    return None
+                else:
+                    # restore the original position; only the data from
+                    # there to the end of the file counts
+                    body.seek(curpos)
+                    return sz - curpos
+
+        try:
+            # does it implement the buffer protocol (bytes, bytearray, array)?
+            mv = memoryview(body)
+            return mv.nbytes
+        except TypeError:
+            pass
+
+        if isinstance(body, str):
+            return len(body)
+
+        return None
+
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
self.timeout = timeout
@@ -933,18 +985,9 @@ class HTTPConnection:
if hasattr(data, "read") :
if self.debuglevel > 0:
print("sendIng a read()able")
- encode = False
- try:
- mode = data.mode
- except AttributeError:
- # io.BytesIO and other file-like objects don't have a `mode`
- # attribute.
- pass
- else:
- if "b" not in mode:
- encode = True
- if self.debuglevel > 0:
- print("encoding file using iso-8859-1")
+ encode = self._is_textIO(data)
+ if encode and self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
while 1:
datablock = data.read(blocksize)
if not datablock:
@@ -970,7 +1013,22 @@ class HTTPConnection:
"""
self._buffer.append(s)
- def _send_output(self, message_body=None):
+    def _read_readable(self, readable):
+        """Yield the contents of a file-like object block by block.
+
+        *readable* is read by calling its read() method with a block
+        size of 8192 until it returns an empty result.  Blocks coming
+        from a text stream (see _is_textIO) are encoded as iso-8859-1
+        before being yielded; blocks from other streams are yielded
+        unchanged.
+        """
+        blocksize = 8192
+        if self.debuglevel > 0:
+            print("sendIng a read()able")
+        encode = self._is_textIO(readable)
+        if encode and self.debuglevel > 0:
+            print("encoding file using iso-8859-1")
+        while True:
+            datablock = readable.read(blocksize)
+            if not datablock:
+                break
+            if encode:
+                datablock = datablock.encode("iso-8859-1")
+            yield datablock
+
+ def _send_output(self, message_body=None, encode_chunked=False):
"""Send the currently buffered request and clear the buffer.
Appends an extra \\r\\n to the buffer.
@@ -979,10 +1037,50 @@ class HTTPConnection:
self._buffer.extend((b"", b""))
msg = b"\r\n".join(self._buffer)
del self._buffer[:]
-
self.send(msg)
+
if message_body is not None:
- self.send(message_body)
+
+ # create a consistent interface to message_body
+ if hasattr(message_body, 'read'):
+ # Let file-like take precedence over byte-like. This
+ # is needed to allow the current position of mmap'ed
+ # files to be taken into account.
+ chunks = self._read_readable(message_body)
+ else:
+ try:
+ # this is solely to check to see if message_body
+ # implements the buffer API. it /would/ be easier
+ # to capture if PyObject_CheckBuffer was exposed
+ # to Python.
+ memoryview(message_body)
+ except TypeError:
+ try:
+ chunks = iter(message_body)
+ except TypeError:
+ raise TypeError("message_body should be a bytes-like "
+ "object or an iterable, got %r"
+ % type(message_body))
+ else:
+ # the object implements the buffer interface and
+ # can be passed directly into socket methods
+ chunks = (message_body,)
+
+ for chunk in chunks:
+ if not chunk:
+ if self.debuglevel > 0:
+ print('Zero length chunk ignored')
+ continue
+
+ if encode_chunked and self._http_vsn == 11:
+ # chunked encoding
+ chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
+ + b'\r\n'
+ self.send(chunk)
+
+ if encode_chunked and self._http_vsn == 11:
+ # end chunked transfer
+ self.send(b'0\r\n\r\n')
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
"""Send a request to the server.
@@ -1135,52 +1233,27 @@ class HTTPConnection:
header = header + b': ' + value
self._output(header)
- def endheaders(self, message_body=None):
+ def endheaders(self, message_body=None, *, encode_chunked=False):
"""Indicate that the last header line has been sent to the server.
This method sends the request to the server. The optional message_body
argument can be used to pass a message body associated with the
- request. The message body will be sent in the same packet as the
- message headers if it is a string, otherwise it is sent as a separate
- packet.
+ request.
"""
if self.__state == _CS_REQ_STARTED:
self.__state = _CS_REQ_SENT
else:
raise CannotSendHeader()
- self._send_output(message_body)
+ self._send_output(message_body, encode_chunked=encode_chunked)
- def request(self, method, url, body=None, headers={}):
+ def request(self, method, url, body=None, headers={}, *,
+ encode_chunked=False):
"""Send a complete request to the server."""
- self._send_request(method, url, body, headers)
-
- def _set_content_length(self, body, method):
- # Set the content-length based on the body. If the body is "empty", we
- # set Content-Length: 0 for methods that expect a body (RFC 7230,
- # Section 3.3.2). If the body is set for other methods, we set the
- # header provided we can figure out what the length is.
- thelen = None
- method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
- if body is None and method_expects_body:
- thelen = '0'
- elif body is not None:
- try:
- thelen = str(len(body))
- except TypeError:
- # If this is a file-like object, try to
- # fstat its file descriptor
- try:
- thelen = str(os.fstat(body.fileno()).st_size)
- except (AttributeError, OSError):
- # Don't send a length if this failed
- if self.debuglevel > 0: print("Cannot stat!!")
+ self._send_request(method, url, body, headers, encode_chunked)
- if thelen is not None:
- self.putheader('Content-Length', thelen)
-
- def _send_request(self, method, url, body, headers):
+ def _send_request(self, method, url, body, headers, encode_chunked):
# Honor explicitly requested Host: and Accept-Encoding: headers.
- header_names = dict.fromkeys([k.lower() for k in headers])
+ header_names = frozenset(k.lower() for k in headers)
skips = {}
if 'host' in header_names:
skips['skip_host'] = 1
@@ -1189,15 +1262,41 @@ class HTTPConnection:
self.putrequest(method, url, **skips)
+ # chunked encoding will happen if HTTP/1.1 is used and either
+ # the caller passes encode_chunked=True or the following
+ # conditions hold:
+ # 1. content-length has not been explicitly set
+ # 2. the length of the body cannot be determined
+ # (e.g. it is a generator or unseekable file)
+ # 3. Transfer-Encoding has NOT been explicitly set by the caller
+
if 'content-length' not in header_names:
- self._set_content_length(body, method)
+ # only chunk body if not explicitly set for backwards
+ # compatibility, assuming the client code is already handling the
+ # chunking
+ if 'transfer-encoding' not in header_names:
+ # if content-length cannot be automatically determined, fall
+ # back to chunked encoding
+ encode_chunked = False
+ content_length = self._get_content_length(body, method)
+ if content_length is None:
+ if body:
+ if self.debuglevel > 0:
+ print('Unable to determine size of %r' % body)
+ encode_chunked = True
+ self.putheader('Transfer-Encoding', 'chunked')
+ else:
+ self.putheader('Content-Length', str(content_length))
+ else:
+ encode_chunked = False
+
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = _encode(body, 'body')
- self.endheaders(body)
+ self.endheaders(body, encode_chunked=encode_chunked)
def getresponse(self):
"""Get the response from the server.
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 1768a34..a179612 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -314,6 +314,124 @@ class HeaderTests(TestCase):
conn.putheader(name, value)
+class TransferEncodingTest(TestCase):
+    """Tests for chunked transfer encoding of request bodies."""
+
+    expected_body = b"It's just a flesh wound"
+
+    def test_endheaders_chunked(self):
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(b'')
+        conn.putrequest('POST', '/')
+        conn.endheaders(self._make_body(), encode_chunked=True)
+
+        _, _, body = self._parse_request(conn.sock.data)
+        body = self._parse_chunked(body)
+        self.assertEqual(body, self.expected_body)
+
+    def test_explicit_headers(self):
+        # explicit chunked
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(b'')
+        # this shouldn't actually be automatically chunk-encoded because the
+        # calling code has explicitly stated that it's taking care of it
+        conn.request(
+            'POST', '/', self._make_body(), {'Transfer-Encoding': 'chunked'})
+
+        _, headers, body = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+        self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+        self.assertEqual(body, self.expected_body)
+
+        # explicit chunked, string body
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(b'')
+        conn.request(
+            'POST', '/', self.expected_body.decode('latin-1'),
+            {'Transfer-Encoding': 'chunked'})
+
+        _, headers, body = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers.keys()])
+        self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+        self.assertEqual(body, self.expected_body)
+
+        # User-specified TE, but request() does the chunk encoding
+        conn = client.HTTPConnection('example.com')
+        conn.sock = FakeSocket(b'')
+        conn.request('POST', '/',
+                     headers={'Transfer-Encoding': 'gzip, chunked'},
+                     encode_chunked=True,
+                     body=self._make_body())
+        _, headers, body = self._parse_request(conn.sock.data)
+        self.assertNotIn('content-length', [k.lower() for k in headers])
+        self.assertEqual(headers['Transfer-Encoding'], 'gzip, chunked')
+        self.assertEqual(self._parse_chunked(body), self.expected_body)
+
+    def test_request(self):
+        for empty_lines in (False, True,):
+            conn = client.HTTPConnection('example.com')
+            conn.sock = FakeSocket(b'')
+            conn.request(
+                'POST', '/', self._make_body(empty_lines=empty_lines))
+
+            _, headers, body = self._parse_request(conn.sock.data)
+            body = self._parse_chunked(body)
+            self.assertEqual(body, self.expected_body)
+            self.assertEqual(headers['Transfer-Encoding'], 'chunked')
+
+            # Content-Length and Transfer-Encoding SHOULD not be sent in the
+            # same request
+            self.assertNotIn('content-length', [k.lower() for k in headers])
+
+    def _make_body(self, empty_lines=False):
+        """Yield expected_body piece by piece, split at spaces.
+
+        With *empty_lines* true, an empty bytes object is additionally
+        yielded before every odd-indexed piece.
+        """
+        lines = self.expected_body.split(b' ')
+        for idx, line in enumerate(lines):
+            # for testing handling empty lines
+            if empty_lines and idx % 2:
+                yield b''
+            if idx < len(lines) - 1:
+                yield line + b' '
+            else:
+                yield line
+
+    def _parse_request(self, data):
+        """Split raw request bytes into (request line, headers, body).
+
+        The headers are returned as a dict of latin-1 decoded, stripped
+        names and values; the body is returned as bytes.
+        """
+        lines = data.split(b'\r\n')
+        request = lines[0]
+        headers = {}
+        n = 1
+        while n < len(lines) and len(lines[n]) > 0:
+            # split on the first colon only: a header value may itself
+            # contain colons
+            key, val = lines[n].split(b':', 1)
+            key = key.decode('latin-1').strip()
+            headers[key] = val.decode('latin-1').strip()
+            n += 1
+
+        return request, headers, b'\r\n'.join(lines[n + 1:])
+
+    def _parse_chunked(self, data):
+        """Decode a chunked body and return the joined chunk data.
+
+        Each chunk's declared size is checked against the length of
+        its data.
+        """
+        body = []
+        n = 0
+        lines = data.split(b'\r\n')
+        # parse body; we /should/ hit the end chunk, but only continue
+        # while a complete size/chunk pair remains so that malformed
+        # data can neither loop forever nor fail to unpack
+        while n + 1 < len(lines):
+            size, chunk = lines[n:n+2]
+            size = int(size, 16)
+
+            if size == 0:
+                break
+
+            self.assertEqual(size, len(chunk))
+            body.append(chunk)
+
+            n += 2
+
+        return b''.join(body)
+
+
class BasicTest(TestCase):
def test_status_lines(self):
# Test HTTP status lines
@@ -564,11 +682,11 @@ class BasicTest(TestCase):
yield None
yield 'data_two'
- class UpdatingFile():
+ class UpdatingFile(io.TextIOBase):
mode = 'r'
d = data()
def read(self, blocksize=-1):
- return self.d.__next__()
+ return next(self.d)
expected = b'data'
@@ -1546,6 +1664,26 @@ class RequestBodyTest(TestCase):
message = client.parse_headers(f)
return message, f
+    def test_list_body(self):
+        # Note that no content-length is automatically calculated for
+        # an iterable.  The request will fall back to send chunked
+        # transfer encoding.
+        cases = (
+            ([b'foo', b'bar'], b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'),
+            ((b'foo', b'bar'), b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'),
+        )
+        for body, expected in cases:
+            with self.subTest(body):
+                self.conn = client.HTTPConnection('example.com')
+                self.conn.sock = self.sock = FakeSocket('')
+
+                self.conn.request('PUT', '/url', body)
+                msg, f = self.get_headers_and_fp()
+                self.assertNotIn('Content-Type', msg)
+                self.assertNotIn('Content-Length', msg)
+                self.assertEqual(msg.get('Transfer-Encoding'), 'chunked')
+                self.assertEqual(expected, f.read())
+
def test_manual_content_length(self):
# Set an incorrect content-length so that we can verify that
# it will not be over-ridden by the library.
@@ -1588,8 +1726,13 @@ class RequestBodyTest(TestCase):
message, f = self.get_headers_and_fp()
self.assertEqual("text/plain", message.get_content_type())
self.assertIsNone(message.get_charset())
- self.assertEqual("4", message.get("content-length"))
- self.assertEqual(b'body', f.read())
+ # Note that the length of text files is unpredictable
+ # because it depends on character encoding and line ending
+ # translation. No content-length will be set, the body
+ # will be sent using chunked transfer encoding.
+ self.assertIsNone(message.get("content-length"))
+ self.assertEqual("chunked", message.get("transfer-encoding"))
+ self.assertEqual(b'4\r\nbody\r\n0\r\n\r\n', f.read())
def test_binary_file_body(self):
self.addCleanup(support.unlink, support.TESTFN)
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index eda7ccc..0eea0c7 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -7,6 +7,8 @@ import io
import socket
import array
import sys
+import tempfile
+import subprocess
import urllib.request
# The proxy bypass method imported below has logic specific to the OSX
@@ -335,7 +337,8 @@ class MockHTTPClass:
else:
self._tunnel_headers.clear()
- def request(self, method, url, body=None, headers=None):
+ def request(self, method, url, body=None, headers=None, *,
+ encode_chunked=False):
self.method = method
self.selector = url
if headers is not None:
@@ -343,6 +346,7 @@ class MockHTTPClass:
self.req_headers.sort()
if body:
self.data = body
+ self.encode_chunked = encode_chunked
if self.raise_on_endheaders:
raise OSError()
@@ -908,41 +912,96 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.unredirected_hdrs["Host"], "baz")
self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
- # Check iterable body support
- def iterable_body():
- yield b"one"
- yield b"two"
- yield b"three"
+ def test_http_body_file(self):
+ # A regular file - Content Length is calculated unless already set.
- for headers in {}, {"Content-Length": 11}:
- req = Request("http://example.com/", iterable_body(), headers)
- if not headers:
- # Having an iterable body without a Content-Length should
- # raise an exception
- self.assertRaises(ValueError, h.do_request_, req)
- else:
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
+ file_obj = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
+ file_path = file_obj.name
+ file_obj.write(b"Something\nSomething\nSomething\n")
+ file_obj.close()
+
+ for headers in {}, {"Content-Length": 30}:
+ with open(file_path, "rb") as f:
+ req = Request("http://example.com/", f, headers)
newreq = h.do_request_(req)
+ self.assertEqual(int(newreq.get_header('Content-length')), 30)
- # A file object.
- # Test only Content-Length attribute of request.
+ os.unlink(file_path)
+
+ def test_http_body_fileobj(self):
+ # A file object - Content Length is calculated unless already set.
+ # (Note that there are some subtle differences to a regular
+ # file, that is why we are testing both cases.)
+
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
file_obj = io.BytesIO()
file_obj.write(b"Something\nSomething\nSomething\n")
for headers in {}, {"Content-Length": 30}:
+ file_obj.seek(0)
req = Request("http://example.com/", file_obj, headers)
- if not headers:
- # Having an iterable body without a Content-Length should
- # raise an exception
- self.assertRaises(ValueError, h.do_request_, req)
- else:
- newreq = h.do_request_(req)
- self.assertEqual(int(newreq.get_header('Content-length')), 30)
+ newreq = h.do_request_(req)
+ self.assertEqual(int(newreq.get_header('Content-length')), 30)
file_obj.close()
+    def test_http_body_pipe(self):
+        # A file reading from a pipe.
+        # A pipe cannot be seek'ed.  There is no way to determine the
+        # content length up front.  Thus, do_request_() should fall
+        # back to Transfer-encoding chunked.
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        cmd = [sys.executable, "-c",
+               r"import sys; "
+               r"sys.stdout.buffer.write(b'Something\nSomething\nSomething\n')"]
+        for headers in {}, {"Content-Length": 30}:
+            with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
+                req = Request("http://example.com/", proc.stdout, headers)
+                newreq = h.do_request_(req)
+                if not headers:
+                    self.assertIsNone(newreq.get_header('Content-length'))
+                    self.assertEqual(newreq.get_header('Transfer-encoding'),
+                                     'chunked')
+                else:
+                    self.assertEqual(int(newreq.get_header('Content-length')),
+                                     30)
+
+    def test_http_body_iterable(self):
+        # Generic iterable.  There is no way to determine the content
+        # length up front.  Fall back to Transfer-encoding chunked.
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        def iterable_body():
+            yield b"one"
+            yield b"two"
+            yield b"three"
+
+        for headers in {}, {"Content-Length": 11}:
+            req = Request("http://example.com/", iterable_body(), headers)
+            newreq = h.do_request_(req)
+            if not headers:
+                self.assertIsNone(newreq.get_header('Content-length'))
+                self.assertEqual(newreq.get_header('Transfer-encoding'),
+                                 'chunked')
+            else:
+                self.assertEqual(int(newreq.get_header('Content-length')), 11)
+
+ def test_http_body_array(self):
# array.array Iterable - Content Length is calculated
+ h = urllib.request.AbstractHTTPHandler()
+ o = h.parent = MockOpener()
+
iterable_array = array.array("I",[1,2,3,4])
for headers in {}, {"Content-Length": 16}:
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index dc436bc..30bf6e0 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -141,17 +141,9 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
*, cafile=None, capath=None, cadefault=False, context=None):
'''Open the URL url, which can be either a string or a Request object.
- *data* must be a bytes object specifying additional data to be sent to the
- server, or None if no such data is needed. data may also be an iterable
- object and in that case Content-Length value must be specified in the
- headers. Currently HTTP requests are the only ones that use data; the HTTP
- request will be a POST instead of a GET when the data parameter is
- provided.
-
- *data* should be a buffer in the standard application/x-www-form-urlencoded
- format. The urllib.parse.urlencode() function takes a mapping or sequence
- of 2-tuples and returns an ASCII text string in this format. It should be
- encoded to bytes before being used as the data parameter.
+ *data* must be an object specifying additional data to be sent to
+ the server, or None if no such data is needed. See Request for
+ details.
urllib.request module uses HTTP/1.1 and includes a "Connection:close"
header in its HTTP requests.
@@ -1235,6 +1227,11 @@ class AbstractHTTPHandler(BaseHandler):
def set_http_debuglevel(self, level):
self._debuglevel = level
+    def _get_content_length(self, request):
+        """Return the content length for the data of *request*.
+
+        Delegates to http.client.HTTPConnection._get_content_length(),
+        passing the request's data and its HTTP method.
+        """
+        return http.client.HTTPConnection._get_content_length(
+            request.data,
+            request.get_method())
+
def do_request_(self, request):
host = request.host
if not host:
@@ -1243,24 +1240,22 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
if isinstance(data, str):
- msg = "POST data should be bytes or an iterable of bytes. " \
- "It cannot be of type str."
+ msg = "POST data should be bytes, an iterable of bytes, " \
+ "or a file object. It cannot be of type str."
raise TypeError(msg)
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
- try:
- mv = memoryview(data)
- except TypeError:
- if isinstance(data, collections.Iterable):
- raise ValueError("Content-Length should be specified "
- "for iterable data of type %r %r" % (type(data),
- data))
+ if (not request.has_header('Content-length')
+ and not request.has_header('Transfer-encoding')):
+ content_length = self._get_content_length(request)
+ if content_length is not None:
+ request.add_unredirected_header(
+ 'Content-length', str(content_length))
else:
request.add_unredirected_header(
- 'Content-length', '%d' % (len(mv) * mv.itemsize))
+ 'Transfer-encoding', 'chunked')
sel_host = host
if request.has_proxy():
@@ -1316,7 +1311,8 @@ class AbstractHTTPHandler(BaseHandler):
try:
try:
- h.request(req.get_method(), req.selector, req.data, headers)
+ h.request(req.get_method(), req.selector, req.data, headers,
+ encode_chunked=req.has_header('Transfer-encoding'))
except OSError as err: # timeout error
raise URLError(err)
r = h.getresponse()