path: root/Lib/test/test_urllib2.py
author     Martin Panter <vadmium+py@gmail.com>  2016-08-24 06:33:33 (GMT)
committer  Martin Panter <vadmium+py@gmail.com>  2016-08-24 06:33:33 (GMT)
commit     3c0d0baf2badfad7deb346d1043f7d83bb92691f (patch)
tree       968ca71729f519aaf6ebb38477efdc73e5ae3ae9 /Lib/test/test_urllib2.py
parent     a790fe7ff86f193670b3d8287b22c72cbe675c7b (diff)
Issue #12319: Support for chunked encoding of HTTP request bodies
When the body object is a file, its size is no longer determined with fstat(), since that can report the wrong result (e.g. reading from a pipe). Instead, determine the size using seek(), or fall back to chunked encoding for unseekable files.

Also, change the logic for detecting text files to check for TextIOBase inheritance, rather than inspecting the “mode” attribute, which may not exist (e.g. BytesIO and StringIO). The Content-Length for text files is no longer determined ahead of time, because the original logic could have been wrong depending on the codec and newline translation settings.

Patch by Demian Brecht and Rolf Krahl, with a few tweaks by me.
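The sizing strategy described above can be pictured roughly as follows. This is only an illustrative sketch of the idea, not the code this commit adds to urllib.request/http.client; the helper name and the exact error handling are assumptions.

    import io

    def body_length_or_chunked(body):
        # Illustrative sketch only: return a Content-Length if the body's
        # size can be probed with seek()/tell(), otherwise return None to
        # signal a fall-back to Transfer-Encoding: chunked.
        if isinstance(body, io.TextIOBase):
            # Text files: the encoded size depends on the codec and newline
            # translation, so no length is computed ahead of time.
            return None
        try:
            current = body.tell()
            end = body.seek(0, io.SEEK_END)
            body.seek(current)
        except (AttributeError, OSError):
            # Unseekable (e.g. a pipe) or not a file-like object at all.
            return None
        return end - current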
Diffstat (limited to 'Lib/test/test_urllib2.py')
-rw-r--r--  Lib/test/test_urllib2.py  103
1 file changed, 81 insertions, 22 deletions
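From the caller's side, the behaviour exercised by the new tests below looks roughly like this: a body whose length cannot be determined up front (a generator, a pipe) is now sent with chunked transfer encoding instead of being rejected with ValueError. The URL is a placeholder and the urlopen() call is left commented out because it needs a reachable server.

    import urllib.request

    def gen_body():
        # Generic iterable body: total length is unknown up front.
        yield b"one"
        yield b"two"
        yield b"three"

    req = urllib.request.Request("http://example.com/", data=gen_body())
    # urllib.request.urlopen(req) would now send the body with
    # "Transfer-Encoding: chunked" rather than raising ValueError.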
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index eda7ccc..0eea0c7 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -7,6 +7,8 @@ import io
 import socket
 import array
 import sys
+import tempfile
+import subprocess
 
 import urllib.request
 # The proxy bypass method imported below has logic specific to the OSX
@@ -335,7 +337,8 @@ class MockHTTPClass:
         else:
             self._tunnel_headers.clear()
 
-    def request(self, method, url, body=None, headers=None):
+    def request(self, method, url, body=None, headers=None, *,
+                encode_chunked=False):
         self.method = method
         self.selector = url
         if headers is not None:
@@ -343,6 +346,7 @@ class MockHTTPClass:
         self.req_headers.sort()
         if body:
             self.data = body
+        self.encode_chunked = encode_chunked
         if self.raise_on_endheaders:
             raise OSError()
 
@@ -908,41 +912,96 @@ class HandlerTests(unittest.TestCase):
             self.assertEqual(req.unredirected_hdrs["Host"], "baz")
             self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
 
-        # Check iterable body support
-        def iterable_body():
-            yield b"one"
-            yield b"two"
-            yield b"three"
+    def test_http_body_file(self):
+        # A regular file - Content Length is calculated unless already set.
 
-        for headers in {}, {"Content-Length": 11}:
-            req = Request("http://example.com/", iterable_body(), headers)
-            if not headers:
-                # Having an iterable body without a Content-Length should
-                # raise an exception
-                self.assertRaises(ValueError, h.do_request_, req)
-            else:
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        file_obj = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
+        file_path = file_obj.name
+        file_obj.write(b"Something\nSomething\nSomething\n")
+        file_obj.close()
+
+        for headers in {}, {"Content-Length": 30}:
+            with open(file_path, "rb") as f:
+                req = Request("http://example.com/", f, headers)
                 newreq = h.do_request_(req)
+                self.assertEqual(int(newreq.get_header('Content-length')), 30)
 
-        # A file object.
-        # Test only Content-Length attribute of request.
+        os.unlink(file_path)
+
+    def test_http_body_fileobj(self):
+        # A file object - Content Length is calculated unless already set.
+        # (Note that there are some subtle differences to a regular
+        # file, that is why we are testing both cases.)
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
         file_obj = io.BytesIO()
         file_obj.write(b"Something\nSomething\nSomething\n")
 
         for headers in {}, {"Content-Length": 30}:
+            file_obj.seek(0)
             req = Request("http://example.com/", file_obj, headers)
-            if not headers:
-                # Having an iterable body without a Content-Length should
-                # raise an exception
-                self.assertRaises(ValueError, h.do_request_, req)
-            else:
-                newreq = h.do_request_(req)
-                self.assertEqual(int(newreq.get_header('Content-length')), 30)
+            newreq = h.do_request_(req)
+            self.assertEqual(int(newreq.get_header('Content-length')), 30)
 
         file_obj.close()
 
+    def test_http_body_pipe(self):
+        # A file reading from a pipe.
+        # A pipe cannot be seek'ed. There is no way to determine the
+        # content length up front. Thus, do_request_() should fall
+        # back to Transfer-encoding chunked.
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        cmd = [sys.executable, "-c",
+               r"import sys; "
+               r"sys.stdout.buffer.write(b'Something\nSomething\nSomething\n')"]
+        for headers in {}, {"Content-Length": 30}:
+            with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
+                req = Request("http://example.com/", proc.stdout, headers)
+                newreq = h.do_request_(req)
+                if not headers:
+                    self.assertEqual(newreq.get_header('Content-length'), None)
+                    self.assertEqual(newreq.get_header('Transfer-encoding'),
+                                     'chunked')
+                else:
+                    self.assertEqual(int(newreq.get_header('Content-length')),
+                                     30)
+
+    def test_http_body_iterable(self):
+        # Generic iterable. There is no way to determine the content
+        # length up front. Fall back to Transfer-encoding chunked.
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        def iterable_body():
+            yield b"one"
+            yield b"two"
+            yield b"three"
+
+        for headers in {}, {"Content-Length": 11}:
+            req = Request("http://example.com/", iterable_body(), headers)
+            newreq = h.do_request_(req)
+            if not headers:
+                self.assertEqual(newreq.get_header('Content-length'), None)
+                self.assertEqual(newreq.get_header('Transfer-encoding'),
+                                 'chunked')
+            else:
+                self.assertEqual(int(newreq.get_header('Content-length')), 11)
+
+    def test_http_body_array(self):
         # array.array Iterable - Content Length is calculated
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
         iterable_array = array.array("I",[1,2,3,4])
 
         for headers in {}, {"Content-Length": 16}: