Issue #25626: Merge zlib fix from 3.5

author: Martin Panter <vadmium+py@gmail.com> 2015-11-21 10:57:15 (GMT)
committer: Martin Panter <vadmium+py@gmail.com> 2015-11-21 10:57:15 (GMT)
commit: 36befa5b4c37b34de9c7f59eba26575b255114cf (patch)
tree: d87a8cd77c6bbdfeb0e15a8320740da4051ab786
parent: 266b276f34db5cc106d100f63b631af1f1f8e0e2 (diff)
parent: e99e97762cc75ad94056275ddcae9c84d63a3412 (diff)
download: cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.zip
cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.tar.gz
cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.tar.bz2
7 files changed, 121 insertions, 48 deletions
diff --git a/Doc/howto/clinic.rst b/Doc/howto/clinic.rst
index 7524c4a..b04edea 100644
--- a/Doc/howto/clinic.rst
+++ b/Doc/howto/clinic.rst
@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c``
 
     /*[python input]
 
-    class uint_converter(CConverter):
+    class capped_uint_converter(CConverter):
         type = 'unsigned int'
-        converter = 'uint_converter'
+        converter = 'capped_uint_converter'
 
     [python start generated code]*/
-    /*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
+    /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
 
-This block adds a converter to Argument Clinic named ``uint``.  Parameters
-declared as ``uint`` will be declared as type ``unsigned int``, and will
-be parsed by the ``'O&'`` format unit, which will call the ``uint_converter``
-converter function.
-``uint`` variables automatically support default values.
+This block adds a converter to Argument Clinic named ``capped_uint``.  Parameters
+declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
+be parsed by the ``'O&'`` format unit, which will call the
+``capped_uint_converter`` converter function.  ``capped_uint`` variables
+automatically support default values.
 
 More sophisticated custom converters can insert custom C code to
 handle initialization and cleanup.
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 728b459..2969b36 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1610,12 +1610,15 @@ class _MemoryWatchdog:
 def bigmemtest(size, memuse, dry_run=True):
     """Decorator for bigmem tests.
 
-    'minsize' is the minimum useful size for the test (in arbitrary,
-    test-interpreted units.) 'memuse' is the number of 'bytes per size' for
-    the test, or a good estimate of it.
-
-    if 'dry_run' is False, it means the test doesn't support dummy runs
-    when -M is not specified.
+    'size' is a requested size for the test (in arbitrary, test-interpreted
+    units). 'memuse' is the number of bytes per unit for the test, or a good
+    estimate of it. For example, a test that needs two byte buffers, of 4 GiB
+    each, could be decorated with @bigmemtest(size=_4G, memuse=2).
+
+    The 'size' argument is normally passed to the decorated test method as an
+    extra argument. If 'dry_run' is true, the value passed to the test method
+    may be less than the requested value. If 'dry_run' is false, it means the
+    test doesn't support dummy runs when -M is not specified.
     """
     def decorator(f):
         def wrapper(self):
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index d8408e1..3c51673 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -3,6 +3,7 @@
 
 import unittest
 from test import support
+from test.support import bigmemtest, _4G
 import os
 import io
 import struct
@@ -116,6 +117,14 @@ class TestGzip(BaseTest):
                 self.assertEqual(f.tell(), nread)
         self.assertEqual(b''.join(blocks), data1 * 50)
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_read_large(self, size):
+        # Read chunk size over UINT_MAX should be supported, despite zlib's
+        # limitation per low-level call
+        compressed = gzip.compress(data1, compresslevel=1)
+        f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
+        self.assertEqual(f.read(size), data1)
+
     def test_io_on_closed_object(self):
         # Test that I/O operations on closed GzipFile objects raise a
         # ValueError, just like the corresponding functions on file objects.
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 7154e13..88c415b 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -122,11 +122,17 @@ class ExceptionTestCase(unittest.TestCase):
         self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
         self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
 
+    @support.cpython_only
+    def test_overflow(self):
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompress(b'', 15, sys.maxsize + 1)
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompressobj().flush(sys.maxsize + 1)
+
 
 class BaseCompressTestCase(object):
     def check_big_compress_buffer(self, size, compress_func):
         _1M = 1024 * 1024
-        fmt = "%%0%dx" % (2 * _1M)
         # Generate 10MB worth of random, and expand it by repeating it.
         # The assumption is that zlib's memory is not big enough to exploit
         # such spread out redundancy.
@@ -196,6 +202,18 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
         finally:
             data = None
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_large_bufsize(self, size):
+        # Test decompress(bufsize) parameter greater than the internal limit
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, size), data)
+
+    def test_custom_bufsize(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
+
 
 class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     # Test compression object
@@ -364,6 +382,21 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
         self.assertRaises(ValueError, dco.decompress, b"", -1)
         self.assertEqual(b'', dco.unconsumed_tail)
 
+    def test_maxlen_large(self):
+        # Sizes up to sys.maxsize should be accepted, although zlib is
+        # internally limited to expressing sizes with unsigned int
+        data = HAMLET_SCENE * 10
+        self.assertGreater(len(data), zlib.DEF_BUF_SIZE)
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
+
+    def test_maxlen_custom(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
+
     def test_clear_unconsumed_tail(self):
         # Issue #12050: calling decompress() without providing max_length
         # should clear the unconsumed_tail attribute.
@@ -537,6 +570,22 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
         data = zlib.compress(input2)
         self.assertEqual(dco.flush(), input1[1:])
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_flush_large_length(self, size):
+        # Test flush(length) parameter greater than internal limit UINT_MAX
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(size), input[1:])
+
+    def test_flush_custom_length(self):
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(CustomInt()), input[1:])
+
     @requires_Compress_copy
     def test_compresscopy(self):
         # Test copying a compression object
@@ -725,5 +774,10 @@ LAERTES
 """
 
 
+class CustomInt:
+    def __int__(self):
+        return 100
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 1e18892..0e25ad2 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -432,6 +432,13 @@ Core and Builtins
 Library
 -------
 
+- Issue #25626: Change three zlib functions to accept sizes that fit in
+  Py_ssize_t, but internally cap those sizes to UINT_MAX.  This resolves a
+  regression in 3.5 where GzipFile.read() failed to read chunks larger than 2
+  or 4 GiB.  The change affects the zlib.Decompress.decompress() max_length
+  parameter, the zlib.decompress() bufsize parameter, and the
+  zlib.Decompress.flush() length parameter.
+
 - Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True)
   when the OS gives priority to errors such as EACCES over EEXIST.
 
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
index 35661a5..c5cdf42 100644
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args)
     unsigned int bufsize = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
-        &data, &wbits, uint_converter, &bufsize))
+        &data, &wbits, capped_uint_converter, &bufsize))
         goto exit;
     return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
 
@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args)
     unsigned int max_length = 0;
 
     if (!PyArg_ParseTuple(args, "y*|O&:decompress",
-        &data, uint_converter, &max_length))
+        &data, capped_uint_converter, &max_length))
         goto exit;
     return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
 
@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args)
     unsigned int length = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "|O&:flush",
-        uint_converter, &length))
+        capped_uint_converter, &length))
         goto exit;
     return_value = zlib_Decompress_flush_impl(self, length);
 
@@ -438,4 +438,4 @@ exit:
 #ifndef ZLIB_COMPRESS_COPY_METHODDEF
     #define ZLIB_COMPRESS_COPY_METHODDEF
 #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
-/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index 1997b40..37307be 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level)
 
 /*[python input]
 
-class uint_converter(CConverter):
+class capped_uint_converter(CConverter):
     type = 'unsigned int'
-    converter = 'uint_converter'
+    converter = 'capped_uint_converter'
     c_ignored_default = "0"
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
 
 static int
-uint_converter(PyObject *obj, void *ptr)
+capped_uint_converter(PyObject *obj, void *ptr)
 {
-    long val;
-    unsigned long uval;
+    PyObject *long_obj;
+    Py_ssize_t val;
 
-    val = PyLong_AsLong(obj);
-    if (val == -1 && PyErr_Occurred()) {
-        uval = PyLong_AsUnsignedLong(obj);
-        if (uval == (unsigned long)-1 && PyErr_Occurred())
-            return 0;
+    long_obj = (PyObject *)_PyLong_FromNbInt(obj);
+    if (long_obj == NULL) {
+        return 0;
     }
-    else {
-        if (val < 0) {
-            PyErr_SetString(PyExc_ValueError,
-                            "value must be positive");
-            return 0;
-        }
-        uval = (unsigned long)val;
+    val = PyLong_AsSsize_t(long_obj);
+    Py_DECREF(long_obj);
+    if (val == -1 && PyErr_Occurred()) {
+        return 0;
     }
-
-    if (uval > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Python int too large for C unsigned int");
+    if (val < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "value must be positive");
         return 0;
     }
 
-    *(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int);
+    if ((size_t)val > UINT_MAX) {
+        *(unsigned int *)ptr = UINT_MAX;
+    }
+    else {
+        *(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
+            unsigned int);
+    }
     return 1;
 }
 
@@ -272,7 +272,7 @@ zlib.decompress
         Compressed data.
     wbits: int(c_default="MAX_WBITS") = MAX_WBITS
         The window buffer size.
-    bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
+    bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
         The initial output buffer size.
     /
 
@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data.
 static PyObject *
 zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits,
                      unsigned int bufsize)
-/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/
+/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/
 {
     PyObject *result_str = NULL;
     Byte *input;
@@ -691,7 +691,7 @@ zlib.Decompress.decompress
 
     data: Py_buffer
         The binary data to decompress.
-    max_length: uint = 0
+    max_length: capped_uint = 0
         The maximum allowable length of the decompressed data.
         Unconsumed input data will be stored in
         the unconsumed_tail attribute.
@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers.
 static PyObject *
 zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
                                 unsigned int max_length)
-/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/
+/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
 {
     int err;
     unsigned int old_length, length = DEF_BUF_SIZE;
@@ -1048,7 +1048,7 @@ error:
 /*[clinic input]
 zlib.Decompress.flush
 
-    length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
+    length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
         the initial size of the output buffer.
     /
 
@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data.
 
 static PyObject *
 zlib_Decompress_flush_impl(compobject *self, unsigned int length)
-/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/
+/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
 {
     int err;
     unsigned int new_length;
author	Martin Panter <vadmium+py@gmail.com>	2015-11-21 10:57:15 (GMT)
committer	Martin Panter <vadmium+py@gmail.com>	2015-11-21 10:57:15 (GMT)
commit	36befa5b4c37b34de9c7f59eba26575b255114cf (patch)
tree	d87a8cd77c6bbdfeb0e15a8320740da4051ab786
parent	266b276f34db5cc106d100f63b631af1f1f8e0e2 (diff)
parent	e99e97762cc75ad94056275ddcae9c84d63a3412 (diff)
download	cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.zip cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.tar.gz cpython-36befa5b4c37b34de9c7f59eba26575b255114cf.tar.bz2