summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Include/pymem.h 12
-rw-r--r-- Include/pyport.h 11
-rwxr-xr-x Lib/test/test_array.py 17
-rw-r--r-- Lib/test/test_struct.py 8
-rw-r--r-- Misc/NEWS 5
-rw-r--r-- Modules/_csv.c 10
-rw-r--r-- Modules/_struct.c 6
-rw-r--r-- Modules/arraymodule.c 38
-rw-r--r-- Modules/audioop.c 68
-rw-r--r-- Modules/binascii.c 42
-rw-r--r-- Modules/cPickle.c 16
-rw-r--r-- Modules/cStringIO.c 15
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 38
-rw-r--r-- Modules/datetimemodule.c 7
-rw-r--r-- Modules/md5.c 13
-rw-r--r-- Modules/stropmodule.c 19
-rw-r--r-- Objects/bufferobject.c 7
-rw-r--r-- Objects/listobject.c 22
-rw-r--r-- Objects/obmalloc.c 4
-rw-r--r-- Parser/node.c 3
-rw-r--r-- Python/asdl.c 36
-rw-r--r-- Python/ast.c 3
-rw-r--r-- Python/bltinmodule.c 60
-rw-r--r-- Python/compile.c 32
24 files changed, 438 insertions, 54 deletions
diff --git a/Include/pymem.h b/Include/pymem.h
index 7f74f37..f9acb55 100644
--- a/Include/pymem.h
+++ b/Include/pymem.h
@@ -85,14 +85,18 @@ PyAPI_FUNC(void) PyMem_Free(void *);
*/
#define PyMem_New(type, n) \
- ( (type *) PyMem_Malloc((n) * sizeof(type)) )
+ ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \
+ ( (type *) PyMem_Malloc((n) * sizeof(type)) ) )
#define PyMem_NEW(type, n) \
- ( (type *) PyMem_MALLOC((n) * sizeof(type)) )
+ ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \
+ ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) )
#define PyMem_Resize(p, type, n) \
- ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) )
+ ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \
+ ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) )
#define PyMem_RESIZE(p, type, n) \
- ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) )
+ ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \
+ ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) )
/* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used
* anymore. They're just confusing aliases for PyMem_{Free,FREE} now.
diff --git a/Include/pyport.h b/Include/pyport.h
index 7c684f7..15c8644 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -117,6 +117,17 @@ typedef Py_intptr_t Py_ssize_t;
# error "Python needs a typedef for Py_ssize_t in pyport.h."
#endif
+/* Largest possible value of size_t.
+ SIZE_MAX is part of C99, so it might be defined on some
+ platforms. If it is not defined, (size_t)-1 is a portable
+ definition for C89, due to the way signed->unsigned
+ conversion is defined. */
+#ifdef SIZE_MAX
+#define PY_SIZE_MAX SIZE_MAX
+#else
+#define PY_SIZE_MAX ((size_t)-1)
+#endif
+
/* Largest positive value of type Py_ssize_t. */
#define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1))
/* Smallest negative value of type Py_ssize_t. */
diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py
index b11c9d6..34a8f79 100755
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@@ -1009,6 +1009,23 @@ tests.append(FloatTest)
class DoubleTest(FPTest):
typecode = 'd'
minitemsize = 8
+
+    def test_alloc_overflow(self):
+        a = array.array('d', [-1]*65536)  # 2**16 items
+        try:
+            a *= 65536  # would need 2**16 * 2**16 == 2**32 items
+        except MemoryError:
+            pass
+        else:
+            self.fail("a *= 2**16 didn't raise MemoryError")
+        b = array.array('d', [ 2.71828183, 3.14159265, -1])
+        try:
+            b * 1431655766  # 3 * 1431655766 == 2**32 + 2 items
+        except MemoryError:
+            pass
+        else:
+            self.fail("b * 1431655766 didn't raise MemoryError")
+
tests.append(DoubleTest)
def test_main(verbose=None):
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index e3a4e21..7ee47bf 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -8,6 +8,7 @@ from test.test_support import TestFailed, verbose, run_unittest, catch_warning
import sys
ISBIGENDIAN = sys.byteorder == "big"
+IS32BIT = sys.maxint == 0x7fffffff
del sys
try:
@@ -568,6 +569,13 @@ class StructTest(unittest.TestCase):
for c in '\x01\x7f\xff\x0f\xf0':
self.assertTrue(struct.unpack('>?', c)[0])
+    def test_crasher(self):
+        if IS32BIT:  # MemoryError is only asserted on 32-bit builds
+            self.assertRaises(MemoryError, struct.pack, "357913941c", "a")
+        else:
+            print "test_crasher skipped on 64bit build."
+
+
def test_main():
run_unittest(StructTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index 4b9f21c..f721122 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,11 @@ Core and Builtins
Exception (KeyboardInterrupt, and SystemExit) propagate instead of
ignoring them.
+- Added checks for integer overflows, contributed by Google. Some are
+ only available if asserts are left in the code, in cases where they
+ can't be triggered from Python code.
+
+
Extension Modules
-----------------
diff --git a/Modules/_csv.c b/Modules/_csv.c
index c628927..a5787d3 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -559,6 +559,10 @@ parse_grow_buff(ReaderObj *self)
self->field = PyMem_Malloc(self->field_size);
}
else {
+ if (self->field_size > INT_MAX / 2) {
+ PyErr_NoMemory();
+ return 0;
+ }
self->field_size *= 2;
self->field = PyMem_Realloc(self->field, self->field_size);
}
@@ -1053,6 +1057,12 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
static int
join_check_rec_size(WriterObj *self, int rec_len)
{
+
+ if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
+ PyErr_NoMemory();
+ return 0;
+ }
+
if (rec_len > self->rec_size) {
if (self->rec_size == 0) {
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
diff --git a/Modules/_struct.c b/Modules/_struct.c
index e5fe211..078c3a5 100644
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1385,6 +1385,12 @@ prepare_s(PyStructObject *self)
}
}
+ /* check for overflow */
+ if ((len + 1) > (PY_SSIZE_T_MAX / sizeof(formatcode))) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
self->s_size = size;
self->s_len = len;
codes = PyMem_MALLOC((len + 1) * sizeof(formatcode));
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
index 89ed27a..c505dad 100644
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -652,6 +652,9 @@ array_concat(arrayobject *a, PyObject *bb)
PyErr_BadArgument();
return NULL;
}
+ if (Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
+ return PyErr_NoMemory();
+ }
size = Py_SIZE(a) + Py_SIZE(b);
np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr);
if (np == NULL) {
@@ -674,6 +677,9 @@ array_repeat(arrayobject *a, Py_ssize_t n)
Py_ssize_t nbytes;
if (n < 0)
n = 0;
+ if ((Py_SIZE(a) != 0) && (n > PY_SSIZE_T_MAX / Py_SIZE(a))) {
+ return PyErr_NoMemory();
+ }
size = Py_SIZE(a) * n;
np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr);
if (np == NULL)
@@ -818,6 +824,11 @@ array_do_extend(arrayobject *self, PyObject *bb)
"can only extend with array of same kind");
return -1;
}
+ if ((Py_SIZE(self) > PY_SSIZE_T_MAX - Py_SIZE(b)) ||
+ ((Py_SIZE(self) + Py_SIZE(b)) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) {
+ PyErr_NoMemory();
+ return -1;
+ }
size = Py_SIZE(self) + Py_SIZE(b);
PyMem_RESIZE(self->ob_item, char, size*self->ob_descr->itemsize);
if (self->ob_item == NULL) {
@@ -859,6 +870,10 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n)
if (n < 0)
n = 0;
items = self->ob_item;
+ if ((self->ob_descr->itemsize != 0) &&
+ (Py_SIZE(self) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) {
+ return PyErr_NoMemory();
+ }
size = Py_SIZE(self) * self->ob_descr->itemsize;
if (n == 0) {
PyMem_FREE(items);
@@ -867,6 +882,9 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n)
self->allocated = 0;
}
else {
+ if (size > PY_SSIZE_T_MAX / n) {
+ return PyErr_NoMemory();
+ }
PyMem_Resize(items, char, n * size);
if (items == NULL)
return PyErr_NoMemory();
@@ -1148,6 +1166,10 @@ array_reduce(arrayobject *array)
Py_INCREF(dict);
}
if (Py_SIZE(array) > 0) {
+ if (array->ob_descr->itemsize
+ > PY_SSIZE_T_MAX / array->ob_size) {
+ return PyErr_NoMemory();
+ }
result = Py_BuildValue("O(cs#)O",
Py_TYPE(array),
array->ob_descr->typecode,
@@ -1330,6 +1352,9 @@ array_fromlist(arrayobject *self, PyObject *list)
if ((*self->ob_descr->setitem)(self,
Py_SIZE(self) - n + i, v) != 0) {
Py_SIZE(self) -= n;
+ if (itemsize && (self->ob_size > PY_SSIZE_T_MAX / itemsize)) {
+ return PyErr_NoMemory();
+ }
PyMem_RESIZE(item, char,
Py_SIZE(self) * itemsize);
self->ob_item = item;
@@ -1389,6 +1414,10 @@ array_fromstring(arrayobject *self, PyObject *args)
n = n / itemsize;
if (n > 0) {
char *item = self->ob_item;
+ if ((n > PY_SSIZE_T_MAX - Py_SIZE(self)) ||
+ ((Py_SIZE(self) + n) > PY_SSIZE_T_MAX / itemsize)) {
+ return PyErr_NoMemory();
+ }
PyMem_RESIZE(item, char, (Py_SIZE(self) + n) * itemsize);
if (item == NULL) {
PyErr_NoMemory();
@@ -1414,8 +1443,12 @@ values,as if it had been read from a file using the fromfile() method).");
static PyObject *
array_tostring(arrayobject *self, PyObject *unused)
{
- return PyString_FromStringAndSize(self->ob_item,
+ if (self->ob_size <= PY_SSIZE_T_MAX / self->ob_descr->itemsize) {
+ return PyString_FromStringAndSize(self->ob_item,
Py_SIZE(self) * self->ob_descr->itemsize);
+ } else {
+ return PyErr_NoMemory();
+ }
}
PyDoc_STRVAR(tostring_doc,
@@ -1443,6 +1476,9 @@ array_fromunicode(arrayobject *self, PyObject *args)
}
if (n > 0) {
Py_UNICODE *item = (Py_UNICODE *) self->ob_item;
+ if (Py_SIZE(self) > PY_SSIZE_T_MAX - n) {
+ return PyErr_NoMemory();
+ }
PyMem_RESIZE(item, Py_UNICODE, Py_SIZE(self) + n);
if (item == NULL) {
PyErr_NoMemory();
diff --git a/Modules/audioop.c b/Modules/audioop.c
index ce00975..31e3fa4 100644
--- a/Modules/audioop.c
+++ b/Modules/audioop.c
@@ -829,7 +829,7 @@ static PyObject *
audioop_tostereo(PyObject *self, PyObject *args)
{
signed char *cp, *ncp;
- int len, size, val1, val2, val = 0;
+ int len, new_len, size, val1, val2, val = 0;
double fac1, fac2, fval, maxval;
PyObject *rv;
int i;
@@ -846,7 +846,14 @@ audioop_tostereo(PyObject *self, PyObject *args)
return 0;
}
- rv = PyString_FromStringAndSize(NULL, len*2);
+ new_len = len*2;
+ if (new_len < 0) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+
+ rv = PyString_FromStringAndSize(NULL, new_len);
if ( rv == 0 )
return 0;
ncp = (signed char *)PyString_AsString(rv);
@@ -1009,7 +1016,7 @@ audioop_lin2lin(PyObject *self, PyObject *args)
{
signed char *cp;
unsigned char *ncp;
- int len, size, size2, val = 0;
+ int len, new_len, size, size2, val = 0;
PyObject *rv;
int i, j;
@@ -1023,7 +1030,13 @@ audioop_lin2lin(PyObject *self, PyObject *args)
return 0;
}
- rv = PyString_FromStringAndSize(NULL, (len/size)*size2);
+ new_len = (len/size)*size2;
+ if (new_len < 0) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+ rv = PyString_FromStringAndSize(NULL, new_len);
if ( rv == 0 )
return 0;
ncp = (unsigned char *)PyString_AsString(rv);
@@ -1059,6 +1072,7 @@ audioop_ratecv(PyObject *self, PyObject *args)
int chan, d, *prev_i, *cur_i, cur_o;
PyObject *state, *samps, *str, *rv = NULL;
int bytes_per_frame;
+ size_t alloc_size;
weightA = 1;
weightB = 0;
@@ -1101,8 +1115,14 @@ audioop_ratecv(PyObject *self, PyObject *args)
inrate /= d;
outrate /= d;
- prev_i = (int *) malloc(nchannels * sizeof(int));
- cur_i = (int *) malloc(nchannels * sizeof(int));
+ alloc_size = sizeof(int) * (unsigned)nchannels;
+ if (alloc_size < nchannels) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+ prev_i = (int *) malloc(alloc_size);
+ cur_i = (int *) malloc(alloc_size);
if (prev_i == NULL || cur_i == NULL) {
(void) PyErr_NoMemory();
goto exit;
@@ -1276,7 +1296,7 @@ audioop_ulaw2lin(PyObject *self, PyObject *args)
unsigned char *cp;
unsigned char cval;
signed char *ncp;
- int len, size, val;
+ int len, new_len, size, val;
PyObject *rv;
int i;
@@ -1289,12 +1309,18 @@ audioop_ulaw2lin(PyObject *self, PyObject *args)
return 0;
}
- rv = PyString_FromStringAndSize(NULL, len*size);
+ new_len = len*size;
+ if (new_len < 0) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+ rv = PyString_FromStringAndSize(NULL, new_len);
if ( rv == 0 )
return 0;
ncp = (signed char *)PyString_AsString(rv);
- for ( i=0; i < len*size; i += size ) {
+ for ( i=0; i < new_len; i += size ) {
cval = *cp++;
val = st_ulaw2linear16(cval);
@@ -1344,7 +1370,7 @@ audioop_alaw2lin(PyObject *self, PyObject *args)
unsigned char *cp;
unsigned char cval;
signed char *ncp;
- int len, size, val;
+ int len, new_len, size, val;
PyObject *rv;
int i;
@@ -1357,12 +1383,18 @@ audioop_alaw2lin(PyObject *self, PyObject *args)
return 0;
}
- rv = PyString_FromStringAndSize(NULL, len*size);
+ new_len = len*size;
+ if (new_len < 0) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+ rv = PyString_FromStringAndSize(NULL, new_len);
if ( rv == 0 )
return 0;
ncp = (signed char *)PyString_AsString(rv);
- for ( i=0; i < len*size; i += size ) {
+ for ( i=0; i < new_len; i += size ) {
cval = *cp++;
val = st_alaw2linear16(cval);
@@ -1487,7 +1519,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args)
{
signed char *cp;
signed char *ncp;
- int len, size, valpred, step, delta, index, sign, vpdiff;
+ int len, new_len, size, valpred, step, delta, index, sign, vpdiff;
PyObject *rv, *str, *state;
int i, inputbuffer = 0, bufferstep;
@@ -1509,7 +1541,13 @@ audioop_adpcm2lin(PyObject *self, PyObject *args)
} else if ( !PyArg_ParseTuple(state, "ii", &valpred, &index) )
return 0;
- str = PyString_FromStringAndSize(NULL, len*size*2);
+ new_len = len*size*2;
+ if (new_len < 0) {
+ PyErr_SetString(PyExc_MemoryError,
+ "not enough memory for output buffer");
+ return 0;
+ }
+ str = PyString_FromStringAndSize(NULL, new_len);
if ( str == 0 )
return 0;
ncp = (signed char *)PyString_AsString(str);
@@ -1517,7 +1555,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args)
step = stepsizeTable[index];
bufferstep = 0;
- for ( i=0; i < len*size*2; i += size ) {
+ for ( i=0; i < new_len; i += size ) {
/* Step 1 - get the delta value and compute next index */
if ( bufferstep ) {
delta = inputbuffer & 0xf;
diff --git a/Modules/binascii.c b/Modules/binascii.c
index c1fc675..bcbafcf 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -141,7 +141,7 @@ static char table_a2b_base64[] = {
#define BASE64_PAD '='
/* Max binary chunk size; limited only by available memory */
-#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
+#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
static unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -198,6 +198,8 @@ binascii_a2b_uu(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
return NULL;
+ assert(ascii_len >= 0);
+
/* First byte: binary data length (in bytes) */
bin_len = (*ascii_data++ - ' ') & 077;
ascii_len--;
@@ -351,6 +353,11 @@ binascii_a2b_base64(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
return NULL;
+ assert(ascii_len >= 0);
+
+ if (ascii_len > PY_SSIZE_T_MAX - 3)
+ return PyErr_NoMemory();
+
bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
/* Allocate the buffer */
@@ -440,6 +447,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
return NULL;
+
+ assert(bin_len >= 0);
+
if ( bin_len > BASE64_MAXBIN ) {
PyErr_SetString(Error, "Too much data for base64 line");
return NULL;
@@ -495,6 +505,11 @@ binascii_a2b_hqx(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
return NULL;
+ assert(len >= 0);
+
+ if (len > PY_SSIZE_T_MAX - 2)
+ return PyErr_NoMemory();
+
/* Allocate a string that is too big (fixed later)
Add two to the initial length to prevent interning which
would preclude subsequent resizing. */
@@ -558,6 +573,11 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
return NULL;
+ assert(len >= 0);
+
+ if (len > PY_SSIZE_T_MAX / 2 - 2)
+ return PyErr_NoMemory();
+
/* Worst case: output is twice as big as input (fixed later) */
if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
return NULL;
@@ -607,6 +627,11 @@ binascii_b2a_hqx(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
return NULL;
+ assert(len >= 0);
+
+ if (len > PY_SSIZE_T_MAX / 2 - 2)
+ return PyErr_NoMemory();
+
/* Allocate a buffer that is at least large enough */
if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
return NULL;
@@ -645,9 +670,13 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
return NULL;
+ assert(in_len >= 0);
+
/* Empty string is a special case */
if ( in_len == 0 )
return PyString_FromString("");
+ else if (in_len > PY_SSIZE_T_MAX / 2)
+ return PyErr_NoMemory();
/* Allocate a buffer of reasonable size. Resized when needed */
out_len = in_len*2;
@@ -673,6 +702,7 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args)
#define OUTBYTE(b) \
do { \
if ( --out_len_left < 0 ) { \
+ if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
_PyString_Resize(&rv, 2*out_len); \
if ( rv == NULL ) return NULL; \
out_data = (unsigned char *)PyString_AsString(rv) \
@@ -741,7 +771,7 @@ binascii_crc_hqx(PyObject *self, PyObject *args)
if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
return NULL;
- while(len--) {
+ while(len-- > 0) {
crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
}
@@ -901,7 +931,7 @@ binascii_crc32(PyObject *self, PyObject *args)
return NULL;
crc = ~ crc;
- while (len--)
+ while (len-- > 0)
crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
/* Note: (crc >> 8) MUST zero fill on left */
@@ -923,6 +953,10 @@ binascii_hexlify(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
return NULL;
+ assert(arglen >= 0);
+ if (arglen > PY_SSIZE_T_MAX / 2)
+ return PyErr_NoMemory();
+
retval = PyString_FromStringAndSize(NULL, arglen*2);
if (!retval)
return NULL;
@@ -980,6 +1014,8 @@ binascii_unhexlify(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
return NULL;
+ assert(arglen >= 0);
+
/* XXX What should we do about strings with an odd length? Should
* we add an implicit leading zero, or a trailing zero? For now,
* raise an exception.
diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index f130087..51fc226 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -3435,6 +3435,14 @@ load_binstring(Unpicklerobject *self)
if (self->read_func(self, &s, 4) < 0) return -1;
l = calc_binint(s, 4);
+ if (l < 0) {
+ /* Corrupt or hostile pickle -- we never write one like
+ * this.
+ */
+ PyErr_SetString(UnpicklingError,
+ "BINSTRING pickle has negative byte count");
+ return -1;
+ }
if (self->read_func(self, &s, l) < 0)
return -1;
@@ -3502,6 +3510,14 @@ load_binunicode(Unpicklerobject *self)
if (self->read_func(self, &s, 4) < 0) return -1;
l = calc_binint(s, 4);
+ if (l < 0) {
+ /* Corrupt or hostile pickle -- we never write one like
+ * this.
+ */
+ PyErr_SetString(UnpicklingError,
+ "BINUNICODE pickle has negative byte count");
+ return -1;
+ }
if (self->read_func(self, &s, l) < 0)
return -1;
diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c
index 139a4a8..237d8c2 100644
--- a/Modules/cStringIO.c
+++ b/Modules/cStringIO.c
@@ -119,6 +119,7 @@ PyDoc_STRVAR(IO_getval__doc__,
static PyObject *
IO_cgetval(PyObject *self) {
if (!IO__opencheck(IOOOBJECT(self))) return NULL;
+ assert(IOOOBJECT(self)->pos >= 0);
return PyString_FromStringAndSize(((IOobject*)self)->buf,
((IOobject*)self)->pos);
}
@@ -137,6 +138,7 @@ IO_getval(IOobject *self, PyObject *args) {
}
else
s=self->string_size;
+ assert(self->pos >= 0);
return PyString_FromStringAndSize(self->buf, s);
}
@@ -157,6 +159,8 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
Py_ssize_t l;
if (!IO__opencheck(IOOOBJECT(self))) return -1;
+ assert(IOOOBJECT(self)->pos >= 0);
+ assert(IOOOBJECT(self)->string_size >= 0);
l = ((IOobject*)self)->string_size - ((IOobject*)self)->pos;
if (n < 0 || n > l) {
n = l;
@@ -192,12 +196,17 @@ IO_creadline(PyObject *self, char **output) {
for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
n < s && *n != '\n'; n++);
+
if (n < s) n++;
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
- assert(((IOobject*)self)->pos + l < INT_MAX);
- ((IOobject*)self)->pos += (int)l;
+
+ assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
+ assert(IOOOBJECT(self)->pos >= 0);
+ assert(IOOOBJECT(self)->string_size >= 0);
+
+ ((IOobject*)self)->pos += l;
return (int)l;
}
@@ -215,6 +224,7 @@ IO_readline(IOobject *self, PyObject *args) {
n -= m;
self->pos -= m;
}
+ assert(IOOOBJECT(self)->pos >= 0);
return PyString_FromStringAndSize(output, n);
}
@@ -277,6 +287,7 @@ IO_tell(IOobject *self, PyObject *unused) {
if (!IO__opencheck(self)) return NULL;
+ assert(self->pos >= 0);
return PyInt_FromSsize_t(self->pos);
}
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index bbd4c1a..a1b0ca9 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -163,13 +163,17 @@ static PyGetSetDef codecctx_getsets[] = {
static int
expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
{
- Py_ssize_t orgpos, orgsize;
+ Py_ssize_t orgpos, orgsize, incsize;
orgpos = (Py_ssize_t)((char *)buf->outbuf -
PyString_AS_STRING(buf->outobj));
orgsize = PyString_GET_SIZE(buf->outobj);
- if (_PyString_Resize(&buf->outobj, orgsize + (
- esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
+ incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
+
+ if (orgsize > PY_SSIZE_T_MAX - incsize)
+ return -1;
+
+ if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1)
return -1;
buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
@@ -473,6 +477,12 @@ multibytecodec_encode(MultibyteCodec *codec,
buf.excobj = NULL;
buf.inbuf = buf.inbuf_top = *data;
buf.inbuf_end = buf.inbuf_top + datalen;
+
+ if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
+
buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
if (buf.outobj == NULL)
goto errorexit;
@@ -735,6 +745,11 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
origpending = ctx->pendingsize;
if (origpending > 0) {
+ if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {
+ PyErr_NoMemory();
+ /* inbuf_tmp == NULL */
+ goto errorexit;
+ }
inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
if (inbuf_tmp == NULL)
goto errorexit;
@@ -797,9 +812,10 @@ decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
Py_ssize_t npendings;
npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- if (npendings + ctx->pendingsize > MAXDECPENDING) {
- PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
- return -1;
+ if (npendings + ctx->pendingsize > MAXDECPENDING ||
+ npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
+ PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
+ return -1;
}
memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
ctx->pendingsize += npendings;
@@ -1001,7 +1017,7 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
PyObject *args, PyObject *kwargs)
{
MultibyteDecodeBuffer buf;
- char *data, *wdata;
+ char *data, *wdata = NULL;
Py_ssize_t wsize, finalsize = 0, size, origpending;
int final = 0;
@@ -1017,6 +1033,10 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
wdata = data;
}
else {
+ if (size > PY_SSIZE_T_MAX - self->pendingsize) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
wsize = size + self->pendingsize;
wdata = PyMem_Malloc(wsize);
if (wdata == NULL)
@@ -1235,6 +1255,10 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
PyObject *ctr;
char *ctrdata;
+ if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
rsize = PyString_GET_SIZE(cres) + self->pendingsize;
ctr = PyString_FromStringAndSize(NULL, rsize);
if (ctr == NULL)
diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c
index 3443b42..fcbd2e9 100644
--- a/Modules/datetimemodule.c
+++ b/Modules/datetimemodule.c
@@ -1115,6 +1115,8 @@ format_utcoffset(char *buf, size_t buflen, const char *sep,
char sign;
int none;
+ assert(buflen >= 1);
+
offset = call_utcoffset(tzinfo, tzinfoarg, &none);
if (offset == -1 && PyErr_Occurred())
return -1;
@@ -1206,6 +1208,11 @@ wrap_strftime(PyObject *object, const char *format, size_t format_len,
* a new format. Since computing the replacements for those codes
* is expensive, don't unless they're actually used.
*/
+ if (format_len > INT_MAX - 1) {
+ PyErr_NoMemory();
+ goto Done;
+ }
+
totalnew = format_len + 1; /* realistic if no %z/%Z/%f */
newfmt = PyString_FromStringAndSize(NULL, totalnew);
if (newfmt == NULL) goto Done;
diff --git a/Modules/md5.c b/Modules/md5.c
index c35d96c..0e1058f 100644
--- a/Modules/md5.c
+++ b/Modules/md5.c
@@ -53,6 +53,7 @@
#include "md5.h"
#include <string.h>
+#include <limits.h>
#undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */
#ifdef ARCH_IS_BIG_ENDIAN
@@ -330,6 +331,18 @@ md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
if (nbytes <= 0)
return;
+ /* this special case is handled recursively */
+ if (nbytes > INT_MAX - offset) {
+ int overlap;
+
+ /* handle the append in two steps to prevent overflow */
+ overlap = 64 - offset;
+
+ md5_append(pms, data, overlap);
+ md5_append(pms, data + overlap, nbytes - overlap);
+ return;
+ }
+
/* Update the message length. */
pms->count[1] += nbytes >> 29;
pms->count[0] += nbits;
diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c
index 8b00fed..bc60959 100644
--- a/Modules/stropmodule.c
+++ b/Modules/stropmodule.c
@@ -578,7 +578,7 @@ strop_expandtabs(PyObject *self, PyObject *args)
char* e;
char* p;
char* q;
- Py_ssize_t i, j;
+ Py_ssize_t i, j, old_j;
PyObject* out;
char* string;
Py_ssize_t stringlen;
@@ -595,12 +595,18 @@ strop_expandtabs(PyObject *self, PyObject *args)
}
/* First pass: determine size of output string */
- i = j = 0; /* j: current column; i: total of previous lines */
+ i = j = old_j = 0; /* j: current column; i: total of previous lines */
e = string + stringlen;
for (p = string; p < e; p++) {
- if (*p == '\t')
+ if (*p == '\t') {
j += tabsize - (j%tabsize);
- else {
+ if (old_j > j) {
+ PyErr_SetString(PyExc_OverflowError,
+ "new string is too long");
+ return NULL;
+ }
+ old_j = j;
+ } else {
j++;
if (*p == '\n') {
i += j;
@@ -609,6 +615,11 @@ strop_expandtabs(PyObject *self, PyObject *args)
}
}
+ if ((i + j) < 0) {
+ PyErr_SetString(PyExc_OverflowError, "new string is too long");
+ return NULL;
+ }
+
/* Second pass: create output string and fill it */
out = PyString_FromStringAndSize(NULL, i+j);
if (out == NULL)
diff --git a/Objects/bufferobject.c b/Objects/bufferobject.c
index 37d9bcb..3bd8c6b 100644
--- a/Objects/bufferobject.c
+++ b/Objects/bufferobject.c
@@ -207,7 +207,10 @@ PyBuffer_New(Py_ssize_t size)
"size must be zero or positive");
return NULL;
}
- /* XXX: check for overflow in multiply */
+ if (sizeof(*b) > PY_SSIZE_T_MAX - size) {
+ /* unlikely */
+ return PyErr_NoMemory();
+ }
/* Inline PyObject_New */
o = (PyObject *)PyObject_MALLOC(sizeof(*b) + size);
if ( o == NULL )
@@ -401,6 +404,8 @@ buffer_concat(PyBufferObject *self, PyObject *other)
if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 )
return NULL;
+ assert(count <= PY_SIZE_MAX - size);
+
ob = PyString_FromStringAndSize(NULL, size + count);
if ( ob == NULL )
return NULL;
diff --git a/Objects/listobject.c b/Objects/listobject.c
index e72f81f..16a2ce6 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -45,7 +45,16 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
* system realloc().
* The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
*/
- new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize;
+ new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6);
+
+ /* check for integer overflow */
+ if (new_allocated > PY_SIZE_MAX - newsize) {
+ PyErr_NoMemory();
+ return -1;
+ } else {
+ new_allocated += newsize;
+ }
+
if (newsize == 0)
new_allocated = 0;
items = self->ob_item;
@@ -118,8 +127,9 @@ PyList_New(Py_ssize_t size)
return NULL;
}
nbytes = size * sizeof(PyObject *);
- /* Check for overflow */
- if (nbytes / sizeof(PyObject *) != (size_t)size)
+ /* Check for overflow up front, without doing the multiplication:
+ * the compiler may optimise out a check of the overflowed result */
+ if (size > PY_SIZE_MAX / sizeof(PyObject *))
return PyErr_NoMemory();
if (numfree) {
numfree--;
@@ -1407,6 +1417,10 @@ merge_getmem(MergeState *ms, Py_ssize_t need)
* we don't care what's in the block.
*/
merge_freemem(ms);
+ if (need > PY_SSIZE_T_MAX / sizeof(PyObject*)) {
+ PyErr_NoMemory();
+ return -1;
+ }
ms->a = (PyObject **)PyMem_Malloc(need * sizeof(PyObject*));
if (ms->a) {
ms->alloced = need;
@@ -2589,6 +2603,8 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value)
step = -step;
}
+ assert(slicelength <= PY_SIZE_MAX / sizeof(PyObject*));
+
garbage = (PyObject**)
PyMem_MALLOC(slicelength*sizeof(PyObject*));
if (!garbage) {
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 2f2b35e..efbd566 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -526,9 +526,9 @@ new_arena(void)
numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
if (numarenas <= maxarenas)
return NULL; /* overflow */
- nbytes = numarenas * sizeof(*arenas);
- if (nbytes / sizeof(*arenas) != numarenas)
+ if (numarenas > PY_SIZE_MAX / sizeof(*arenas))
return NULL; /* overflow */
+ nbytes = numarenas * sizeof(*arenas);
arenaobj = (struct arena_object *)realloc(arenas, nbytes);
if (arenaobj == NULL)
return NULL;
diff --git a/Parser/node.c b/Parser/node.c
index d133a0d..f4c86cb 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -91,6 +91,9 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offs
if (current_capacity < 0 || required_capacity < 0)
return E_OVERFLOW;
if (current_capacity < required_capacity) {
+ if (required_capacity > PY_SIZE_MAX / sizeof(node)) {
+ return E_NOMEM;
+ }
n = n1->n_child;
n = (node *) PyObject_REALLOC(n,
required_capacity * sizeof(node));
diff --git a/Python/asdl.c b/Python/asdl.c
index 72329b9..1105d3a 100644
--- a/Python/asdl.c
+++ b/Python/asdl.c
@@ -5,8 +5,22 @@ asdl_seq *
asdl_seq_new(int size, PyArena *arena)
{
asdl_seq *seq = NULL;
- size_t n = sizeof(asdl_seq) +
- (size ? (sizeof(void *) * (size - 1)) : 0);
+ size_t n = (size ? (sizeof(void *) * (size - 1)) : 0);
+
+ /* check size is sane */
+ if (size < 0 || size == INT_MIN ||
+ (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ /* check if size can be added safely */
+ if (n > PY_SIZE_MAX - sizeof(asdl_seq)) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ n += sizeof(asdl_seq);
seq = (asdl_seq *)PyArena_Malloc(arena, n);
if (!seq) {
@@ -22,8 +36,22 @@ asdl_int_seq *
asdl_int_seq_new(int size, PyArena *arena)
{
asdl_int_seq *seq = NULL;
- size_t n = sizeof(asdl_seq) +
- (size ? (sizeof(int) * (size - 1)) : 0);
+ size_t n = (size ? (sizeof(void *) * (size - 1)) : 0);
+
+ /* check size is sane */
+ if (size < 0 || size == INT_MIN ||
+ (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ /* check if size can be added safely */
+ if (n > PY_SIZE_MAX - sizeof(asdl_seq)) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ n += sizeof(asdl_seq);
seq = (asdl_int_seq *)PyArena_Malloc(arena, n);
if (!seq) {
diff --git a/Python/ast.c b/Python/ast.c
index a6bb1b7..4d874af 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3200,6 +3200,9 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
buf = (char *)s;
u = NULL;
} else {
+ /* check for integer overflow */
+ if (len > PY_SIZE_MAX / 4)
+ return NULL;
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
u = PyString_FromStringAndSize((char *)NULL, len * 4);
if (u == NULL)
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index a2ebb4a..e18eb2a 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -2792,11 +2792,43 @@ filterstring(PyObject *func, PyObject *strobj)
PyString_AS_STRING(item)[0];
} else {
/* do we need more space? */
- Py_ssize_t need = j + reslen + len-i-1;
+ Py_ssize_t need = j;
+
+ /* calculate space requirements while checking for overflow */
+ if (need > PY_SSIZE_T_MAX - reslen) {
+ Py_DECREF(item);
+ goto Fail_1;
+ }
+
+ need += reslen;
+
+ if (need > PY_SSIZE_T_MAX - len) {
+ Py_DECREF(item);
+ goto Fail_1;
+ }
+
+ need += len;
+
+ if (need <= i) {
+ Py_DECREF(item);
+ goto Fail_1;
+ }
+
+ need = need - i - 1;
+
+ assert(need >= 0);
+ assert(outlen >= 0);
+
if (need > outlen) {
/* overallocate, to avoid reallocations */
- if (need<2*outlen)
+ if (outlen > PY_SSIZE_T_MAX / 2) {
+ Py_DECREF(item);
+ return NULL;
+ }
+
+ if (need<2*outlen) {
need = 2*outlen;
+ }
if (_PyString_Resize(&result, need)) {
Py_DECREF(item);
return NULL;
@@ -2888,11 +2920,31 @@ filterunicode(PyObject *func, PyObject *strobj)
else {
/* do we need more space? */
Py_ssize_t need = j + reslen + len - i - 1;
+
+ /* check that the size computation didn't overflow */
+ if ((j > PY_SSIZE_T_MAX - reslen) ||
+ ((j + reslen) > PY_SSIZE_T_MAX - len) ||
+ ((j + reslen + len) < i) ||
+ ((j + reslen + len - i) <= 0)) {
+ Py_DECREF(item);
+ return NULL;
+ }
+
+ assert(need >= 0);
+ assert(outlen >= 0);
+
if (need > outlen) {
/* overallocate,
to avoid reallocations */
- if (need < 2 * outlen)
- need = 2 * outlen;
+ if (need < 2 * outlen) {
+ if (outlen > PY_SSIZE_T_MAX / 2) {
+ Py_DECREF(item);
+ return NULL;
+ } else {
+ need = 2 * outlen;
+ }
+ }
+
if (PyUnicode_Resize(
&result, need) < 0) {
Py_DECREF(item);
diff --git a/Python/compile.c b/Python/compile.c
index c81218d..264fdcd 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -216,6 +216,10 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
return ident; /* Don't mangle if class is just underscores */
}
plen = strlen(p);
+
+ assert(1 <= PY_SSIZE_T_MAX - nlen);
+ assert(1 + nlen <= PY_SSIZE_T_MAX - plen);
+
ident = PyString_FromStringAndSize(NULL, 1 + nlen + plen);
if (!ident)
return 0;
@@ -621,6 +625,12 @@ compiler_next_instr(struct compiler *c, basicblock *b)
size_t oldsize, newsize;
oldsize = b->b_ialloc * sizeof(struct instr);
newsize = oldsize << 1;
+
+ if (oldsize > (PY_SIZE_MAX >> 1)) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
if (newsize == 0) {
PyErr_NoMemory();
return -1;
@@ -3478,6 +3488,10 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno)
a->a_lnotab = PyString_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
if (!a->a_lnotab)
return 0;
+ if (nblocks > PY_SIZE_MAX / sizeof(basicblock *)) {
+ PyErr_NoMemory();
+ return 0;
+ }
a->a_postorder = (basicblock **)PyObject_Malloc(
sizeof(basicblock *) * nblocks);
if (!a->a_postorder) {
@@ -3586,10 +3600,14 @@ assemble_lnotab(struct assembler *a, struct instr *i)
nbytes = a->a_lnotab_off + 2 * ncodes;
len = PyString_GET_SIZE(a->a_lnotab);
if (nbytes >= len) {
- if (len * 2 < nbytes)
+ if ((len <= INT_MAX / 2) && (len * 2 < nbytes))
len = nbytes;
- else
+ else if (len <= INT_MAX / 2)
len *= 2;
+ else {
+ PyErr_NoMemory();
+ return 0;
+ }
if (_PyString_Resize(&a->a_lnotab, len) < 0)
return 0;
}
@@ -3608,10 +3626,14 @@ assemble_lnotab(struct assembler *a, struct instr *i)
nbytes = a->a_lnotab_off + 2 * ncodes;
len = PyString_GET_SIZE(a->a_lnotab);
if (nbytes >= len) {
- if (len * 2 < nbytes)
+ if ((len <= INT_MAX / 2) && len * 2 < nbytes)
len = nbytes;
- else
+ else if (len <= INT_MAX / 2)
len *= 2;
+ else {
+ PyErr_NoMemory();
+ return 0;
+ }
if (_PyString_Resize(&a->a_lnotab, len) < 0)
return 0;
}
@@ -3670,6 +3692,8 @@ assemble_emit(struct assembler *a, struct instr *i)
if (i->i_lineno && !assemble_lnotab(a, i))
return 0;
if (a->a_offset + size >= len) {
+ if (len > PY_SSIZE_T_MAX / 2)
+ return 0;
if (_PyString_Resize(&a->a_bytecode, len * 2) < 0)
return 0;
}