From 9d53457e599623fbad90833c3448835b42d7e7f9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Wed, 11 Jun 2008 07:41:16 +0000 Subject: Merge in release25-maint r60793: Added checks for integer overflows, contributed by Google. Some are only available if asserts are left in the code, in cases where they can't be triggered from Python code. --- Include/pymem.h | 12 ++++--- Include/pyport.h | 11 ++++++ Lib/test/test_array.py | 17 ++++++++++ Lib/test/test_struct.py | 8 +++++ Misc/NEWS | 5 +++ Modules/_csv.c | 10 ++++++ Modules/_struct.c | 6 ++++ Modules/arraymodule.c | 38 ++++++++++++++++++++- Modules/audioop.c | 68 +++++++++++++++++++++++++++++--------- Modules/binascii.c | 42 +++++++++++++++++++++-- Modules/cPickle.c | 16 +++++++++ Modules/cStringIO.c | 15 +++++++-- Modules/cjkcodecs/multibytecodec.c | 38 +++++++++++++++++---- Modules/datetimemodule.c | 7 ++++ Modules/md5.c | 13 ++++++++ Modules/stropmodule.c | 19 ++++++++--- Objects/bufferobject.c | 7 +++- Objects/listobject.c | 22 ++++++++++-- Objects/obmalloc.c | 4 +-- Parser/node.c | 3 ++ Python/asdl.c | 36 +++++++++++++++++--- Python/ast.c | 3 ++ Python/bltinmodule.c | 60 ++++++++++++++++++++++++++++++--- Python/compile.c | 32 +++++++++++++++--- 24 files changed, 438 insertions(+), 54 deletions(-) diff --git a/Include/pymem.h b/Include/pymem.h index 7f74f37..f9acb55 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -85,14 +85,18 @@ PyAPI_FUNC(void) PyMem_Free(void *); */ #define PyMem_New(type, n) \ - ( (type *) PyMem_Malloc((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) #define PyMem_NEW(type, n) \ - ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) #define PyMem_Resize(p, type, n) \ - ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) PyMem_Realloc((p), 
(n) * sizeof(type)) ) ) #define PyMem_RESIZE(p, type, n) \ - ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) ) /* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. diff --git a/Include/pyport.h b/Include/pyport.h index 7c684f7..15c8644 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -117,6 +117,17 @@ typedef Py_intptr_t Py_ssize_t; # error "Python needs a typedef for Py_ssize_t in pyport.h." #endif +/* Largest possible value of size_t. + SIZE_MAX is part of C99, so it might be defined on some + platforms. If it is not defined, (size_t)-1 is a portable + definition for C89, due to the way signed->unsigned + conversion is defined. */ +#ifdef SIZE_MAX +#define PY_SIZE_MAX SIZE_MAX +#else +#define PY_SIZE_MAX ((size_t)-1) +#endif + /* Largest positive value of type Py_ssize_t. */ #define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) /* Smallest negative value of type Py_ssize_t. 
*/ diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index b11c9d6..34a8f79 100755 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -1009,6 +1009,23 @@ tests.append(FloatTest) class DoubleTest(FPTest): typecode = 'd' minitemsize = 8 + + def test_alloc_overflow(self): + a = array.array('d', [-1]*65536) + try: + a *= 65536 + except MemoryError: + pass + else: + self.fail("a *= 2**16 didn't raise MemoryError") + b = array.array('d', [ 2.71828183, 3.14159265, -1]) + try: + b * 1431655766 + except MemoryError: + pass + else: + self.fail("a * 1431655766 didn't raise MemoryError") + tests.append(DoubleTest) def test_main(verbose=None): diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index e3a4e21..7ee47bf 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -8,6 +8,7 @@ from test.test_support import TestFailed, verbose, run_unittest, catch_warning import sys ISBIGENDIAN = sys.byteorder == "big" +IS32BIT = sys.maxint == 0x7fffffff del sys try: @@ -568,6 +569,13 @@ class StructTest(unittest.TestCase): for c in '\x01\x7f\xff\x0f\xf0': self.assertTrue(struct.unpack('>?', c)[0]) + def test_crasher(self): + if IS32BIT: + self.assertRaises(MemoryError, struct.pack, "357913941c", "a") + else: + print "%s test_crasher skipped on 64bit build." + + def test_main(): run_unittest(StructTest) diff --git a/Misc/NEWS b/Misc/NEWS index 4b9f21c..f721122 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -40,6 +40,11 @@ Core and Builtins Exception (KeyboardInterrupt, and SystemExit) propagate instead of ignoring them. +- Added checks for integer overflows, contributed by Google. Some are + only available if asserts are left in the code, in cases where they + can't be triggered from Python code. 
+ + Extension Modules ----------------- diff --git a/Modules/_csv.c b/Modules/_csv.c index c628927..a5787d3 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -559,6 +559,10 @@ parse_grow_buff(ReaderObj *self) self->field = PyMem_Malloc(self->field_size); } else { + if (self->field_size > INT_MAX / 2) { + PyErr_NoMemory(); + return 0; + } self->field_size *= 2; self->field = PyMem_Realloc(self->field, self->field_size); } @@ -1053,6 +1057,12 @@ join_append_data(WriterObj *self, char *field, int quote_empty, static int join_check_rec_size(WriterObj *self, int rec_len) { + + if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { + PyErr_NoMemory(); + return 0; + } + if (rec_len > self->rec_size) { if (self->rec_size == 0) { self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; diff --git a/Modules/_struct.c b/Modules/_struct.c index e5fe211..078c3a5 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1385,6 +1385,12 @@ prepare_s(PyStructObject *self) } } + /* check for overflow */ + if ((len + 1) > (PY_SSIZE_T_MAX / sizeof(formatcode))) { + PyErr_NoMemory(); + return -1; + } + self->s_size = size; self->s_len = len; codes = PyMem_MALLOC((len + 1) * sizeof(formatcode)); diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 89ed27a..c505dad 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -652,6 +652,9 @@ array_concat(arrayobject *a, PyObject *bb) PyErr_BadArgument(); return NULL; } + if (Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { + return PyErr_NoMemory(); + } size = Py_SIZE(a) + Py_SIZE(b); np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) { @@ -674,6 +677,9 @@ array_repeat(arrayobject *a, Py_ssize_t n) Py_ssize_t nbytes; if (n < 0) n = 0; + if ((Py_SIZE(a) != 0) && (n > PY_SSIZE_T_MAX / Py_SIZE(a))) { + return PyErr_NoMemory(); + } size = Py_SIZE(a) * n; np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) @@ -818,6 +824,11 @@ array_do_extend(arrayobject *self, PyObject 
*bb) "can only extend with array of same kind"); return -1; } + if ((Py_SIZE(self) > PY_SSIZE_T_MAX - Py_SIZE(b)) || + ((Py_SIZE(self) + Py_SIZE(b)) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) { + PyErr_NoMemory(); + return -1; + } size = Py_SIZE(self) + Py_SIZE(b); PyMem_RESIZE(self->ob_item, char, size*self->ob_descr->itemsize); if (self->ob_item == NULL) { @@ -859,6 +870,10 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n) if (n < 0) n = 0; items = self->ob_item; + if ((self->ob_descr->itemsize != 0) && + (Py_SIZE(self) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) { + return PyErr_NoMemory(); + } size = Py_SIZE(self) * self->ob_descr->itemsize; if (n == 0) { PyMem_FREE(items); @@ -867,6 +882,9 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n) self->allocated = 0; } else { + if (size > PY_SSIZE_T_MAX / n) { + return PyErr_NoMemory(); + } PyMem_Resize(items, char, n * size); if (items == NULL) return PyErr_NoMemory(); @@ -1148,6 +1166,10 @@ array_reduce(arrayobject *array) Py_INCREF(dict); } if (Py_SIZE(array) > 0) { + if (array->ob_descr->itemsize + > PY_SSIZE_T_MAX / array->ob_size) { + return PyErr_NoMemory(); + } result = Py_BuildValue("O(cs#)O", Py_TYPE(array), array->ob_descr->typecode, @@ -1330,6 +1352,9 @@ array_fromlist(arrayobject *self, PyObject *list) if ((*self->ob_descr->setitem)(self, Py_SIZE(self) - n + i, v) != 0) { Py_SIZE(self) -= n; + if (itemsize && (self->ob_size > PY_SSIZE_T_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, Py_SIZE(self) * itemsize); self->ob_item = item; @@ -1389,6 +1414,10 @@ array_fromstring(arrayobject *self, PyObject *args) n = n / itemsize; if (n > 0) { char *item = self->ob_item; + if ((n > PY_SSIZE_T_MAX - Py_SIZE(self)) || + ((Py_SIZE(self) + n) > PY_SSIZE_T_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, (Py_SIZE(self) + n) * itemsize); if (item == NULL) { PyErr_NoMemory(); @@ -1414,8 +1443,12 @@ values,as if it had been read from a file using the 
fromfile() method)."); static PyObject * array_tostring(arrayobject *self, PyObject *unused) { - return PyString_FromStringAndSize(self->ob_item, + if (self->ob_size <= PY_SSIZE_T_MAX / self->ob_descr->itemsize) { + return PyString_FromStringAndSize(self->ob_item, Py_SIZE(self) * self->ob_descr->itemsize); + } else { + return PyErr_NoMemory(); + } } PyDoc_STRVAR(tostring_doc, @@ -1443,6 +1476,9 @@ array_fromunicode(arrayobject *self, PyObject *args) } if (n > 0) { Py_UNICODE *item = (Py_UNICODE *) self->ob_item; + if (Py_SIZE(self) > PY_SSIZE_T_MAX - n) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, Py_UNICODE, Py_SIZE(self) + n); if (item == NULL) { PyErr_NoMemory(); diff --git a/Modules/audioop.c b/Modules/audioop.c index ce00975..31e3fa4 100644 --- a/Modules/audioop.c +++ b/Modules/audioop.c @@ -829,7 +829,7 @@ static PyObject * audioop_tostereo(PyObject *self, PyObject *args) { signed char *cp, *ncp; - int len, size, val1, val2, val = 0; + int len, new_len, size, val1, val2, val = 0; double fac1, fac2, fval, maxval; PyObject *rv; int i; @@ -846,7 +846,14 @@ audioop_tostereo(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, len*2); + new_len = len*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyString_AsString(rv); @@ -1009,7 +1016,7 @@ audioop_lin2lin(PyObject *self, PyObject *args) { signed char *cp; unsigned char *ncp; - int len, size, size2, val = 0; + int len, new_len, size, size2, val = 0; PyObject *rv; int i, j; @@ -1023,7 +1030,13 @@ audioop_lin2lin(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, (len/size)*size2); + new_len = (len/size)*size2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyString_FromStringAndSize(NULL, new_len); if ( 
rv == 0 ) return 0; ncp = (unsigned char *)PyString_AsString(rv); @@ -1059,6 +1072,7 @@ audioop_ratecv(PyObject *self, PyObject *args) int chan, d, *prev_i, *cur_i, cur_o; PyObject *state, *samps, *str, *rv = NULL; int bytes_per_frame; + size_t alloc_size; weightA = 1; weightB = 0; @@ -1101,8 +1115,14 @@ audioop_ratecv(PyObject *self, PyObject *args) inrate /= d; outrate /= d; - prev_i = (int *) malloc(nchannels * sizeof(int)); - cur_i = (int *) malloc(nchannels * sizeof(int)); + alloc_size = sizeof(int) * (unsigned)nchannels; + if (alloc_size < nchannels) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + prev_i = (int *) malloc(alloc_size); + cur_i = (int *) malloc(alloc_size); if (prev_i == NULL || cur_i == NULL) { (void) PyErr_NoMemory(); goto exit; @@ -1276,7 +1296,7 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) unsigned char *cp; unsigned char cval; signed char *ncp; - int len, size, val; + int len, new_len, size, val; PyObject *rv; int i; @@ -1289,12 +1309,18 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, len*size); + new_len = len*size; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyString_AsString(rv); - for ( i=0; i < len*size; i += size ) { + for ( i=0; i < new_len; i += size ) { cval = *cp++; val = st_ulaw2linear16(cval); @@ -1344,7 +1370,7 @@ audioop_alaw2lin(PyObject *self, PyObject *args) unsigned char *cp; unsigned char cval; signed char *ncp; - int len, size, val; + int len, new_len, size, val; PyObject *rv; int i; @@ -1357,12 +1383,18 @@ audioop_alaw2lin(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, len*size); + new_len = len*size; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + 
return 0; + } + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyString_AsString(rv); - for ( i=0; i < len*size; i += size ) { + for ( i=0; i < new_len; i += size ) { cval = *cp++; val = st_alaw2linear16(cval); @@ -1487,7 +1519,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) { signed char *cp; signed char *ncp; - int len, size, valpred, step, delta, index, sign, vpdiff; + int len, new_len, size, valpred, step, delta, index, sign, vpdiff; PyObject *rv, *str, *state; int i, inputbuffer = 0, bufferstep; @@ -1509,7 +1541,13 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) } else if ( !PyArg_ParseTuple(state, "ii", &valpred, &index) ) return 0; - str = PyString_FromStringAndSize(NULL, len*size*2); + new_len = len*size*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + str = PyString_FromStringAndSize(NULL, new_len); if ( str == 0 ) return 0; ncp = (signed char *)PyString_AsString(str); @@ -1517,7 +1555,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) step = stepsizeTable[index]; bufferstep = 0; - for ( i=0; i < len*size*2; i += size ) { + for ( i=0; i < new_len; i += size ) { /* Step 1 - get the delta value and compute next index */ if ( bufferstep ) { delta = inputbuffer & 0xf; diff --git a/Modules/binascii.c b/Modules/binascii.c index c1fc675..bcbafcf 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -141,7 +141,7 @@ static char table_a2b_base64[] = { #define BASE64_PAD '=' /* Max binary chunk size; limited only by available memory */ -#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3) +#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3) static unsigned char table_b2a_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -198,6 +198,8 @@ binascii_a2b_uu(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) ) return NULL; + 
assert(ascii_len >= 0); + /* First byte: binary data length (in bytes) */ bin_len = (*ascii_data++ - ' ') & 077; ascii_len--; @@ -351,6 +353,11 @@ binascii_a2b_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) ) return NULL; + assert(ascii_len >= 0); + + if (ascii_len > PY_SSIZE_T_MAX - 3) + return PyErr_NoMemory(); + bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ /* Allocate the buffer */ @@ -440,6 +447,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) ) return NULL; + + assert(bin_len >= 0); + if ( bin_len > BASE64_MAXBIN ) { PyErr_SetString(Error, "Too much data for base64 line"); return NULL; @@ -495,6 +505,11 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX - 2) + return PyErr_NoMemory(); + /* Allocate a string that is too big (fixed later) Add two to the initial length to prevent interning which would preclude subsequent resizing. 
*/ @@ -558,6 +573,11 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Worst case: output is twice as big as input (fixed later) */ if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) return NULL; @@ -607,6 +627,11 @@ binascii_b2a_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Allocate a buffer that is at least large enough */ if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) return NULL; @@ -645,9 +670,13 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) ) return NULL; + assert(in_len >= 0); + /* Empty string is a special case */ if ( in_len == 0 ) return PyString_FromString(""); + else if (in_len > PY_SSIZE_T_MAX / 2) + return PyErr_NoMemory(); /* Allocate a buffer of reasonable size. 
Resized when needed */ out_len = in_len*2; @@ -673,6 +702,7 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) #define OUTBYTE(b) \ do { \ if ( --out_len_left < 0 ) { \ + if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \ _PyString_Resize(&rv, 2*out_len); \ if ( rv == NULL ) return NULL; \ out_data = (unsigned char *)PyString_AsString(rv) \ @@ -741,7 +771,7 @@ binascii_crc_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) ) return NULL; - while(len--) { + while(len-- > 0) { crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++]; } @@ -901,7 +931,7 @@ binascii_crc32(PyObject *self, PyObject *args) return NULL; crc = ~ crc; - while (len--) + while (len-- > 0) crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8); /* Note: (crc >> 8) MUST zero fill on left */ @@ -923,6 +953,10 @@ binascii_hexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + if (arglen > PY_SSIZE_T_MAX / 2) + return PyErr_NoMemory(); + retval = PyString_FromStringAndSize(NULL, arglen*2); if (!retval) return NULL; @@ -980,6 +1014,8 @@ binascii_unhexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + /* XXX What should we do about strings with an odd length? Should * we add an implicit leading zero, or a trailing zero? For now, * raise an exception. diff --git a/Modules/cPickle.c b/Modules/cPickle.c index f130087..51fc226 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -3435,6 +3435,14 @@ load_binstring(Unpicklerobject *self) if (self->read_func(self, &s, 4) < 0) return -1; l = calc_binint(s, 4); + if (l < 0) { + /* Corrupt or hostile pickle -- we never write one like + * this. 
+ */ + PyErr_SetString(UnpicklingError, + "BINSTRING pickle has negative byte count"); + return -1; + } if (self->read_func(self, &s, l) < 0) return -1; @@ -3502,6 +3510,14 @@ load_binunicode(Unpicklerobject *self) if (self->read_func(self, &s, 4) < 0) return -1; l = calc_binint(s, 4); + if (l < 0) { + /* Corrupt or hostile pickle -- we never write one like + * this. + */ + PyErr_SetString(UnpicklingError, + "BINUNICODE pickle has negative byte count"); + return -1; + } if (self->read_func(self, &s, l) < 0) return -1; diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c index 139a4a8..237d8c2 100644 --- a/Modules/cStringIO.c +++ b/Modules/cStringIO.c @@ -119,6 +119,7 @@ PyDoc_STRVAR(IO_getval__doc__, static PyObject * IO_cgetval(PyObject *self) { if (!IO__opencheck(IOOOBJECT(self))) return NULL; + assert(IOOOBJECT(self)->pos >= 0); return PyString_FromStringAndSize(((IOobject*)self)->buf, ((IOobject*)self)->pos); } @@ -137,6 +138,7 @@ IO_getval(IOobject *self, PyObject *args) { } else s=self->string_size; + assert(self->pos >= 0); return PyString_FromStringAndSize(self->buf, s); } @@ -157,6 +159,8 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) { Py_ssize_t l; if (!IO__opencheck(IOOOBJECT(self))) return -1; + assert(IOOOBJECT(self)->pos >= 0); + assert(IOOOBJECT(self)->string_size >= 0); l = ((IOobject*)self)->string_size - ((IOobject*)self)->pos; if (n < 0 || n > l) { n = l; @@ -192,12 +196,17 @@ IO_creadline(PyObject *self, char **output) { for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos, s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size; n < s && *n != '\n'; n++); + if (n < s) n++; *output=((IOobject*)self)->buf + ((IOobject*)self)->pos; l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos; - assert(((IOobject*)self)->pos + l < INT_MAX); - ((IOobject*)self)->pos += (int)l; + + assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l); + assert(IOOOBJECT(self)->pos >= 0); + assert(IOOOBJECT(self)->string_size >= 0); + + 
((IOobject*)self)->pos += l; return (int)l; } @@ -215,6 +224,7 @@ IO_readline(IOobject *self, PyObject *args) { n -= m; self->pos -= m; } + assert(IOOOBJECT(self)->pos >= 0); return PyString_FromStringAndSize(output, n); } @@ -277,6 +287,7 @@ IO_tell(IOobject *self, PyObject *unused) { if (!IO__opencheck(self)) return NULL; + assert(self->pos >= 0); return PyInt_FromSsize_t(self->pos); } diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index bbd4c1a..a1b0ca9 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -163,13 +163,17 @@ static PyGetSetDef codecctx_getsets[] = { static int expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) { - Py_ssize_t orgpos, orgsize; + Py_ssize_t orgpos, orgsize, incsize; orgpos = (Py_ssize_t)((char *)buf->outbuf - PyString_AS_STRING(buf->outobj)); orgsize = PyString_GET_SIZE(buf->outobj); - if (_PyString_Resize(&buf->outobj, orgsize + ( - esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) + incsize = (esize < (orgsize >> 1) ? 
(orgsize >> 1) | 1 : esize); + + if (orgsize > PY_SSIZE_T_MAX - incsize) + return -1; + + if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1) return -1; buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; @@ -473,6 +477,12 @@ multibytecodec_encode(MultibyteCodec *codec, buf.excobj = NULL; buf.inbuf = buf.inbuf_top = *data; buf.inbuf_end = buf.inbuf_top + datalen; + + if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { + PyErr_NoMemory(); + goto errorexit; + } + buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); if (buf.outobj == NULL) goto errorexit; @@ -735,6 +745,11 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, origpending = ctx->pendingsize; if (origpending > 0) { + if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_NoMemory(); + /* inbuf_tmp == NULL */ + goto errorexit; + } inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); if (inbuf_tmp == NULL) goto errorexit; @@ -797,9 +812,10 @@ decoder_append_pending(MultibyteStatefulDecoderContext *ctx, Py_ssize_t npendings; npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - if (npendings + ctx->pendingsize > MAXDECPENDING) { - PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); - return -1; + if (npendings + ctx->pendingsize > MAXDECPENDING || + npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); + return -1; } memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); ctx->pendingsize += npendings; @@ -1001,7 +1017,7 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, PyObject *args, PyObject *kwargs) { MultibyteDecodeBuffer buf; - char *data, *wdata; + char *data, *wdata = NULL; Py_ssize_t wsize, finalsize = 0, size, origpending; int final = 0; @@ -1017,6 +1033,10 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, wdata = data; } else { + if (size > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } wsize = size + 
self->pendingsize; wdata = PyMem_Malloc(wsize); if (wdata == NULL) @@ -1235,6 +1255,10 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, PyObject *ctr; char *ctrdata; + if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } rsize = PyString_GET_SIZE(cres) + self->pendingsize; ctr = PyString_FromStringAndSize(NULL, rsize); if (ctr == NULL) diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 3443b42..fcbd2e9 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -1115,6 +1115,8 @@ format_utcoffset(char *buf, size_t buflen, const char *sep, char sign; int none; + assert(buflen >= 1); + offset = call_utcoffset(tzinfo, tzinfoarg, &none); if (offset == -1 && PyErr_Occurred()) return -1; @@ -1206,6 +1208,11 @@ wrap_strftime(PyObject *object, const char *format, size_t format_len, * a new format. Since computing the replacements for those codes * is expensive, don't unless they're actually used. */ + if (format_len > INT_MAX - 1) { + PyErr_NoMemory(); + goto Done; + } + totalnew = format_len + 1; /* realistic if no %z/%Z/%f */ newfmt = PyString_FromStringAndSize(NULL, totalnew); if (newfmt == NULL) goto Done; diff --git a/Modules/md5.c b/Modules/md5.c index c35d96c..0e1058f 100644 --- a/Modules/md5.c +++ b/Modules/md5.c @@ -53,6 +53,7 @@ #include "md5.h" #include <string.h> +#include <limits.h> #undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */ #ifdef ARCH_IS_BIG_ENDIAN @@ -330,6 +331,18 @@ md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes) if (nbytes <= 0) return; + /* this special case is handled recursively */ + if (nbytes > INT_MAX - offset) { + int overlap; + + /* handle the append in two steps to prevent overflow */ + overlap = 64 - offset; + + md5_append(pms, data, overlap); + md5_append(pms, data + overlap, nbytes - overlap); + return; + } + /* Update the message length.
*/ pms->count[1] += nbytes >> 29; pms->count[0] += nbits; diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c index 8b00fed..bc60959 100644 --- a/Modules/stropmodule.c +++ b/Modules/stropmodule.c @@ -578,7 +578,7 @@ strop_expandtabs(PyObject *self, PyObject *args) char* e; char* p; char* q; - Py_ssize_t i, j; + Py_ssize_t i, j, old_j; PyObject* out; char* string; Py_ssize_t stringlen; @@ -595,12 +595,18 @@ strop_expandtabs(PyObject *self, PyObject *args) } /* First pass: determine size of output string */ - i = j = 0; /* j: current column; i: total of previous lines */ + i = j = old_j = 0; /* j: current column; i: total of previous lines */ e = string + stringlen; for (p = string; p < e; p++) { - if (*p == '\t') + if (*p == '\t') { j += tabsize - (j%tabsize); - else { + if (old_j > j) { + PyErr_SetString(PyExc_OverflowError, + "new string is too long"); + return NULL; + } + old_j = j; + } else { j++; if (*p == '\n') { i += j; @@ -609,6 +615,11 @@ strop_expandtabs(PyObject *self, PyObject *args) } } + if ((i + j) < 0) { + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; + } + /* Second pass: create output string and fill it */ out = PyString_FromStringAndSize(NULL, i+j); if (out == NULL) diff --git a/Objects/bufferobject.c b/Objects/bufferobject.c index 37d9bcb..3bd8c6b 100644 --- a/Objects/bufferobject.c +++ b/Objects/bufferobject.c @@ -207,7 +207,10 @@ PyBuffer_New(Py_ssize_t size) "size must be zero or positive"); return NULL; } - /* XXX: check for overflow in multiply */ + if (sizeof(*b) > PY_SSIZE_T_MAX - size) { + /* unlikely */ + return PyErr_NoMemory(); + } /* Inline PyObject_New */ o = (PyObject *)PyObject_MALLOC(sizeof(*b) + size); if ( o == NULL ) @@ -401,6 +404,8 @@ buffer_concat(PyBufferObject *self, PyObject *other) if ( (count = (*pb->bf_getreadbuffer)(other, 0, &ptr2)) < 0 ) return NULL; + assert(count <= PY_SIZE_MAX - size); + ob = PyString_FromStringAndSize(NULL, size + count); if ( ob == NULL ) return NULL; 
diff --git a/Objects/listobject.c b/Objects/listobject.c index e72f81f..16a2ce6 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -45,7 +45,16 @@ list_resize(PyListObject *self, Py_ssize_t newsize) * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... */ - new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize; + new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); + + /* check for integer overflow */ + if (new_allocated > PY_SIZE_MAX - newsize) { + PyErr_NoMemory(); + return -1; + } else { + new_allocated += newsize; + } + if (newsize == 0) new_allocated = 0; items = self->ob_item; @@ -118,8 +127,9 @@ PyList_New(Py_ssize_t size) return NULL; } nbytes = size * sizeof(PyObject *); - /* Check for overflow */ - if (nbytes / sizeof(PyObject *) != (size_t)size) + /* Check for overflow without an actual overflow, + * which can cause compiler to optimise out */ + if (size > PY_SIZE_MAX / sizeof(PyObject *)) return PyErr_NoMemory(); if (numfree) { numfree--; @@ -1407,6 +1417,10 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * we don't care what's in the block. */ merge_freemem(ms); + if (need > PY_SSIZE_T_MAX / sizeof(PyObject*)) { + PyErr_NoMemory(); + return -1; + } ms->a = (PyObject **)PyMem_Malloc(need * sizeof(PyObject*)); if (ms->a) { ms->alloced = need; @@ -2589,6 +2603,8 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) step = -step; } + assert(slicelength <= PY_SIZE_MAX / sizeof(PyObject*)); + garbage = (PyObject**) PyMem_MALLOC(slicelength*sizeof(PyObject*)); if (!garbage) { diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2f2b35e..efbd566 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -526,9 +526,9 @@ new_arena(void) numarenas = maxarenas ? 
maxarenas << 1 : INITIAL_ARENA_OBJECTS; if (numarenas <= maxarenas) return NULL; /* overflow */ - nbytes = numarenas * sizeof(*arenas); - if (nbytes / sizeof(*arenas) != numarenas) + if (numarenas > PY_SIZE_MAX / sizeof(*arenas)) return NULL; /* overflow */ + nbytes = numarenas * sizeof(*arenas); arenaobj = (struct arena_object *)realloc(arenas, nbytes); if (arenaobj == NULL) return NULL; diff --git a/Parser/node.c b/Parser/node.c index d133a0d..f4c86cb 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -91,6 +91,9 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offs if (current_capacity < 0 || required_capacity < 0) return E_OVERFLOW; if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } n = n1->n_child; n = (node *) PyObject_REALLOC(n, required_capacity * sizeof(node)); diff --git a/Python/asdl.c b/Python/asdl.c index 72329b9..1105d3a 100644 --- a/Python/asdl.c +++ b/Python/asdl.c @@ -5,8 +5,22 @@ asdl_seq * asdl_seq_new(int size, PyArena *arena) { asdl_seq *seq = NULL; - size_t n = sizeof(asdl_seq) + - (size ? (sizeof(void *) * (size - 1)) : 0); + size_t n = (size ? (sizeof(void *) * (size - 1)) : 0); + + /* check size is sane */ + if (size < 0 || size == INT_MIN || + (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { + PyErr_NoMemory(); + return NULL; + } + + /* check if size can be added safely */ + if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { + PyErr_NoMemory(); + return NULL; + } + + n += sizeof(asdl_seq); seq = (asdl_seq *)PyArena_Malloc(arena, n); if (!seq) { @@ -22,8 +36,22 @@ asdl_int_seq * asdl_int_seq_new(int size, PyArena *arena) { asdl_int_seq *seq = NULL; - size_t n = sizeof(asdl_seq) + - (size ? (sizeof(int) * (size - 1)) : 0); + size_t n = (size ? 
(sizeof(void *) * (size - 1)) : 0); + + /* check size is sane */ + if (size < 0 || size == INT_MIN || + (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { + PyErr_NoMemory(); + return NULL; + } + + /* check if size can be added safely */ + if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { + PyErr_NoMemory(); + return NULL; + } + + n += sizeof(asdl_seq); seq = (asdl_int_seq *)PyArena_Malloc(arena, n); if (!seq) { diff --git a/Python/ast.c b/Python/ast.c index a6bb1b7..4d874af 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3200,6 +3200,9 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons buf = (char *)s; u = NULL; } else { + /* check for integer overflow */ + if (len > PY_SIZE_MAX / 4) + return NULL; /* "\XX" may become "\u005c\uHHLL" (12 bytes) */ u = PyString_FromStringAndSize((char *)NULL, len * 4); if (u == NULL) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index a2ebb4a..e18eb2a 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2792,11 +2792,43 @@ filterstring(PyObject *func, PyObject *strobj) PyString_AS_STRING(item)[0]; } else { /* do we need more space? */ - Py_ssize_t need = j + reslen + len-i-1; + Py_ssize_t need = j; + + /* calculate space requirements while checking for overflow */ + if (need > PY_SSIZE_T_MAX - reslen) { + Py_DECREF(item); + goto Fail_1; + } + + need += reslen; + + if (need > PY_SSIZE_T_MAX - len) { + Py_DECREF(item); + goto Fail_1; + } + + need += len; + + if (need <= i) { + Py_DECREF(item); + goto Fail_1; + } + + need = need - i - 1; + + assert(need >= 0); + assert(outlen >= 0); + if (need > outlen) { /* overallocate, to avoid reallocations */ - if (need<2*outlen) + if (outlen > PY_SSIZE_T_MAX / 2) { + Py_DECREF(item); + return NULL; + } + + if (need<2*outlen) { need = 2*outlen; + } if (_PyString_Resize(&result, need)) { Py_DECREF(item); return NULL; @@ -2888,11 +2920,31 @@ filterunicode(PyObject *func, PyObject *strobj) else { /* do we need more space? 
*/ Py_ssize_t need = j + reslen + len - i - 1; + + /* check that didnt overflow */ + if ((j > PY_SSIZE_T_MAX - reslen) || + ((j + reslen) > PY_SSIZE_T_MAX - len) || + ((j + reslen + len) < i) || + ((j + reslen + len - i) <= 0)) { + Py_DECREF(item); + return NULL; + } + + assert(need >= 0); + assert(outlen >= 0); + if (need > outlen) { /* overallocate, to avoid reallocations */ - if (need < 2 * outlen) - need = 2 * outlen; + if (need < 2 * outlen) { + if (outlen > PY_SSIZE_T_MAX / 2) { + Py_DECREF(item); + return NULL; + } else { + need = 2 * outlen; + } + } + if (PyUnicode_Resize( &result, need) < 0) { Py_DECREF(item); diff --git a/Python/compile.c b/Python/compile.c index c81218d..264fdcd 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -216,6 +216,10 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) return ident; /* Don't mangle if class is just underscores */ } plen = strlen(p); + + assert(1 <= PY_SSIZE_T_MAX - nlen); + assert(1 + nlen <= PY_SSIZE_T_MAX - plen); + ident = PyString_FromStringAndSize(NULL, 1 + nlen + plen); if (!ident) return 0; @@ -621,6 +625,12 @@ compiler_next_instr(struct compiler *c, basicblock *b) size_t oldsize, newsize; oldsize = b->b_ialloc * sizeof(struct instr); newsize = oldsize << 1; + + if (oldsize > (PY_SIZE_MAX >> 1)) { + PyErr_NoMemory(); + return -1; + } + if (newsize == 0) { PyErr_NoMemory(); return -1; @@ -3478,6 +3488,10 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno) a->a_lnotab = PyString_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); if (!a->a_lnotab) return 0; + if (nblocks > PY_SIZE_MAX / sizeof(basicblock *)) { + PyErr_NoMemory(); + return 0; + } a->a_postorder = (basicblock **)PyObject_Malloc( sizeof(basicblock *) * nblocks); if (!a->a_postorder) { @@ -3586,10 +3600,14 @@ assemble_lnotab(struct assembler *a, struct instr *i) nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { - if (len * 2 < nbytes) + if ((len <= INT_MAX / 2) && (len * 2 < 
nbytes)) len = nbytes; - else + else if (len <= INT_MAX / 2) len *= 2; + else { + PyErr_NoMemory(); + return 0; + } if (_PyString_Resize(&a->a_lnotab, len) < 0) return 0; } @@ -3608,10 +3626,14 @@ assemble_lnotab(struct assembler *a, struct instr *i) nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { - if (len * 2 < nbytes) + if ((len <= INT_MAX / 2) && len * 2 < nbytes) len = nbytes; - else + else if (len <= INT_MAX / 2) len *= 2; + else { + PyErr_NoMemory(); + return 0; + } if (_PyString_Resize(&a->a_lnotab, len) < 0) return 0; } @@ -3670,6 +3692,8 @@ assemble_emit(struct assembler *a, struct instr *i) if (i->i_lineno && !assemble_lnotab(a, i)) return 0; if (a->a_offset + size >= len) { + if (len > PY_SSIZE_T_MAX / 2) + return 0; if (_PyString_Resize(&a->a_bytecode, len * 2) < 0) return 0; } -- cgit v0.12