From d254ca8813986ac72f39fdac90fdf8f63904c28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sun, 2 Mar 2008 20:32:57 +0000 Subject: Backport of r61180: Added checks for integer overflows, contributed by Google. Some are only available if asserts are left in the code, in cases where they can't be triggered from Python code. --- Include/pymem.h | 12 ++++++---- Include/pyport.h | 11 +++++++++ Misc/NEWS | 13 +++++++++++ Modules/_csv.c | 10 ++++++++ Modules/arraymodule.c | 34 ++++++++++++++++++++++++++- Modules/audioop.c | 56 ++++++++++++++++++++++++++++++++++---------- Modules/binascii.c | 40 ++++++++++++++++++++++++++++++-- Modules/cPickle.c | 16 +++++++++++++ Modules/cStringIO.c | 11 +++++++++ Modules/datetimemodule.c | 7 ++++++ Modules/rgbimgmodule.c | 34 +++++++++++++++++++++++---- Modules/stropmodule.c | 19 +++++++++++---- Objects/bufferobject.c | 6 +++++ Objects/listobject.c | 11 +++++++-- Parser/node.c | 3 +++ Python/bltinmodule.c | 60 ++++++++++++++++++++++++++++++++++++++++++++---- 16 files changed, 309 insertions(+), 34 deletions(-) diff --git a/Include/pymem.h b/Include/pymem.h index f8aef29..0e18f03 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -86,14 +86,18 @@ PyAPI_FUNC(void) PyMem_Free(void *); */ #define PyMem_New(type, n) \ - ( (type *) PyMem_Malloc((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) #define PyMem_NEW(type, n) \ - ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) #define PyMem_Resize(p, type, n) \ - ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) ) #define PyMem_RESIZE(p, type, n) \ - ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) 
PyMem_REALLOC((p), (n) * sizeof(type)) ) ) /* In order to avoid breaking old code mixing PyObject_{New, NEW} with PyMem_{Del, DEL} and PyMem_{Free, FREE}, the PyMem "release memory" diff --git a/Include/pyport.h b/Include/pyport.h index ffd46ac..0b1cc16 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -554,6 +554,17 @@ typedef struct fd_set { #error "LONG_BIT definition appears wrong for platform (bad gcc/glibc config?)." #endif +/* Largest possible value of size_t. + SIZE_MAX is part of C99, so it might be defined on some + platforms. If it is not defined, (size_t)-1 is a portable + definition for C89, due to the way signed->unsigned + conversion is defined. */ +#ifdef SIZE_MAX +#define PY_SIZE_MAX SIZE_MAX +#else +#define PY_SIZE_MAX ((size_t)-1) +#endif + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS b/Misc/NEWS index a55dd64..f2749be 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -4,6 +4,19 @@ Python News (editors: check NEWS.help for information about editing NEWS using ReST.) +What's New in Python 2.3.7c1? +============================= + +*Release date: 02-Mar-2008* + +Core and builtins +----------------- + +- Added checks for integer overflows, contributed by Google. Some are + only available if asserts are left in the code, in cases where they + can't be triggered from Python code. + + What's New in Python 2.3.6? 
=========================== diff --git a/Modules/_csv.c b/Modules/_csv.c index 077416c..080f3c4 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -470,6 +470,10 @@ parse_grow_buff(ReaderObj *self) self->field = PyMem_Malloc(self->field_size); } else { + if (self->field_size > INT_MAX / 2) { + PyErr_NoMemory(); + return 0; + } self->field_size *= 2; self->field = PyMem_Realloc(self->field, self->field_size); } @@ -1003,6 +1007,12 @@ join_append_data(WriterObj *self, char *field, int quote_empty, static int join_check_rec_size(WriterObj *self, int rec_len) { + + if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { + PyErr_NoMemory(); + return 0; + } + if (rec_len > self->rec_size) { if (self->rec_size == 0) { self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 228c8f4..6671150 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -632,6 +632,9 @@ array_concat(arrayobject *a, PyObject *bb) PyErr_BadArgument(); return NULL; } + if (a->ob_size > INT_MAX - b->ob_size) { + return PyErr_NoMemory(); + } size = a->ob_size + b->ob_size; np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) { @@ -654,6 +657,9 @@ array_repeat(arrayobject *a, int n) int nbytes; if (n < 0) n = 0; + if ((a->ob_size != 0) && (n > INT_MAX / a->ob_size)) { + return PyErr_NoMemory(); + } size = a->ob_size * n; np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) @@ -775,6 +781,11 @@ array_do_extend(arrayobject *self, PyObject *bb) "can only extend with array of same kind"); return -1; } + if ((self->ob_size > INT_MAX - b->ob_size) || + ((self->ob_size + b->ob_size) > INT_MAX / self->ob_descr->itemsize)) { + PyErr_NoMemory(); + return -1; + } size = self->ob_size + b->ob_size; PyMem_RESIZE(self->ob_item, char, size*self->ob_descr->itemsize); if (self->ob_item == NULL) { @@ -809,6 +820,10 @@ array_inplace_repeat(arrayobject *self, int n) if (n < 0) n = 0; items = 
self->ob_item; + if ((self->ob_descr->itemsize != 0) && + (self->ob_size > INT_MAX / self->ob_descr->itemsize)) { + return PyErr_NoMemory(); + } size = self->ob_size * self->ob_descr->itemsize; if (n == 0) { PyMem_FREE(items); @@ -816,6 +831,9 @@ array_inplace_repeat(arrayobject *self, int n) self->ob_size = 0; } else { + if (size > INT_MAX / n) { + return PyErr_NoMemory(); + } PyMem_Resize(items, char, n * size); if (items == NULL) return PyErr_NoMemory(); @@ -1224,6 +1242,9 @@ array_fromlist(arrayobject *self, PyObject *list) if ((*self->ob_descr->setitem)(self, self->ob_size - n + i, v) != 0) { self->ob_size -= n; + if (itemsize && (self->ob_size > INT_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, self->ob_size * itemsize); self->ob_item = item; @@ -1282,6 +1303,10 @@ array_fromstring(arrayobject *self, PyObject *args) n = n / itemsize; if (n > 0) { char *item = self->ob_item; + if ((n > INT_MAX - self->ob_size) || + ((self->ob_size + n) > INT_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, (self->ob_size + n) * itemsize); if (item == NULL) { PyErr_NoMemory(); @@ -1306,8 +1331,12 @@ values,as if it had been read from a file using the fromfile() method)."); static PyObject * array_tostring(arrayobject *self, PyObject *unused) { - return PyString_FromStringAndSize(self->ob_item, + if (self->ob_size <= INT_MAX / self->ob_descr->itemsize) { + return PyString_FromStringAndSize(self->ob_item, self->ob_size * self->ob_descr->itemsize); + } else { + return PyErr_NoMemory(); + } } PyDoc_STRVAR(tostring_doc, @@ -1335,6 +1364,9 @@ array_fromunicode(arrayobject *self, PyObject *args) } if (n > 0) { Py_UNICODE *item = (Py_UNICODE *) self->ob_item; + if (self->ob_size > INT_MAX - n) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, Py_UNICODE, self->ob_size + n); if (item == NULL) { PyErr_NoMemory(); diff --git a/Modules/audioop.c b/Modules/audioop.c index 52824b8..801571d 100644 --- a/Modules/audioop.c +++ 
b/Modules/audioop.c @@ -674,7 +674,7 @@ static PyObject * audioop_tostereo(PyObject *self, PyObject *args) { signed char *cp, *ncp; - int len, size, val1, val2, val = 0; + int len, new_len, size, val1, val2, val = 0; double fac1, fac2, fval, maxval; PyObject *rv; int i; @@ -690,7 +690,14 @@ audioop_tostereo(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, len*2); + new_len = len*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyString_AsString(rv); @@ -853,7 +860,7 @@ audioop_lin2lin(PyObject *self, PyObject *args) { signed char *cp; unsigned char *ncp; - int len, size, size2, val = 0; + int len, new_len, size, size2, val = 0; PyObject *rv; int i, j; @@ -867,7 +874,13 @@ audioop_lin2lin(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, (len/size)*size2); + new_len = (len/size)*size2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (unsigned char *)PyString_AsString(rv); @@ -903,6 +916,7 @@ audioop_ratecv(PyObject *self, PyObject *args) int chan, d, *prev_i, *cur_i, cur_o; PyObject *state, *samps, *str, *rv = NULL; int bytes_per_frame; + size_t alloc_size; weightA = 1; weightB = 0; @@ -944,8 +958,14 @@ audioop_ratecv(PyObject *self, PyObject *args) inrate /= d; outrate /= d; - prev_i = (int *) malloc(nchannels * sizeof(int)); - cur_i = (int *) malloc(nchannels * sizeof(int)); + alloc_size = sizeof(int) * (unsigned)nchannels; + if (alloc_size < nchannels) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + prev_i = (int *) malloc(alloc_size); + cur_i = (int *) malloc(alloc_size); if (prev_i == NULL || cur_i == NULL) { (void) 
PyErr_NoMemory(); goto exit; @@ -1114,7 +1134,7 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) unsigned char *cp; unsigned char cval; signed char *ncp; - int len, size, val; + int len, new_len, size, val; PyObject *rv; int i; @@ -1127,12 +1147,18 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) return 0; } - rv = PyString_FromStringAndSize(NULL, len*size); + new_len = len*size; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyString_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyString_AsString(rv); - for ( i=0; i < len*size; i += size ) { + for ( i=0; i < new_len; i += size ) { cval = *cp++; val = st_ulaw_to_linear(cval); @@ -1257,7 +1283,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) { signed char *cp; signed char *ncp; - int len, size, valpred, step, delta, index, sign, vpdiff; + int len, new_len, size, valpred, step, delta, index, sign, vpdiff; PyObject *rv, *str, *state; int i, inputbuffer = 0, bufferstep; @@ -1279,7 +1305,13 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) } else if ( !PyArg_Parse(state, "(ii)", &valpred, &index) ) return 0; - str = PyString_FromStringAndSize(NULL, len*size*2); + new_len = len*size*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + str = PyString_FromStringAndSize(NULL, new_len); if ( str == 0 ) return 0; ncp = (signed char *)PyString_AsString(str); @@ -1287,7 +1319,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) step = stepsizeTable[index]; bufferstep = 0; - for ( i=0; i < len*size*2; i += size ) { + for ( i=0; i < new_len; i += size ) { /* Step 1 - get the delta value and compute next index */ if ( bufferstep ) { delta = inputbuffer & 0xf; diff --git a/Modules/binascii.c b/Modules/binascii.c index 9cc49f6..74720a7 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -194,6 +194,8 @@ binascii_a2b_uu(PyObject 
*self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) ) return NULL; + assert(ascii_len >= 0); + /* First byte: binary data length (in bytes) */ bin_len = (*ascii_data++ - ' ') & 077; ascii_len--; @@ -346,6 +348,11 @@ binascii_a2b_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) ) return NULL; + assert(ascii_len >= 0); + + if (ascii_len > INT_MAX - 3) + return PyErr_NoMemory(); + bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ /* Allocate the buffer */ @@ -435,6 +442,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) ) return NULL; + + assert(bin_len >= 0); + if ( bin_len > BASE64_MAXBIN ) { PyErr_SetString(Error, "Too much data for base64 line"); return NULL; @@ -490,6 +500,11 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) ) return NULL; + assert(len >= 0); + + if (len > INT_MAX - 2) + return PyErr_NoMemory(); + /* Allocate a string that is too big (fixed later) */ if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL ) return NULL; @@ -551,6 +566,11 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) ) return NULL; + assert(len >= 0); + + if (len > INT_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Worst case: output is twice as big as input (fixed later) */ if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL ) return NULL; @@ -600,6 +620,11 @@ binascii_b2a_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) ) return NULL; + assert(len >= 0); + + if (len > INT_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Allocate a buffer that is at least large enough */ if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL ) return NULL; @@ -638,9 +663,13 @@ binascii_rledecode_hqx(PyObject *self, PyObject 
*args) if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) ) return NULL; + assert(in_len >= 0); + /* Empty string is a special case */ if ( in_len == 0 ) return Py_BuildValue("s", ""); + else if (in_len > INT_MAX / 2) + return PyErr_NoMemory(); /* Allocate a buffer of reasonable size. Resized when needed */ out_len = in_len*2; @@ -666,6 +695,7 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) #define OUTBYTE(b) \ do { \ if ( --out_len_left < 0 ) { \ + if ( out_len > INT_MAX / 2) return PyErr_NoMemory(); \ _PyString_Resize(&rv, 2*out_len); \ if ( rv == NULL ) return NULL; \ out_data = (unsigned char *)PyString_AsString(rv) \ @@ -734,7 +764,7 @@ binascii_crc_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) ) return NULL; - while(len--) { + while(len-- > 0) { crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++]; } @@ -878,7 +908,7 @@ binascii_crc32(PyObject *self, PyObject *args) /* only want the trailing 32 bits */ crc &= 0xFFFFFFFFUL; #endif - while (len--) + while (len-- > 0) crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8); /* Note: (crc >> 8) MUST zero fill on left */ @@ -908,6 +938,10 @@ binascii_hexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + if (arglen > INT_MAX / 2) + return PyErr_NoMemory(); + retval = PyString_FromStringAndSize(NULL, arglen*2); if (!retval) return NULL; @@ -965,6 +999,8 @@ binascii_unhexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + /* XXX What should we do about strings with an odd length? Should * we add an implicit leading zero, or a trailing zero? For now, * raise an exception. 
diff --git a/Modules/cPickle.c b/Modules/cPickle.c index c452dc1..9aa3d90 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -3409,6 +3409,14 @@ load_binstring(Unpicklerobject *self) if (self->read_func(self, &s, 4) < 0) return -1; l = calc_binint(s, 4); + if (l < 0) { + /* Corrupt or hostile pickle -- we never write one like + * this. + */ + PyErr_SetString(UnpicklingError, + "BINSTRING pickle has negative byte count"); + return -1; + } if (self->read_func(self, &s, l) < 0) return -1; @@ -3476,6 +3484,14 @@ load_binunicode(Unpicklerobject *self) if (self->read_func(self, &s, 4) < 0) return -1; l = calc_binint(s, 4); + if (l < 0) { + /* Corrupt or hostile pickle -- we never write one like + * this. + */ + PyErr_SetString(UnpicklingError, + "BINUNICODE pickle has negative byte count"); + return -1; + } if (self->read_func(self, &s, l) < 0) return -1; diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c index ac84ab0..015854d 100644 --- a/Modules/cStringIO.c +++ b/Modules/cStringIO.c @@ -121,6 +121,7 @@ PyDoc_STRVAR(IO_getval__doc__, static PyObject * IO_cgetval(PyObject *self) { UNLESS (IO__opencheck(IOOOBJECT(self))) return NULL; + assert(IOOOBJECT(self)->pos >= 0); return PyString_FromStringAndSize(((IOobject*)self)->buf, ((IOobject*)self)->pos); } @@ -139,6 +140,7 @@ IO_getval(IOobject *self, PyObject *args) { } else s=self->string_size; + assert(self->pos >= 0); return PyString_FromStringAndSize(self->buf, s); } @@ -158,6 +160,8 @@ IO_cread(PyObject *self, char **output, int n) { int l; UNLESS (IO__opencheck(IOOOBJECT(self))) return -1; + assert(IOOOBJECT(self)->pos >= 0); + assert(IOOOBJECT(self)->string_size >= 0); l = ((IOobject*)self)->string_size - ((IOobject*)self)->pos; if (n < 0 || n > l) { n = l; @@ -197,6 +201,11 @@ IO_creadline(PyObject *self, char **output) { *output=((IOobject*)self)->buf + ((IOobject*)self)->pos; l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos; + + assert(IOOOBJECT(self)->pos <= INT_MAX - l); + 
assert(IOOOBJECT(self)->pos >= 0); + assert(IOOOBJECT(self)->string_size >= 0); + ((IOobject*)self)->pos += l; return l; } @@ -215,6 +224,7 @@ IO_readline(IOobject *self, PyObject *args) { n -= m; self->pos -= m; } + assert(IOOOBJECT(self)->pos >= 0); return PyString_FromStringAndSize(output, n); } @@ -274,6 +284,7 @@ IO_tell(IOobject *self, PyObject *unused) { UNLESS (IO__opencheck(self)) return NULL; + assert(self->pos >= 0); return PyInt_FromLong(self->pos); } diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 80d54e1..6637791 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -1098,6 +1098,8 @@ format_utcoffset(char *buf, size_t buflen, const char *sep, char sign; int none; + assert(buflen >= 1); + offset = call_utcoffset(tzinfo, tzinfoarg, &none); if (offset == -1 && PyErr_Occurred()) return -1; @@ -1175,6 +1177,11 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, * a new format. Since computing the replacements for those codes * is expensive, don't unless they're actually used. 
*/ + if (PyString_Size(format) > INT_MAX - 1) { + PyErr_NoMemory(); + goto Done; + } + totalnew = PyString_Size(format) + 1; /* realistic if no %z/%Z */ newfmt = PyString_FromStringAndSize(NULL, totalnew); if (newfmt == NULL) goto Done; diff --git a/Modules/rgbimgmodule.c b/Modules/rgbimgmodule.c index 904c64b..ba0808d 100644 --- a/Modules/rgbimgmodule.c +++ b/Modules/rgbimgmodule.c @@ -269,7 +269,7 @@ longimagedata(PyObject *self, PyObject *args) Py_Int32 *starttab = NULL, *lengthtab = NULL; FILE *inf = NULL; IMAGE image; - int y, z, tablen; + int y, z, tablen, new_size; int xsize, ysize, zsize; int bpp, rle, cur, badorder; int rlebuflen; @@ -301,9 +301,15 @@ longimagedata(PyObject *self, PyObject *args) zsize = image.zsize; if (rle) { tablen = ysize * zsize * sizeof(Py_Int32); + rlebuflen = (int) (1.05 * xsize +10); + if ((tablen / sizeof(Py_Int32)) != (ysize * zsize) || + rlebuflen < 0) { + PyErr_NoMemory(); + goto finally; + } + starttab = (Py_Int32 *)malloc(tablen); lengthtab = (Py_Int32 *)malloc(tablen); - rlebuflen = (int) (1.05 * xsize +10); rledat = (unsigned char *)malloc(rlebuflen); if (!starttab || !lengthtab || !rledat) { PyErr_NoMemory(); @@ -331,8 +337,14 @@ longimagedata(PyObject *self, PyObject *args) fseek(inf, 512 + 2 * tablen, SEEK_SET); cur = 512 + 2 * tablen; + new_size = xsize * ysize + TAGLEN; + if (new_size < 0 || (new_size * sizeof(Py_Int32)) < 0) { + PyErr_NoMemory(); + goto finally; + } + rv = PyString_FromStringAndSize((char *)NULL, - (xsize * ysize + TAGLEN) * sizeof(Py_Int32)); + new_size * sizeof(Py_Int32)); if (rv == NULL) goto finally; @@ -400,8 +412,14 @@ longimagedata(PyObject *self, PyObject *args) copybw((Py_Int32 *) base, xsize * ysize); } else { + new_size = xsize * ysize + TAGLEN; + if (new_size < 0 || (new_size * sizeof(Py_Int32)) < 0) { + PyErr_NoMemory(); + goto finally; + } + rv = PyString_FromStringAndSize((char *) 0, - (xsize*ysize+TAGLEN)*sizeof(Py_Int32)); + new_size*sizeof(Py_Int32)); if (rv == NULL) goto finally; 
@@ -581,10 +599,16 @@ longstoimage(PyObject *self, PyObject *args) return NULL; } tablen = ysize * zsize * sizeof(Py_Int32); + rlebuflen = (int) (1.05 * xsize + 10); + + if ((tablen / sizeof(Py_Int32)) != (ysize * zsize) || + rlebuflen < 0 || (xsize * sizeof(Py_Int32)) < 0) { + PyErr_NoMemory(); + goto finally; + } starttab = (Py_Int32 *)malloc(tablen); lengthtab = (Py_Int32 *)malloc(tablen); - rlebuflen = (int) (1.05 * xsize + 10); rlebuf = (unsigned char *)malloc(rlebuflen); lumbuf = (unsigned char *)malloc(xsize * sizeof(Py_Int32)); if (!starttab || !lengthtab || !rlebuf || !lumbuf) { diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c index 8eb64a0..632172b 100644 --- a/Modules/stropmodule.c +++ b/Modules/stropmodule.c @@ -576,7 +576,7 @@ strop_expandtabs(PyObject *self, PyObject *args) char* e; char* p; char* q; - int i, j; + int i, j, old_j; PyObject* out; char* string; int stringlen; @@ -593,12 +593,18 @@ strop_expandtabs(PyObject *self, PyObject *args) } /* First pass: determine size of output string */ - i = j = 0; /* j: current column; i: total of previous lines */ + i = j = old_j = 0; /* j: current column; i: total of previous lines */ e = string + stringlen; for (p = string; p < e; p++) { - if (*p == '\t') + if (*p == '\t') { j += tabsize - (j%tabsize); - else { + if (old_j > j) { + PyErr_SetString(PyExc_OverflowError, + "new string is too long"); + return NULL; + } + old_j = j; + } else { j++; if (*p == '\n') { i += j; @@ -607,6 +613,11 @@ strop_expandtabs(PyObject *self, PyObject *args) } } + if ((i + j) < 0) { + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; + } + /* Second pass: create output string and fill it */ out = PyString_FromStringAndSize(NULL, i+j); if (out == NULL) diff --git a/Objects/bufferobject.c b/Objects/bufferobject.c index c0e3c80..6ae8d6f 100644 --- a/Objects/bufferobject.c +++ b/Objects/bufferobject.c @@ -138,6 +138,10 @@ PyBuffer_New(int size) "size must be zero or positive"); return NULL; 
} + if (sizeof(*b) > INT_MAX - size) { + /* unlikely */ + return PyErr_NoMemory(); + } /* Inline PyObject_New */ o = PyObject_MALLOC(sizeof(*b) + size); if ( o == NULL ) @@ -296,6 +300,8 @@ buffer_concat(PyBufferObject *self, PyObject *other) if ( (count = (*pb->bf_getreadbuffer)(other, 0, &p2)) < 0 ) return NULL; + assert(count <= PY_SIZE_MAX - self->b_size); + ob = PyString_FromStringAndSize(NULL, self->b_size + count); p1 = PyString_AS_STRING(ob); memcpy(p1, self->b_ptr, self->b_size); diff --git a/Objects/listobject.c b/Objects/listobject.c index 727c9e6..4d75560 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -62,8 +62,9 @@ PyList_New(int size) return NULL; } nbytes = size * sizeof(PyObject *); - /* Check for overflow */ - if (nbytes / sizeof(PyObject *) != (size_t)size) { + /* Check for overflow without an actual overflow, + * which can cause compiler to optimise out */ + if (size > PY_SIZE_MAX / sizeof(PyObject *)) { return PyErr_NoMemory(); } op = PyObject_GC_New(PyListObject, &PyList_Type); @@ -1235,6 +1236,10 @@ merge_getmem(MergeState *ms, int need) * we don't care what's in the block. 
*/ merge_freemem(ms); + if (need > INT_MAX / sizeof(PyObject*)) { + PyErr_NoMemory(); + return -1; + } ms->a = (PyObject **)PyMem_Malloc(need * sizeof(PyObject*)); if (ms->a) { ms->alloced = need; @@ -2312,6 +2317,8 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) return 0; } + assert(slicelength <= PY_SIZE_MAX / sizeof(PyObject*)); + garbage = (PyObject**) PyMem_MALLOC(slicelength*sizeof(PyObject*)); diff --git a/Parser/node.c b/Parser/node.c index 75900ce..35f78d8 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -91,6 +91,9 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno) if (current_capacity < 0 || required_capacity < 0) return E_OVERFLOW; if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } n = n1->n_child; n = (node *) PyObject_REALLOC(n, required_capacity * sizeof(node)); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 98dae39..067b48c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2278,11 +2278,43 @@ filterstring(PyObject *func, PyObject *strobj) PyString_AS_STRING(item)[0]; } else { /* do we need more space? */ - int need = j + reslen + len-i-1; + int need = j; + + /* calculate space requirements while checking for overflow */ + if (need > INT_MAX - reslen) { + Py_DECREF(item); + goto Fail_1; + } + + need += reslen; + + if (need > INT_MAX - len) { + Py_DECREF(item); + goto Fail_1; + } + + need += len; + + if (need <= i) { + Py_DECREF(item); + goto Fail_1; + } + + need = need - i - 1; + + assert(need >= 0); + assert(outlen >= 0); + if (need > outlen) { /* overallocate, to avoid reallocations */ - if (need<2*outlen) + if (outlen > INT_MAX / 2) { + Py_DECREF(item); + return NULL; + } + + if (need<2*outlen) { need = 2*outlen; + } if (_PyString_Resize(&result, need)) { Py_DECREF(item); return NULL; @@ -2373,10 +2405,30 @@ filterunicode(PyObject *func, PyObject *strobj) } else { /* do we need more space? 
*/ int need = j + reslen + len-i-1; + + /* check that the computation of need didn't overflow */ + if ((j > INT_MAX - reslen) || + ((j + reslen) > INT_MAX - len) || + ((j + reslen + len) < i) || + ((j + reslen + len - i) <= 0)) { + Py_DECREF(item); + return NULL; + } + + assert(need >= 0); + assert(outlen >= 0); + if (need > outlen) { /* overallocate, to avoid reallocations */ - if (need<2*outlen) - need = 2*outlen; + if (need < 2 * outlen) { + if (outlen > INT_MAX / 2) { + Py_DECREF(item); + return NULL; + } else { + need = 2 * outlen; + } + } + if (PyUnicode_Resize(&result, need)) { Py_DECREF(item); goto Fail_1; -- cgit v0.12