diff options
author | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2008-06-18 00:47:36 (GMT) |
---|---|---|
committer | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2008-06-18 00:47:36 (GMT) |
commit | 9c74b14fe9b6dddc9d41dd37f431f174350004d4 (patch) | |
tree | 3d30d794259f55f5021f5e8036447523d0e1eea3 | |
parent | 036aa5433e963f6576c5dfa02ace4ca0e2c642a2 (diff) | |
download | cpython-9c74b14fe9b6dddc9d41dd37f431f174350004d4.zip cpython-9c74b14fe9b6dddc9d41dd37f431f174350004d4.tar.gz cpython-9c74b14fe9b6dddc9d41dd37f431f174350004d4.tar.bz2 |
Merged revisions 64114 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r64114 | gregory.p.smith | 2008-06-11 09:41:16 +0200 (mer., 11 juin 2008) | 6 lines
Merge in release25-maint r60793:
Added checks for integer overflows, contributed by Google. Some are
only available if asserts are left in the code, in cases where they
can't be triggered from Python code.
........
-rw-r--r-- | Include/pymem.h | 12 | ||||
-rw-r--r-- | Include/pyport.h | 11 | ||||
-rwxr-xr-x | Lib/test/test_array.py | 17 | ||||
-rw-r--r-- | Modules/_csv.c | 10 | ||||
-rw-r--r-- | Modules/arraymodule.c | 35 | ||||
-rw-r--r-- | Modules/audioop.c | 68 | ||||
-rw-r--r-- | Modules/binascii.c | 40 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 38 | ||||
-rw-r--r-- | Modules/datetimemodule.c | 7 | ||||
-rw-r--r-- | Objects/listobject.c | 22 | ||||
-rw-r--r-- | Objects/memoryobject.c | 5 | ||||
-rw-r--r-- | Objects/obmalloc.c | 4 | ||||
-rw-r--r-- | Parser/node.c | 3 | ||||
-rw-r--r-- | Python/asdl.c | 36 | ||||
-rw-r--r-- | Python/ast.c | 3 | ||||
-rw-r--r-- | Python/compile.c | 32 |
16 files changed, 299 insertions, 44 deletions
diff --git a/Include/pymem.h b/Include/pymem.h index 7f74f37..f9acb55 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -85,14 +85,18 @@ PyAPI_FUNC(void) PyMem_Free(void *); */ #define PyMem_New(type, n) \ - ( (type *) PyMem_Malloc((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) #define PyMem_NEW(type, n) \ - ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) #define PyMem_Resize(p, type, n) \ - ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) ) #define PyMem_RESIZE(p, type, n) \ - ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) + ( assert((n) <= PY_SIZE_MAX / sizeof(type)) , \ + ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) ) /* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. diff --git a/Include/pyport.h b/Include/pyport.h index d6fcf56..3991bc5 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -106,6 +106,17 @@ typedef Py_intptr_t Py_ssize_t; # error "Python needs a typedef for Py_ssize_t in pyport.h." #endif +/* Largest possible value of size_t. + SIZE_MAX is part of C99, so it might be defined on some + platforms. If it is not defined, (size_t)-1 is a portable + definition for C89, due to the way signed->unsigned + conversion is defined. */ +#ifdef SIZE_MAX +#define PY_SIZE_MAX SIZE_MAX +#else +#define PY_SIZE_MAX ((size_t)-1) +#endif + /* Largest positive value of type Py_ssize_t. */ #define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) /* Smallest negative value of type Py_ssize_t. */ diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index 3f8b683..5b6b9f2 100755 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -962,6 +962,23 @@ tests.append(FloatTest) class DoubleTest(FPTest): typecode = 'd' minitemsize = 8 + + def test_alloc_overflow(self): + a = array.array('d', [-1]*65536) + try: + a *= 65536 + except MemoryError: + pass + else: + self.fail("a *= 2**16 didn't raise MemoryError") + b = array.array('d', [ 2.71828183, 3.14159265, -1]) + try: + b * 1431655766 + except MemoryError: + pass + else: + self.fail("a * 1431655766 didn't raise MemoryError") + tests.append(DoubleTest) def test_main(verbose=None): diff --git a/Modules/_csv.c b/Modules/_csv.c index c654712..430ccdc 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -533,6 +533,10 @@ parse_grow_buff(ReaderObj *self) self->field = PyMem_New(Py_UNICODE, self->field_size); } else { + if (self->field_size > INT_MAX / 2) { + PyErr_NoMemory(); + return 0; + } self->field_size *= 2; self->field = PyMem_Resize(self->field, Py_UNICODE, self->field_size); @@ -1038,6 +1042,12 @@ join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty, static int join_check_rec_size(WriterObj *self, int rec_len) { + + if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { + PyErr_NoMemory(); + return 0; + } + if (rec_len > self->rec_size) { if (self->rec_size == 0) { self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 3e26369..d2ae367 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -642,6 +642,9 @@ array_concat(arrayobject *a, PyObject *bb) PyErr_BadArgument(); return NULL; } + if (Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { + return PyErr_NoMemory(); + } size = Py_SIZE(a) + Py_SIZE(b); np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) { @@ -664,6 +667,9 @@ array_repeat(arrayobject *a, Py_ssize_t n) Py_ssize_t nbytes; if (n < 0) n = 0; + if ((Py_SIZE(a) != 0) && (n > PY_SSIZE_T_MAX / Py_SIZE(a))) { + return PyErr_NoMemory(); + } size = Py_SIZE(a) * n; np = (arrayobject *) newarrayobject(&Arraytype, size, a->ob_descr); if (np == NULL) @@ -853,6 +859,10 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n) if (n < 0) n = 0; items = self->ob_item; + if ((self->ob_descr->itemsize != 0) && + (Py_SIZE(self) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) { + return PyErr_NoMemory(); + } size = Py_SIZE(self) * self->ob_descr->itemsize; if (n == 0) { PyMem_FREE(items); @@ -861,6 +871,9 @@ array_inplace_repeat(arrayobject *self, Py_ssize_t n) self->allocated = 0; } else { + if (size > PY_SSIZE_T_MAX / n) { + return PyErr_NoMemory(); + } PyMem_Resize(items, char, n * size); if (items == NULL) return PyErr_NoMemory(); @@ -1142,6 +1155,10 @@ array_reduce(arrayobject *array) Py_INCREF(dict); } if (Py_SIZE(array) > 0) { + if (array->ob_descr->itemsize + > PY_SSIZE_T_MAX / Py_SIZE(array)) { + return PyErr_NoMemory(); + } result = Py_BuildValue("O(cy#)O", Py_TYPE(array), array->ob_descr->typecode, @@ -1315,6 +1332,9 @@ array_fromlist(arrayobject *self, PyObject *list) if ((*self->ob_descr->setitem)(self, Py_SIZE(self) - n + i, v) != 0) { Py_SIZE(self) -= n; + if (itemsize && (Py_SIZE(self) > PY_SSIZE_T_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, Py_SIZE(self) * itemsize); self->ob_item = item; @@ -1373,6 +1393,10 @@ array_fromstring(arrayobject *self, PyObject *args) n = n / itemsize; if (n > 0) { char *item = self->ob_item; + if ((n > PY_SSIZE_T_MAX - Py_SIZE(self)) || + ((Py_SIZE(self) + n) > PY_SSIZE_T_MAX / itemsize)) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, char, (Py_SIZE(self) + n) * itemsize); if (item == NULL) { PyErr_NoMemory(); @@ -1398,8 +1422,12 @@ values, as if it had been read from a file using the fromfile() method)."); static PyObject * array_tostring(arrayobject *self, PyObject *unused) { - return PyBytes_FromStringAndSize(self->ob_item, - Py_SIZE(self) * self->ob_descr->itemsize); + if (Py_SIZE(self) <= PY_SSIZE_T_MAX / self->ob_descr->itemsize) { + return PyBytes_FromStringAndSize(self->ob_item, + Py_SIZE(self) * self->ob_descr->itemsize); + } else { + return PyErr_NoMemory(); + } } PyDoc_STRVAR(tostring_doc, @@ -1428,6 +1456,9 @@ array_fromunicode(arrayobject *self, PyObject *args) } if (n > 0) { Py_UNICODE *item = (Py_UNICODE *) self->ob_item; + if (Py_SIZE(self) > PY_SSIZE_T_MAX - n) { + return PyErr_NoMemory(); + } PyMem_RESIZE(item, Py_UNICODE, Py_SIZE(self) + n); if (item == NULL) { PyErr_NoMemory(); diff --git a/Modules/audioop.c b/Modules/audioop.c index 57f25c6..c660501 100644 --- a/Modules/audioop.c +++ b/Modules/audioop.c @@ -829,7 +829,7 @@ static PyObject * audioop_tostereo(PyObject *self, PyObject *args) { signed char *cp, *ncp; - int len, size, val1, val2, val = 0; + int len, new_len, size, val1, val2, val = 0; double fac1, fac2, fval, maxval; PyObject *rv; int i; @@ -846,7 +846,14 @@ audioop_tostereo(PyObject *self, PyObject *args) return 0; } - rv = PyBytes_FromStringAndSize(NULL, len*2); + new_len = len*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + + rv = PyBytes_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyBytes_AsString(rv); @@ -1009,7 +1016,7 @@ audioop_lin2lin(PyObject *self, PyObject *args) { signed char *cp; unsigned char *ncp; - int len, size, size2, val = 0; + int len, new_len, size, size2, val = 0; PyObject *rv; int i, j; @@ -1023,7 +1030,13 @@ audioop_lin2lin(PyObject *self, PyObject *args) return 0; } - rv = PyBytes_FromStringAndSize(NULL, (len/size)*size2); + new_len = (len/size)*size2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyBytes_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (unsigned char *)PyBytes_AsString(rv); @@ -1059,6 +1072,7 @@ audioop_ratecv(PyObject *self, PyObject *args) int chan, d, *prev_i, *cur_i, cur_o; PyObject *state, *samps, *str, *rv = NULL; int bytes_per_frame; + size_t alloc_size; weightA = 1; weightB = 0; @@ -1101,8 +1115,14 @@ audioop_ratecv(PyObject *self, PyObject *args) inrate /= d; outrate /= d; - prev_i = (int *) malloc(nchannels * sizeof(int)); - cur_i = (int *) malloc(nchannels * sizeof(int)); + alloc_size = sizeof(int) * (unsigned)nchannels; + if (alloc_size < nchannels) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + prev_i = (int *) malloc(alloc_size); + cur_i = (int *) malloc(alloc_size); if (prev_i == NULL || cur_i == NULL) { (void) PyErr_NoMemory(); goto exit; @@ -1275,7 +1295,7 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) unsigned char *cp; unsigned char cval; signed char *ncp; - int len, size, val; + int len, new_len, size, val; PyObject *rv; int i; @@ -1288,12 +1308,18 @@ audioop_ulaw2lin(PyObject *self, PyObject *args) return 0; } - rv = PyBytes_FromStringAndSize(NULL, len*size); + new_len = len*size; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyBytes_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyBytes_AsString(rv); - for ( i=0; i < len*size; i += size ) { + for ( i=0; i < new_len; i += size ) { cval = *cp++; val = st_ulaw2linear16(cval); @@ -1343,7 +1369,7 @@ audioop_alaw2lin(PyObject *self, PyObject *args) unsigned char *cp; unsigned char cval; signed char *ncp; - int len, size, val; + int len, new_len, size, val; PyObject *rv; int i; @@ -1356,12 +1382,18 @@ audioop_alaw2lin(PyObject *self, PyObject *args) return 0; } - rv = PyBytes_FromStringAndSize(NULL, len*size); + new_len = len*size; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + rv = PyBytes_FromStringAndSize(NULL, new_len); if ( rv == 0 ) return 0; ncp = (signed char *)PyBytes_AsString(rv); - for ( i=0; i < len*size; i += size ) { + for ( i=0; i < new_len; i += size ) { cval = *cp++; val = st_alaw2linear16(cval); @@ -1486,7 +1518,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) { signed char *cp; signed char *ncp; - int len, size, valpred, step, delta, index, sign, vpdiff; + int len, new_len, size, valpred, step, delta, index, sign, vpdiff; PyObject *rv, *str, *state; int i, inputbuffer = 0, bufferstep; @@ -1508,7 +1540,13 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) } else if ( !PyArg_ParseTuple(state, "ii", &valpred, &index) ) return 0; - str = PyBytes_FromStringAndSize(NULL, len*size*2); + new_len = len*size*2; + if (new_len < 0) { + PyErr_SetString(PyExc_MemoryError, + "not enough memory for output buffer"); + return 0; + } + str = PyBytes_FromStringAndSize(NULL, new_len); if ( str == 0 ) return 0; ncp = (signed char *)PyBytes_AsString(str); @@ -1516,7 +1554,7 @@ audioop_adpcm2lin(PyObject *self, PyObject *args) step = stepsizeTable[index]; bufferstep = 0; - for ( i=0; i < len*size*2; i += size ) { + for ( i=0; i < new_len; i += size ) { /* Step 1 - get the delta value and compute next index */ if ( bufferstep ) { delta = inputbuffer & 0xf; diff --git a/Modules/binascii.c b/Modules/binascii.c index d3e8a51..ea34bcd 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -198,6 +198,8 @@ binascii_a2b_uu(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) ) return NULL; + assert(ascii_len >= 0); + /* First byte: binary data length (in bytes) */ bin_len = (*ascii_data++ - ' ') & 077; ascii_len--; @@ -355,6 +357,11 @@ binascii_a2b_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) ) return NULL; + assert(ascii_len >= 0); + + if (ascii_len > PY_SSIZE_T_MAX - 3) + return PyErr_NoMemory(); + bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ /* Allocate the buffer */ @@ -448,6 +455,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) ) return NULL; + + assert(bin_len >= 0); + if ( bin_len > BASE64_MAXBIN ) { PyErr_SetString(Error, "Too much data for base64 line"); return NULL; @@ -507,6 +517,11 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX - 2) + return PyErr_NoMemory(); + /* Allocate a string that is too big (fixed later) Add two to the initial length to prevent interning which would preclude subsequent resizing. */ @@ -574,6 +589,11 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Worst case: output is twice as big as input (fixed later) */ if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) return NULL; @@ -627,6 +647,11 @@ binascii_b2a_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) ) return NULL; + assert(len >= 0); + + if (len > PY_SSIZE_T_MAX / 2 - 2) + return PyErr_NoMemory(); + /* Allocate a buffer that is at least large enough */ if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) return NULL; @@ -669,9 +694,13 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) ) return NULL; + assert(in_len >= 0); + /* Empty string is a special case */ if ( in_len == 0 ) return PyBytes_FromStringAndSize("", 0); + else if (in_len > PY_SSIZE_T_MAX / 2) + return PyErr_NoMemory(); /* Allocate a buffer of reasonable size. Resized when needed */ out_len = in_len*2; @@ -697,6 +726,7 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) #define OUTBYTE(b) \ do { \ if ( --out_len_left < 0 ) { \ + if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \ if (_PyBytes_Resize(&rv, 2*out_len) < 0) \ { Py_DECREF(rv); return NULL; } \ out_data = (unsigned char *)PyBytes_AS_STRING(rv) \ @@ -769,7 +799,7 @@ binascii_crc_hqx(PyObject *self, PyObject *args) if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) ) return NULL; - while(len--) { + while(len-- > 0) { crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++]; } @@ -925,7 +955,7 @@ binascii_crc32(PyObject *self, PyObject *args) return NULL; crc = ~ crc; - while (len--) { + while (len-- > 0) { crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8); /* Note: (crc >> 8) MUST zero fill on left */ } @@ -948,6 +978,10 @@ binascii_hexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + if (arglen > PY_SSIZE_T_MAX / 2) + return PyErr_NoMemory(); + retval = PyBytes_FromStringAndSize(NULL, arglen*2); if (!retval) return NULL; @@ -999,6 +1033,8 @@ binascii_unhexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen)) return NULL; + assert(arglen >= 0); + /* XXX What should we do about strings with an odd length? Should * we add an implicit leading zero, or a trailing zero? For now, * raise an exception. diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 546f4e2..d6bafe7 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -172,13 +172,17 @@ static PyGetSetDef codecctx_getsets[] = { static int expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) { - Py_ssize_t orgpos, orgsize; + Py_ssize_t orgpos, orgsize, incsize; orgpos = (Py_ssize_t)((char *)buf->outbuf - PyBytes_AS_STRING(buf->outobj)); orgsize = PyBytes_GET_SIZE(buf->outobj); - if (_PyBytes_Resize(&buf->outobj, orgsize + ( - esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) + incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); + + if (orgsize > PY_SSIZE_T_MAX - incsize) + return -1; + + if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1) return -1; buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; @@ -481,6 +485,12 @@ multibytecodec_encode(MultibyteCodec *codec, buf.excobj = NULL; buf.inbuf = buf.inbuf_top = *data; buf.inbuf_end = buf.inbuf_top + datalen; + + if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { + PyErr_NoMemory(); + goto errorexit; + } + buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); if (buf.outobj == NULL) goto errorexit; @@ -743,6 +753,11 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, origpending = ctx->pendingsize; if (origpending > 0) { + if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_NoMemory(); + /* inbuf_tmp == NULL */ + goto errorexit; + } inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); if (inbuf_tmp == NULL) goto errorexit; @@ -805,9 +820,10 @@ decoder_append_pending(MultibyteStatefulDecoderContext *ctx, Py_ssize_t npendings; npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - if (npendings + ctx->pendingsize > MAXDECPENDING) { - PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); - return -1; + if (npendings + ctx->pendingsize > MAXDECPENDING || + npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); + return -1; } memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); ctx->pendingsize += npendings; @@ -1009,7 +1025,7 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, PyObject *args, PyObject *kwargs) { MultibyteDecodeBuffer buf; - char *data, *wdata; + char *data, *wdata = NULL; Py_ssize_t wsize, finalsize = 0, size, origpending; int final = 0; @@ -1025,6 +1041,10 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, wdata = data; } else { + if (size > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } wsize = size + self->pendingsize; wdata = PyMem_Malloc(wsize); if (wdata == NULL) @@ -1244,6 +1264,10 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, PyObject *ctr; char *ctrdata; + if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; ctr = PyBytes_FromStringAndSize(NULL, rsize); if (ctr == NULL) diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 6c2d5a0..4dd4fe31 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -1111,6 +1111,8 @@ format_utcoffset(char *buf, size_t buflen, const char *sep, char sign; int none; + assert(buflen >= 1); + offset = call_utcoffset(tzinfo, tzinfoarg, &none); if (offset == -1 && PyErr_Occurred()) return -1; @@ -1250,6 +1252,11 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, * a new format. Since computing the replacements for those codes * is expensive, don't unless they're actually used. */ + if (flen > INT_MAX - 1) { + PyErr_NoMemory(); + goto Done; + } + totalnew = flen + 1; /* realistic if no %z/%Z */ newfmt = PyBytes_FromStringAndSize(NULL, totalnew); if (newfmt == NULL) goto Done; diff --git a/Objects/listobject.c b/Objects/listobject.c index d5f3a19..255f087 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -45,7 +45,16 @@ list_resize(PyListObject *self, Py_ssize_t newsize) * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... */ - new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize; + new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); + + /* check for integer overflow */ + if (new_allocated > PY_SIZE_MAX - newsize) { + PyErr_NoMemory(); + return -1; + } else { + new_allocated += newsize; + } + if (newsize == 0) new_allocated = 0; items = self->ob_item; @@ -118,8 +127,9 @@ PyList_New(Py_ssize_t size) return NULL; } nbytes = size * sizeof(PyObject *); - /* Check for overflow */ - if (nbytes / sizeof(PyObject *) != (size_t)size) + /* Check for overflow without an actual overflow, + * which can cause compiler to optimise out */ + if (size > PY_SIZE_MAX / sizeof(PyObject *)) return PyErr_NoMemory(); if (numfree) { numfree--; @@ -1323,6 +1333,10 @@ merge_getmem(MergeState *ms, Py_ssize_t need) * we don't care what's in the block. */ merge_freemem(ms); + if (need > PY_SSIZE_T_MAX / sizeof(PyObject*)) { + PyErr_NoMemory(); + return -1; + } ms->a = (PyObject **)PyMem_Malloc(need * sizeof(PyObject*)); if (ms->a) { ms->alloced = need; @@ -2415,6 +2429,8 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) step = -step; } + assert(slicelength <= PY_SIZE_MAX / sizeof(PyObject*)); + garbage = (PyObject**) PyMem_MALLOC(slicelength*sizeof(PyObject*)); if (!garbage) { diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 9bb1697..78ada17 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -151,8 +151,11 @@ _indirect_copy_nd(char *dest, Py_buffer *view, char fort) char *ptr; void (*func)(int, Py_ssize_t *, Py_ssize_t *); + if (view->ndim > PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) { + PyErr_NoMemory(); + return -1; + } - /* XXX(nnorwitz): need to check for overflow! */ indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*view->ndim); if (indices == NULL) { PyErr_NoMemory(); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2f2b35e..efbd566 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -526,9 +526,9 @@ new_arena(void) numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; if (numarenas <= maxarenas) return NULL; /* overflow */ - nbytes = numarenas * sizeof(*arenas); - if (nbytes / sizeof(*arenas) != numarenas) + if (numarenas > PY_SIZE_MAX / sizeof(*arenas)) return NULL; /* overflow */ + nbytes = numarenas * sizeof(*arenas); arenaobj = (struct arena_object *)realloc(arenas, nbytes); if (arenaobj == NULL) return NULL; diff --git a/Parser/node.c b/Parser/node.c index d133a0d..f4c86cb 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -91,6 +91,9 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offs if (current_capacity < 0 || required_capacity < 0) return E_OVERFLOW; if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } n = n1->n_child; n = (node *) PyObject_REALLOC(n, required_capacity * sizeof(node)); diff --git a/Python/asdl.c b/Python/asdl.c index 72329b9..1105d3a 100644 --- a/Python/asdl.c +++ b/Python/asdl.c @@ -5,8 +5,22 @@ asdl_seq * asdl_seq_new(int size, PyArena *arena) { asdl_seq *seq = NULL; - size_t n = sizeof(asdl_seq) + - (size ? (sizeof(void *) * (size - 1)) : 0); + size_t n = (size ? (sizeof(void *) * (size - 1)) : 0); + + /* check size is sane */ + if (size < 0 || size == INT_MIN || + (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { + PyErr_NoMemory(); + return NULL; + } + + /* check if size can be added safely */ + if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { + PyErr_NoMemory(); + return NULL; + } + + n += sizeof(asdl_seq); seq = (asdl_seq *)PyArena_Malloc(arena, n); if (!seq) { @@ -22,8 +36,22 @@ asdl_int_seq * asdl_int_seq_new(int size, PyArena *arena) { asdl_int_seq *seq = NULL; - size_t n = sizeof(asdl_seq) + - (size ? (sizeof(int) * (size - 1)) : 0); + size_t n = (size ? (sizeof(void *) * (size - 1)) : 0); + + /* check size is sane */ + if (size < 0 || size == INT_MIN || + (size && ((size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { + PyErr_NoMemory(); + return NULL; + } + + /* check if size can be added safely */ + if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { + PyErr_NoMemory(); + return NULL; + } + + n += sizeof(asdl_seq); seq = (asdl_int_seq *)PyArena_Malloc(arena, n); if (!seq) { diff --git a/Python/ast.c b/Python/ast.c index da42a0e..79c9403 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3145,6 +3145,9 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons buf = (char *)s; u = NULL; } else { + /* check for integer overflow */ + if (len > PY_SIZE_MAX / 4) + return NULL; /* "\XX" may become "\u005c\uHHLL" (12 bytes) */ u = PyBytes_FromStringAndSize((char *)NULL, len * 4); if (u == NULL) diff --git a/Python/compile.c b/Python/compile.c index 6017b2c..942ca1f52 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -227,6 +227,10 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) return ident; /* Don't mangle if class is just underscores */ } plen = Py_UNICODE_strlen(p); + + assert(1 <= PY_SSIZE_T_MAX - nlen); + assert(1 + nlen <= PY_SSIZE_T_MAX - plen); + ident = PyUnicode_FromStringAndSize(NULL, 1 + nlen + plen); if (!ident) return 0; @@ -635,6 +639,12 @@ compiler_next_instr(struct compiler *c, basicblock *b) size_t oldsize, newsize; oldsize = b->b_ialloc * sizeof(struct instr); newsize = oldsize << 1; + + if (oldsize > (PY_SIZE_MAX >> 1)) { + PyErr_NoMemory(); + return -1; + } + if (newsize == 0) { PyErr_NoMemory(); return -1; @@ -3711,6 +3721,10 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno) a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); if (!a->a_lnotab) return 0; + if (nblocks > PY_SIZE_MAX / sizeof(basicblock *)) { + PyErr_NoMemory(); + return 0; + } a->a_postorder = (basicblock **)PyObject_Malloc( sizeof(basicblock *) * nblocks); if (!a->a_postorder) { @@ -3819,10 +3833,14 @@ assemble_lnotab(struct assembler *a, struct instr *i) nbytes = a->a_lnotab_off + 2 * ncodes; len = PyBytes_GET_SIZE(a->a_lnotab); if (nbytes >= len) { - if (len * 2 < nbytes) + if ((len <= INT_MAX / 2) && (len * 2 < nbytes)) len = nbytes; - else + else if (len <= INT_MAX / 2) len *= 2; + else { + PyErr_NoMemory(); + return 0; + } if (_PyBytes_Resize(&a->a_lnotab, len) < 0) return 0; } @@ -3841,10 +3859,14 @@ assemble_lnotab(struct assembler *a, struct instr *i) nbytes = a->a_lnotab_off + 2 * ncodes; len = PyBytes_GET_SIZE(a->a_lnotab); if (nbytes >= len) { - if (len * 2 < nbytes) + if ((len <= INT_MAX / 2) && len * 2 < nbytes) len = nbytes; - else + else if (len <= INT_MAX / 2) len *= 2; + else { + PyErr_NoMemory(); + return 0; + } if (_PyBytes_Resize(&a->a_lnotab, len) < 0) return 0; } @@ -3903,6 +3925,8 @@ assemble_emit(struct assembler *a, struct instr *i) if (i->i_lineno && !assemble_lnotab(a, i)) return 0; if (a->a_offset + size >= len) { + if (len > PY_SSIZE_T_MAX / 2) + return 0; if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0) return 0; } |