From ed4c130d3dd7ab0f0b142d36ee49b74df57d182e Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 29 Sep 2014 18:18:57 -0400 Subject: cleanup overflowing handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518) --- Objects/unicodeobject.c | 69 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f52ee92..bdb14d7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1510,9 +1510,15 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler when there are no errors in the rest of the string) */ repptr = PyUnicode_AS_UNICODE(repunicode); repsize = PyUnicode_GET_SIZE(repunicode); - requiredsize = *outpos + repsize + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (_PyUnicode_Resize(output, requiredsize) < 0) goto onError; @@ -1529,6 +1535,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -3646,7 +3657,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, const Py_UNICODE *collstart = p; const Py_UNICODE *collend = p; /* find all unecodable characters */ - while ((collend < endp) && ((*collend)>=limit)) + while ((collend < endp) && ((*collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -3666,34 +3677,41 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason); goto onError; case 2: /* replace */ - while (collstart++ PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; } - requiredsize = respos+repsize+(endp-collend); + if (requiredsize > PY_SSIZE_T_MAX - (endp - collend)) + goto overflow; + requiredsize += endp - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyString_Resize(&res, requiredsize)) goto onError; @@ -3716,11 +3734,16 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, /* need more space? (at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str-PyString_AS_STRING(res); + respos = str - PyString_AS_STRING(res); repsize = PyUnicode_GET_SIZE(repunicode); - requiredsize = respos+repsize+(endp-collend); + if (respos > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize = respos + repsize; + if (requiredsize > PY_SSIZE_T_MAX - (endp - collend)) + goto overflow; + requiredsize += endp - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyString_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -3731,7 +3754,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, } /* check if there is anything unencodable in the replacement and copy it to the output */ - for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) { + for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2, ++str) { c = *uni2; if (c >= limit) { raise_encode_exception(&exc, encoding, startp, size, @@ -3747,14 +3770,18 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, } } /* Resize if we allocated to much */ - respos = str-PyString_AS_STRING(res); - if (respos