From a1c1be4e03cb428a7229d9fe703dbaa0ddd359f3 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 29 Sep 2014 18:18:57 -0400 Subject: cleanup overflowing handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518) --- Objects/unicodeobject.c | 74 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4085d22..07832ba 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - replen) + goto overflow; + requiredsize += replen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repwlen) + goto overflow; + requiredsize += repwlen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize < 2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t collstart = pos; Py_ssize_t collend = pos; /* find all unecodable characters */ - while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) + while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode, raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); goto onError; case 2: /* replace */ - while (collstart++ PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; } - requiredsize = respos+repsize+(size-collend); + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) goto onError; @@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode, if (repsize > 1) { /* Make room for all additional bytes. */ respos = str - PyBytes_AS_STRING(res); + if (ressize > PY_SSIZE_T_MAX - repsize - 1) { + Py_DECREF(repunicode); + goto overflow; + } if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; @@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode, we won't have to check space for encodable characters) */ respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); + requiredsize = respos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode, Py_XDECREF(exc); return res; + overflow: + PyErr_SetString(PyExc_OverflowError, + "encoded result is too long for a Python string"); + onError: Py_XDECREF(res); Py_XDECREF(errorHandler); -- cgit v0.12 From 3bbb2e4844f29d8a74be08ec876b84e150cd5a6c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 29 Sep 2014 18:42:35 -0400 Subject: NEWS issue for #22518 --- Misc/NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Misc/NEWS b/Misc/NEWS index b5eab85..034c72d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.3.6 release candidate 1? Core and Builtins ----------------- +- Issue #22518: Fix integer overflow issues in latin-1 encoding. + Library ------- -- cgit v0.12