diff options
author | Benjamin Peterson <benjamin@python.org> | 2014-09-29 22:18:57 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2014-09-29 22:18:57 (GMT) |
commit | a1c1be4e03cb428a7229d9fe703dbaa0ddd359f3 (patch) | |
tree | 0a342e3d8184847e2e9df80793c10e380abd55fe /Objects | |
parent | e025b52db0651081eb08978efa850269c8282073 (diff) | |
download | cpython-a1c1be4e03cb428a7229d9fe703dbaa0ddd359f3.zip cpython-a1c1be4e03cb428a7229d9fe703dbaa0ddd359f3.tar.gz cpython-a1c1be4e03cb428a7229d9fe703dbaa0ddd359f3.tar.bz2 |
clean up overflow handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 74 |
1 files changed, 56 insertions, 18 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4085d22..07832ba 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - replen) + goto overflow; + requiredsize += replen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repwlen) + goto overflow; + requiredsize += repwlen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize < 2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec 
-------------------------------------------------------- */ @@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t collstart = pos; Py_ssize_t collend = pos; /* find all unecodable characters */ - while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) + while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode, raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); goto onError; case 2: /* replace */ - while (collstart++<collend) + while (collstart++ < collend) *str++ = '?'; /* fall through */ case 3: /* ignore */ pos = collend; break; case 4: /* xmlcharrefreplace */ respos = str - PyBytes_AS_STRING(res); + requiredsize = respos; /* determine replacement size */ - for (i = collstart, repsize = 0; i < collend; ++i) { + for (i = collstart; i < collend; ++i) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); + Py_ssize_t incr; if (ch < 10) - repsize += 2+1+1; + incr = 2+1+1; else if (ch < 100) - repsize += 2+2+1; + incr = 2+2+1; else if (ch < 1000) - repsize += 2+3+1; + incr = 2+3+1; else if (ch < 10000) - repsize += 2+4+1; + incr = 2+4+1; else if (ch < 100000) - repsize += 2+5+1; + incr = 2+5+1; else if (ch < 1000000) - repsize += 2+6+1; + incr = 2+6+1; else { assert(ch <= MAX_UNICODE); - repsize += 2+7+1; + incr = 2+7+1; } + if (requiredsize > PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; } - requiredsize = respos+repsize+(size-collend); + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) goto onError; @@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode, 
if (repsize > 1) { /* Make room for all additional bytes. */ respos = str - PyBytes_AS_STRING(res); + if (ressize > PY_SSIZE_T_MAX - repsize - 1) { + Py_DECREF(repunicode); + goto overflow; + } if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; @@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode, we won't have to check space for encodable characters) */ respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); + requiredsize = respos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode, Py_XDECREF(exc); return res; + overflow: + PyErr_SetString(PyExc_OverflowError, + "encoded result is too long for a Python string"); + onError: Py_XDECREF(res); Py_XDECREF(errorHandler); |