summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Peterson <benjamin@python.org>, 2014-09-29 22:50:06 (GMT)
committer: Benjamin Peterson <benjamin@python.org>, 2014-09-29 22:50:06 (GMT)
commit: 2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb (patch)
tree: 4cc312175b8c76caceb9b3ddf4f9f0a0200df856
parent: 12dc0d96e0b32f3ee0aeba2e93994ff5e11bec29 (diff)
parent: 3bbb2e4844f29d8a74be08ec876b84e150cd5a6c (diff)
download: cpython-2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb.zip
cpython-2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb.tar.gz
cpython-2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb.tar.bz2
merge 3.3 (closes #22518)
-rw-r--r-- Misc/NEWS | 2
-rw-r--r-- Objects/unicodeobject.c | 64
2 files changed, 49 insertions, 17 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index f95b593..3d13192 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,8 @@ What's New in Python 3.4.3?
Core and Builtins
-----------------
+- Issue #22518: Fix integer overflow issues in latin-1 encoding.
+
Library
-------
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d9c131c..3da09ef 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4093,16 +4093,21 @@ unicode_decode_call_errorhandler_wchar(
have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space
when there are no errors in the rest of the string) */
- requiredsize = *outpos + repwlen + insize-newpos;
+ requiredsize = *outpos;
+ if (requiredsize > PY_SSIZE_T_MAX - repwlen)
+ goto overflow;
+ requiredsize += repwlen;
+ if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
+ goto overflow;
+ requiredsize += insize - newpos;
if (requiredsize > outsize) {
- if (requiredsize < 2*outsize)
+ if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0)
goto onError;
}
wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
*outpos += repwlen;
-
*endinpos = newpos;
*inptr = *input + newpos;
@@ -4110,6 +4115,10 @@ unicode_decode_call_errorhandler_wchar(
Py_XDECREF(restuple);
return 0;
+ overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "decoded result is too long for a Python string");
+
onError:
Py_XDECREF(restuple);
return -1;
@@ -6502,7 +6511,7 @@ unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t collstart = pos;
Py_ssize_t collend = pos;
/* find all unecodable characters */
- while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
+ while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) {
@@ -6522,36 +6531,43 @@ unicode_encode_ucs1(PyObject *unicode,
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
goto onError;
case 2: /* replace */
- while (collstart++<collend)
+ while (collstart++ < collend)
*str++ = '?'; /* fall through */
case 3: /* ignore */
pos = collend;
break;
case 4: /* xmlcharrefreplace */
respos = str - PyBytes_AS_STRING(res);
+ requiredsize = respos;
/* determine replacement size */
- for (i = collstart, repsize = 0; i < collend; ++i) {
+ for (i = collstart; i < collend; ++i) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ Py_ssize_t incr;
if (ch < 10)
- repsize += 2+1+1;
+ incr = 2+1+1;
else if (ch < 100)
- repsize += 2+2+1;
+ incr = 2+2+1;
else if (ch < 1000)
- repsize += 2+3+1;
+ incr = 2+3+1;
else if (ch < 10000)
- repsize += 2+4+1;
+ incr = 2+4+1;
else if (ch < 100000)
- repsize += 2+5+1;
+ incr = 2+5+1;
else if (ch < 1000000)
- repsize += 2+6+1;
+ incr = 2+6+1;
else {
assert(ch <= MAX_UNICODE);
- repsize += 2+7+1;
+ incr = 2+7+1;
}
+ if (requiredsize > PY_SSIZE_T_MAX - incr)
+ goto overflow;
+ requiredsize += incr;
}
- requiredsize = respos+repsize+(size-collend);
+ if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
+ goto overflow;
+ requiredsize += size - collend;
if (requiredsize > ressize) {
- if (requiredsize<2*ressize)
+ if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize))
goto onError;
@@ -6577,6 +6593,10 @@ unicode_encode_ucs1(PyObject *unicode,
if (repsize > 1) {
/* Make room for all additional bytes. */
respos = str - PyBytes_AS_STRING(res);
+ if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
+ Py_DECREF(repunicode);
+ goto overflow;
+ }
if (_PyBytes_Resize(&res, ressize+repsize-1)) {
Py_DECREF(repunicode);
goto onError;
@@ -6595,9 +6615,15 @@ unicode_encode_ucs1(PyObject *unicode,
we won't have to check space for encodable characters) */
respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_LENGTH(repunicode);
- requiredsize = respos+repsize+(size-collend);
+ requiredsize = respos;
+ if (requiredsize > PY_SSIZE_T_MAX - repsize)
+ goto overflow;
+ requiredsize += repsize;
+ if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
+ goto overflow;
+ requiredsize += size - collend;
if (requiredsize > ressize) {
- if (requiredsize<2*ressize)
+ if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize)) {
Py_DECREF(repunicode);
@@ -6635,6 +6661,10 @@ unicode_encode_ucs1(PyObject *unicode,
Py_XDECREF(exc);
return res;
+ overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "encoded result is too long for a Python string");
+
onError:
Py_XDECREF(res);
Py_XDECREF(errorHandler);