diff options
author | Xiang Zhang <angwerzx@126.com> | 2018-01-31 12:48:05 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-31 12:48:05 (GMT) |
commit | 2c7fd46e11333ef5e5cce34212f7d087694f3658 (patch) | |
tree | 0497c3b1fa32112a475fe3b7da5390b59205f7fd /Objects/unicodeobject.c | |
parent | 84521047e413d7d1150aaa1c333580b683b3f4b1 (diff) | |
download | cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.zip cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.gz cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.bz2 |
bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When using customized decode error handlers, it is possible for builtin decoders
to write out-of-bounds and then crash.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 22 |
1 file changed, 20 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 775bd15..3d9e09d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4190,7 +4190,10 @@ unicode_decode_call_errorhandler_writer( Py_ssize_t insize; Py_ssize_t newpos; Py_ssize_t replen; + Py_ssize_t remain; PyObject *inputobj = NULL; + int need_to_grow = 0; + const char *new_inptr; if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); @@ -4221,6 +4224,7 @@ unicode_decode_call_errorhandler_writer( inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); if (!inputobj) goto onError; + remain = *inend - *input - *endinpos; *input = PyBytes_AS_STRING(inputobj); insize = PyBytes_GET_SIZE(inputobj); *inend = *input + insize; @@ -4238,6 +4242,19 @@ unicode_decode_call_errorhandler_writer( replen = PyUnicode_GET_LENGTH(repunicode); if (replen > 1) { writer->min_length += replen - 1; + need_to_grow = 1; + } + new_inptr = *input + newpos; + if (*inend - new_inptr > remain) { + /* We don't know the decoding algorithm here so we make the worst + assumption that one byte decodes to one unicode character. + If unfortunately one byte could decode to more unicode characters, + the decoder may write out-of-bound then. Is it possible for the + algorithms using this function? */ + writer->min_length += *inend - new_inptr - remain; + need_to_grow = 1; + } + if (need_to_grow) { writer->overallocate = 1; if (_PyUnicodeWriter_Prepare(writer, writer->min_length, PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1) @@ -4247,7 +4264,7 @@ unicode_decode_call_errorhandler_writer( goto onError; *endinpos = newpos; - *inptr = *input + newpos; + *inptr = new_inptr; /* we made it! */ Py_DECREF(restuple); @@ -5572,7 +5589,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, #endif /* Note: size will always be longer than the resulting Unicode - character count */ + character count normally. Error handler will take care of + resizing when needed. 
*/ _PyUnicodeWriter_Init(&writer); writer.min_length = (e - q + 1) / 2; if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) |