diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-07 14:25:25 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-02-07 14:25:25 (GMT) |
commit | 03ee12ed7251b6b251d55d708a22616ed2538b19 (patch) | |
tree | 94227dbfa67f3186277e59f454a15ef34de7f64a | |
parent | cf0904ff6523c5883b1b6ba5d633bb5fbfef970a (diff) | |
parent | 3fd4ab356d76b048f2dbd25797fec87f68dd7f73 (diff) | |
download | cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.zip cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.tar.gz cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.tar.bz2 |
Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 48 |
2 files changed, 25 insertions, 26 deletions
```diff
@@ -12,6 +12,9 @@ What's New in Python 3.3.1?
 Core and Builtins
 -----------------
 
+- Issue #17043: The unicode-internal decoder no longer read past the end of
+  input buffer.
+
 - Issue #17098: All modules now have __loader__ set even if they pre-exist
   the bootstrapping of importlib.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e845913..abe793d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6103,6 +6103,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
     while (s < end) {
         Py_UNICODE uch;
         Py_UCS4 ch;
+        if (end - s < Py_UNICODE_SIZE) {
+            endinpos = end-starts;
+            reason = "truncated input";
+            goto error;
+        }
         /* We copy the raw representation one byte at a time because the
            pointer may be unaligned (see test_codeccallbacks). */
         ((char *) &uch)[0] = s[0];
@@ -6112,37 +6117,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
         ((char *) &uch)[3] = s[3];
 #endif
         ch = uch;
-
+#ifdef Py_UNICODE_WIDE
         /* We have to sanity check the raw data, otherwise doom looms for
            some malformed UCS-4 data. */
-        if (
-#ifdef Py_UNICODE_WIDE
-            ch > 0x10ffff ||
-#endif
-            end-s < Py_UNICODE_SIZE
-            )
-        {
-            startinpos = s - starts;
-            if (end-s < Py_UNICODE_SIZE) {
-                endinpos = end-starts;
-                reason = "truncated input";
-            }
-            else {
-                endinpos = s - starts + Py_UNICODE_SIZE;
-                reason = "illegal code point (> 0x10FFFF)";
-            }
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "unicode_internal", reason,
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos))
-                goto onError;
-            continue;
+        if (ch > 0x10ffff) {
+            endinpos = s - starts + Py_UNICODE_SIZE;
+            reason = "illegal code point (> 0x10FFFF)";
+            goto error;
         }
-
+#endif
         s += Py_UNICODE_SIZE;
 #ifndef Py_UNICODE_WIDE
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
         {
             Py_UNICODE uch2;
             ((char *) &uch2)[0] = s[0];
@@ -6157,6 +6143,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
 
         if (unicode_putchar(&v, &outpos, ch) < 0)
             goto onError;
+        continue;
+
+    error:
+        startinpos = s - starts;
+        if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicode_internal", reason,
+                &starts, &end, &startinpos, &endinpos, &exc, &s,
+                &v, &outpos))
+            goto onError;
     }
 
     if (unicode_resize(&v, outpos) < 0)
```