summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2013-02-07 14:25:25 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-02-07 14:25:25 (GMT)
commit: 03ee12ed7251b6b251d55d708a22616ed2538b19 (patch)
tree: 94227dbfa67f3186277e59f454a15ef34de7f64a
parent: cf0904ff6523c5883b1b6ba5d633bb5fbfef970a (diff)
parent: 3fd4ab356d76b048f2dbd25797fec87f68dd7f73 (diff)
download: cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.zip
cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.tar.gz
cpython-03ee12ed7251b6b251d55d708a22616ed2538b19.tar.bz2
Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/unicodeobject.c 48
2 files changed, 25 insertions, 26 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index b63511c..9491614 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.3.1?
Core and Builtins
-----------------
+- Issue #17043: The unicode-internal decoder no longer read past the end of
+ input buffer.
+
- Issue #17098: All modules now have __loader__ set even if they pre-exist the
bootstrapping of importlib.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e845913..abe793d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6103,6 +6103,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
+ if (end - s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0];
@@ -6112,37 +6117,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
((char *) &uch)[3] = s[3];
#endif
ch = uch;
-
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- ch > 0x10ffff ||
-#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
- goto onError;
- continue;
+ if (ch > 0x10ffff) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
}
-
+#endif
s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{
Py_UNICODE uch2;
((char *) &uch2)[0] = s[0];
@@ -6157,6 +6143,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
if (unicode_putchar(&v, &outpos, ch) < 0)
goto onError;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos))
+ goto onError;
}
if (unicode_resize(&v, outpos) < 0)