summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 48
1 files changed, 22 insertions, 26 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index cbd2870..8596e54 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5976,6 +5976,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
+ if (end - s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0];
@@ -5985,37 +5990,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
((char *) &uch)[3] = s[3];
#endif
ch = uch;
-
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- ch > 0x10ffff ||
-#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &writer))
- goto onError;
- continue;
+ if (ch > 0x10ffff) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
}
-
+#endif
s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{
Py_UNICODE uch2;
((char *) &uch2)[0] = s[0];
@@ -6032,6 +6018,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
writer.pos++;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &writer))
+ goto onError;
}
Py_XDECREF(errorHandler);