diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-29 08:28:07 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-29 08:28:07 (GMT) |
commit | 24193debd4a380604afacdfa8fb4405b00c34bcd (patch) | |
tree | 9df7cd917107031e1a3bc5c876d977321fc33752 /Objects | |
parent | 4dafd4075c363b2eb850653f3d562dd4afa739a1 (diff) | |
parent | d679377be78c732f1c7414726542c6b49c9b34ab (diff) | |
download | cpython-24193debd4a380604afacdfa8fb4405b00c34bcd.zip cpython-24193debd4a380604afacdfa8fb4405b00c34bcd.tar.gz cpython-24193debd4a380604afacdfa8fb4405b00c34bcd.tar.bz2 |
Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 82 |
1 files changed, 30 insertions, 52 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c96a91c..b559cb1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5508,7 +5508,6 @@ PyUnicode_DecodeUnicodeEscape(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - int j; PyObject *v; const char *end; char* message; @@ -5630,29 +5629,19 @@ PyUnicode_DecodeUnicodeEscape(const char *s, message = "truncated \\UXXXXXXXX escape"; hexescape: chr = 0; - if (s+digits>end) { - endinpos = size; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "end of string in escape sequence", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - goto nextByte; - } - for (j = 0; j < digits; ++j) { - c = (unsigned char) s[j]; - if (!Py_ISXDIGIT(c)) { - endinpos = (s+j+1)-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - len = PyUnicode_GET_LENGTH(v); - goto nextByte; + if (end - s < digits) { + /* count only hex digits */ + for (; s < end; ++s) { + c = (unsigned char)*s; + if (!Py_ISXDIGIT(c)) + goto error; } + goto error; + } + for (; digits--; ++s) { + c = (unsigned char)*s; + if (!Py_ISXDIGIT(c)) + goto error; chr = (chr<<4) & ~0xF; if (c >= '0' && c <= '9') chr += c - '0'; @@ -5661,24 +5650,16 @@ PyUnicode_DecodeUnicodeEscape(const char *s, else chr += 10 + c - 'A'; } - s += j; if (chr == 0xffffffff && PyErr_Occurred()) /* _decoding_error will have already written into the target buffer. */ break; store: /* when we get here, chr is a 32-bit unicode character */ - if (chr <= MAX_UNICODE) { - WRITECHAR(chr); - } else { - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "illegal Unicode character", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - } + message = "illegal Unicode character"; + if (chr > MAX_UNICODE) + goto error; + WRITECHAR(chr); break; /* \N{name} */ @@ -5706,26 +5687,13 @@ PyUnicode_DecodeUnicodeEscape(const char *s, goto store; } } - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - break; + goto error; default: if (s > end) { message = "\\ at end of string"; s--; - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; + goto error; } else { WRITECHAR('\\'); @@ -5733,8 +5701,18 @@ PyUnicode_DecodeUnicodeEscape(const char *s, } break; } - nextByte: - ; + continue; + + error: + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", message, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + len = PyUnicode_GET_LENGTH(v); + continue; } #undef WRITECHAR |