diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-29 08:20:44 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-29 08:20:44 (GMT) |
commit | d679377be78c732f1c7414726542c6b49c9b34ab (patch) | |
tree | 145ae100449962e22449421a35a27c86ef0ccab8 /Objects/unicodeobject.c | |
parent | 8e0ae2a4f08bc01b18ba46cbebab78ac9f0c00a6 (diff) | |
download | cpython-d679377be78c732f1c7414726542c6b49c9b34ab.zip cpython-d679377be78c732f1c7414726542c6b49c9b34ab.tar.gz cpython-d679377be78c732f1c7414726542c6b49c9b34ab.tar.bz2 |
Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 79 |
1 files changed, 28 insertions, 51 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 92d1777..3a288d8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3760,7 +3760,6 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t startinpos; Py_ssize_t endinpos; Py_ssize_t outpos; - int i; PyUnicodeObject *v; Py_UNICODE *p; const char *end; @@ -3846,29 +3845,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, message = "truncated \\UXXXXXXXX escape"; hexescape: chr = 0; - outpos = p-PyUnicode_AS_UNICODE(v); - if (s+digits>end) { - endinpos = size; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "end of string in escape sequence", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) - goto onError; - goto nextByte; - } - for (i = 0; i < digits; ++i) { - c = (unsigned char) s[i]; - if (!Py_ISXDIGIT(c)) { - endinpos = (s+i+1)-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) - goto onError; - goto nextByte; + if (end - s < digits) { + /* count only hex digits */ + for (; s < end; ++s) { + c = (unsigned char)*s; + if (!Py_ISXDIGIT(c)) + goto error; } + goto error; + } + for (; digits--; ++s) { + c = (unsigned char)*s; + if (!Py_ISXDIGIT(c)) + goto error; chr = (chr<<4) & ~0xF; if (c >= '0' && c <= '9') chr += c - '0'; @@ -3877,7 +3866,6 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, else chr += 10 + c - 'A'; } - s += i; if (chr == 0xffffffff && PyErr_Occurred()) /* _decoding_error will have already written into the target buffer. */ @@ -3898,14 +3886,8 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF); #endif } else { - endinpos = s-starts; - outpos = p-PyUnicode_AS_UNICODE(v); - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "illegal Unicode character", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) - goto onError; + message = "illegal Unicode character"; + goto error; } break; @@ -3932,28 +3914,13 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, goto store; } } - endinpos = s-starts; - outpos = p-PyUnicode_AS_UNICODE(v); - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) - goto onError; - break; + goto error; default: if (s > end) { message = "\\ at end of string"; s--; - endinpos = s-starts; - outpos = p-PyUnicode_AS_UNICODE(v); - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) - goto onError; + goto error; } else { *p++ = '\\'; @@ -3961,8 +3928,18 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, } break; } - nextByte: - ; + continue; + + error: + endinpos = s-starts; + outpos = p-PyUnicode_AS_UNICODE(v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", message, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &outpos, &p)) + goto onError; + continue; } if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) goto onError; |