diff options
author | Eric V. Smith <eric@trueblade.com> | 2016-10-31 18:46:26 (GMT) |
---|---|---|
committer | Eric V. Smith <eric@trueblade.com> | 2016-10-31 18:46:26 (GMT) |
commit | 5646648678295a44aa82636c6e92826651baf33a (patch) | |
tree | 2a41306ca416712ba7b55a4e51bcb836ab2a693c /Objects/unicodeobject.c | |
parent | 7f0514ad54dd806817ce6d1f54969b8979475d34 (diff) | |
download | cpython-5646648678295a44aa82636c6e92826651baf33a.zip cpython-5646648678295a44aa82636c6e92826651baf33a.tar.gz cpython-5646648678295a44aa82636c6e92826651baf33a.tar.bz2 |
Issue 28128: Print out better error/warning messages for invalid string escapes. Backport to 3.6.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 38 |
1 files changed, 32 insertions, 6 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e45f3d7..50b21cf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5896,9 +5896,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
 PyObject *
-PyUnicode_DecodeUnicodeEscape(const char *s,
-                              Py_ssize_t size,
-                              const char *errors)
+_PyUnicode_DecodeUnicodeEscape(const char *s,
+                               Py_ssize_t size,
+                               const char *errors,
+                               const char **first_invalid_escape)
 {
     const char *starts = s;
     _PyUnicodeWriter writer;
@@ -5906,6 +5907,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
+    // so we can remember if we've seen an invalid escape char or not
+    *first_invalid_escape = NULL;
+
     if (size == 0) {
         _Py_RETURN_UNICODE_EMPTY();
     }
@@ -6080,9 +6084,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
             goto error;
 
         default:
-            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
-                                 "invalid escape sequence '\\%c'", c) < 0)
-                goto onError;
+            if (*first_invalid_escape == NULL) {
+                *first_invalid_escape = s-1; /* Back up one char, since we've
+                                                already incremented s. */
+            }
             WRITE_ASCII_CHAR('\\');
             WRITE_CHAR(c);
             continue;
@@ -6117,6 +6122,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     return NULL;
 }
 
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+                              Py_ssize_t size,
+                              const char *errors)
+{
+    const char *first_invalid_escape;
+    PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
+                                                      &first_invalid_escape);
+    if (result == NULL)
+        return NULL;
+    if (first_invalid_escape != NULL) {
+        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                             "invalid escape sequence '\\%c'",
+                             *first_invalid_escape) < 0) {
+            Py_DECREF(result);
+            return NULL;
+        }
+    }
+    return result;
+}
+
 /* Return a Unicode-Escape string version of the Unicode object.
 
    If quotes is true, the string is enclosed in u"" or u'' quotes as
```