summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2025-05-13 13:25:08 (GMT)
committerGitHub <noreply@github.com>2025-05-13 13:25:08 (GMT)
commit69b4387f78f413e8c47572a85b3478c47eba8142 (patch)
tree30bc6e580011bda809575c500cf3692edcb6a220 /Objects/unicodeobject.c
parentf0a7a6c2cc066523fc03d312cfebff8135d81aa2 (diff)
downloadcpython-69b4387f78f413e8c47572a85b3478c47eba8142.zip
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.gz
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.bz2
[3.14] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133942)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will became invalid after destroying that temporary bytes object. So we need other way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c46
1 files changed, 28 insertions, 18 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index eb3e1c4..4e1aafb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6621,13 +6621,15 @@ _PyUnicode_GetNameCAPI(void)
/* --- Unicode Escape Codec ----------------------------------------------- */
PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed,
- const char **first_invalid_escape)
+ int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{
const char *starts = s;
+ const char *initial_starts = starts;
_PyUnicodeWriter writer;
const char *end;
PyObject *errorHandler = NULL;
@@ -6635,7 +6637,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
_PyUnicode_Name_CAPI *ucnhash_capi;
// so we can remember if we've seen an invalid escape char or not
- *first_invalid_escape = NULL;
+ *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
if (size == 0) {
if (consumed) {
@@ -6723,9 +6726,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
}
if (ch > 0377) {
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = ch;
+ if (starts == initial_starts) {
+ /* Back up 3 chars, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 3;
+ }
}
}
WRITE_CHAR(ch);
@@ -6820,9 +6826,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
goto error;
default:
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = c;
+ if (starts == initial_starts) {
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
+ }
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6867,19 +6876,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
const char *errors,
Py_ssize_t *consumed)
{
- const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
consumed,
- &first_invalid_escape);
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL)
return NULL;
- if (first_invalid_escape != NULL) {
- unsigned char c = *first_invalid_escape;
- if ('4' <= c && c <= '7') {
+ if (first_invalid_escape_char != -1) {
+ if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "\"\\%.3s\" is an invalid octal escape sequence. "
+ "\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. ",
- first_invalid_escape) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -6889,7 +6899,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. ",
- c) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;