summaryrefslogtreecommitdiffstats
path: root/Parser/string_parser.c
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2025-05-13 13:25:08 (GMT)
committerGitHub <noreply@github.com>2025-05-13 13:25:08 (GMT)
commit69b4387f78f413e8c47572a85b3478c47eba8142 (patch)
tree30bc6e580011bda809575c500cf3692edcb6a220 /Parser/string_parser.c
parentf0a7a6c2cc066523fc03d312cfebff8135d81aa2 (diff)
downloadcpython-69b4387f78f413e8c47572a85b3478c47eba8142.zip
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.gz
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.bz2
[3.14] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133942)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will became invalid after destroying that temporary bytes object. So we need other way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Parser/string_parser.c')
-rw-r--r--Parser/string_parser.c26
1 files changed, 16 insertions, 10 deletions
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index d3631b1..ebe6898 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -196,15 +196,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
len = (size_t)(p - buf);
s = buf;
- const char *first_invalid_escape;
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
// HACK: later we can simply pass the line no, since we don't preserve the tokens
// when we are decoding the string but we preserve the line numbers.
- if (v != NULL && first_invalid_escape != NULL && t != NULL) {
- if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
- /* We have not decref u before because first_invalid_escape points
- inside u. */
+ if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) {
+ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) {
+ /* We have not decref u before because first_invalid_escape_ptr
+ points inside u. */
Py_XDECREF(u);
Py_DECREF(v);
return NULL;
@@ -217,14 +220,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{
- const char *first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL) {
return NULL;
}
- if (first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
+ if (first_invalid_escape_ptr != NULL) {
+ if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) {
Py_DECREF(result);
return NULL;
}