From fbc349ff790c21f1a59af939d42033470790c530 Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya
Date: Tue, 20 Jul 2021 18:42:12 +0300
Subject: bpo-43950: Distinguish errors happening on character offset decoding (GH-27217)

---
 Parser/pegen.c     | 18 +++++++++++++-----
 Python/traceback.c | 10 ++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/Parser/pegen.c b/Parser/pegen.c
index 3e8ddfb..106dba9 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -402,7 +402,7 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
 {
     const char *str = PyUnicode_AsUTF8(line);
     if (!str) {
-        return 0;
+        return -1;
     }
     Py_ssize_t len = strlen(str);
     if (col_offset > len + 1) {
@@ -411,7 +411,7 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     assert(col_offset >= 0);
     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
     if (!text) {
-        return 0;
+        return -1;
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(text);
     Py_DECREF(text);
@@ -499,9 +499,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 
     if (p->tok->encoding != NULL) {
         col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
-        end_col_number = end_col_number > 0 ?
-                         _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset) :
-                         end_col_number;
+        if (col_number < 0) {
+            goto error;
+        }
+        if (end_col_number > 0) {
+            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
+            if (end_col_offset < 0) {
+                goto error;
+            } else {
+                end_col_number = end_col_offset;
+            }
+        }
     }
     tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
     if (!tmp) {
diff --git a/Python/traceback.c b/Python/traceback.c
index 643096c..e02caef 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -745,7 +745,17 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
     assert(source_line);
     Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+    if (start_offset < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
     Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+    if (end_offset < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
     Py_ssize_t left_end_offset = -1;
     Py_ssize_t right_start_offset = -1;
 
-- 
cgit v0.12