-rw-r--r--  Lib/test/test_traceback.py | 59
-rw-r--r--  Lib/traceback.py           | 53
-rw-r--r--  Parser/pegen.c             | 55
-rw-r--r--  Parser/pegen.h             |  1
-rw-r--r--  Python/traceback.c         | 35
5 files changed, 187 insertions(+), 16 deletions(-)
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index b9b0463..e0ef9e0 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -922,8 +922,63 @@ class TracebackErrorLocationCaretTestBase:
             f" File \"{__file__}\", line {self.callable_line}, in get_exception",
             " callable()",
             f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
-            " print(1, www(",
-            " ^^^^",
+            f" print(1, www(",
+            f" ^^^^^^^",
+        ]
+        self.assertEqual(actual, expected)
+
+    def test_byte_offset_with_wide_characters_term_highlight(self):
+        def f():
+            说明说明 = 1
+            şçöğıĤellö = 0 # not wide but still non-ascii
+            return 说明说明 / şçöğıĤellö
+
+        actual = self.get_exception(f)
+        expected = [
+            f"Traceback (most recent call last):",
+            f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+            f" callable()",
+            f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
+            f" return 说明说明 / şçöğıĤellö",
+            f" ~~~~~~~~~^~~~~~~~~~~~",
+        ]
+        self.assertEqual(actual, expected)
+
+    def test_byte_offset_with_emojis_term_highlight(self):
+        def f():
+            return "✨🐍" + func_说明说明("📗🚛",
+                "📗🚛") + "🐍"
+
+        actual = self.get_exception(f)
+        expected = [
+            f"Traceback (most recent call last):",
+            f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+            f" callable()",
+            f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
+            f' return "✨🐍" + func_说明说明("📗🚛",',
+            f" ^^^^^^^^^^^^^",
+        ]
+        self.assertEqual(actual, expected)
+
+    def test_byte_offset_wide_chars_subscript(self):
+        def f():
+            my_dct = {
+                "✨🚛✨": {
+                    "说明": {
+                        "🐍🐍🐍": None
+                    }
+                }
+            }
+            return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]
+
+        actual = self.get_exception(f)
+        expected = [
+            f"Traceback (most recent call last):",
+            f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+            f" callable()",
+            f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
+            f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
+            f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
         ]
         self.assertEqual(actual, expected)
diff --git a/Lib/traceback.py b/Lib/traceback.py
index c1be659..f61d5db 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -470,7 +470,8 @@ class StackSummary(list):
             stripped_line = frame_summary.line.strip()
             row.append(' {}\n'.format(stripped_line))
-            orig_line_len = len(frame_summary._original_line)
+            line = frame_summary._original_line
+            orig_line_len = len(line)
             frame_line_len = len(frame_summary.line.lstrip())
             stripped_characters = orig_line_len - frame_line_len
             if (
@@ -478,31 +479,40 @@
                 and frame_summary.end_colno is not None
             ):
                 start_offset = _byte_offset_to_character_offset(
-                    frame_summary._original_line, frame_summary.colno) + 1
+                    line, frame_summary.colno)
                 end_offset = _byte_offset_to_character_offset(
-                    frame_summary._original_line, frame_summary.end_colno) + 1
+                    line, frame_summary.end_colno)
+                code_segment = line[start_offset:end_offset]
                 anchors = None
                 if frame_summary.lineno == frame_summary.end_lineno:
                     with suppress(Exception):
-                        anchors = _extract_caret_anchors_from_line_segment(
-                            frame_summary._original_line[start_offset - 1:end_offset - 1]
-                        )
+                        anchors = _extract_caret_anchors_from_line_segment(code_segment)
                 else:
-                    end_offset = stripped_characters + len(stripped_line)
+                    # Don't count the newline since the anchors only need to
+                    # go up until the last character of the line.
+                    end_offset = len(line.rstrip())
                 # show indicators if primary char doesn't span the frame line
                 if end_offset - start_offset < len(stripped_line) or (
                         anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
+                    # When showing this on a terminal, some of the non-ASCII characters
+                    # might be rendered as double-width characters, so we need to take
+                    # that into account when calculating the length of the line.
+                    dp_start_offset = _display_width(line, start_offset) + 1
+                    dp_end_offset = _display_width(line, end_offset) + 1
+
                     row.append(' ')
-                    row.append(' ' * (start_offset - stripped_characters))
+                    row.append(' ' * (dp_start_offset - stripped_characters))
                     if anchors:
-                        row.append(anchors.primary_char * (anchors.left_end_offset))
-                        row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
-                        row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
+                        dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
+                        dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
+                        row.append(anchors.primary_char * dp_left_end_offset)
+                        row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
+                        row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
                     else:
-                        row.append('^' * (end_offset - start_offset))
+                        row.append('^' * (dp_end_offset - dp_start_offset))
                     row.append('\n')
@@ -623,6 +633,25 @@ def _extract_caret_anchors_from_line_segment(segment):
     return None
+_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _display_width(line, offset):
+    """Calculate the extra amount of width space the given source
+    code segment might take if it were to be displayed on a fixed
+    width output device. Supports wide unicode characters and emojis."""
+
+    # Fast track for ASCII-only strings
+    if line.isascii():
+        return offset
+
+    import unicodedata
+
+    return sum(
+        2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
+        for char in line[:offset]
+    )
+
+
 class _ExceptionPrintContext:
     def __init__(self):
diff --git a/Parser/pegen.c b/Parser/pegen.c
index b9894dd..ff02e88 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -38,6 +38,61 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     return size;
 }
+// Calculate the extra amount of width space the given source
+// code segment might take if it were to be displayed on a fixed
+// width output device. Supports wide unicode characters and emojis.
+Py_ssize_t
+_PyPegen_calculate_display_width(PyObject *line, Py_ssize_t character_offset)
+{
+    PyObject *segment = PyUnicode_Substring(line, 0, character_offset);
+    if (!segment) {
+        return -1;
+    }
+
+    // Fast track for ascii strings
+    if (PyUnicode_IS_ASCII(segment)) {
+        Py_DECREF(segment);
+        return character_offset;
+    }
+
+    PyObject *width_fn = _PyImport_GetModuleAttrString("unicodedata", "east_asian_width");
+    if (!width_fn) {
+        return -1;
+    }
+
+    Py_ssize_t width = 0;
+    Py_ssize_t len = PyUnicode_GET_LENGTH(segment);
+    for (Py_ssize_t i = 0; i < len; i++) {
+        PyObject *chr = PyUnicode_Substring(segment, i, i + 1);
+        if (!chr) {
+            Py_DECREF(segment);
+            Py_DECREF(width_fn);
+            return -1;
+        }
+
+        PyObject *width_specifier = PyObject_CallOneArg(width_fn, chr);
+        Py_DECREF(chr);
+        if (!width_specifier) {
+            Py_DECREF(segment);
+            Py_DECREF(width_fn);
+            return -1;
+        }
+
+        if (_PyUnicode_EqualToASCIIString(width_specifier, "W") ||
+            _PyUnicode_EqualToASCIIString(width_specifier, "F")) {
+            width += 2;
+        }
+        else {
+            width += 1;
+        }
+        Py_DECREF(width_specifier);
+    }
+
+    Py_DECREF(segment);
+    Py_DECREF(width_fn);
+    return width;
+}
+
 // Here, mark is the start of the node, while p->mark is the end.
 // If node==NULL, they should be the same.
 int
diff --git a/Parser/pegen.h b/Parser/pegen.h
index a8bfa78..268f380 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -151,6 +151,7 @@ expr_ty _PyPegen_name_token(Parser *p);
 expr_ty _PyPegen_number_token(Parser *p);
 void *_PyPegen_string_token(Parser *p);
 Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
+Py_ssize_t _PyPegen_calculate_display_width(PyObject *segment, Py_ssize_t character_offset);
 // Error handling functions and APIs
 typedef enum {
diff --git a/Python/traceback.c b/Python/traceback.c
index 0070f15..4fc4881 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -900,8 +900,39 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         goto done;
     }
-    if (print_error_location_carets(f, truncation, start_offset, end_offset,
-                                    right_start_offset, left_end_offset,
+    // Convert all offsets to display offsets (e.g. the space they would take up if printed
+    // on the screen).
+    Py_ssize_t dp_start = _PyPegen_calculate_display_width(source_line, start_offset);
+    if (dp_start < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
+    Py_ssize_t dp_end = _PyPegen_calculate_display_width(source_line, end_offset);
+    if (dp_end < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
+    Py_ssize_t dp_left_end = -1;
+    Py_ssize_t dp_right_start = -1;
+    if (has_secondary_ranges) {
+        dp_left_end = _PyPegen_calculate_display_width(source_line, left_end_offset);
+        if (dp_left_end < 0) {
+            err = ignore_source_errors() < 0;
+            goto done;
+        }
+
+        dp_right_start = _PyPegen_calculate_display_width(source_line, right_start_offset);
+        if (dp_right_start < 0) {
+            err = ignore_source_errors() < 0;
+            goto done;
+        }
+    }
+
+
+    if (print_error_location_carets(f, truncation, dp_start, dp_end,
+                                    dp_right_start, dp_left_end,
                                     primary_error_char, secondary_error_char) < 0) {
         err = -1;
         goto done;
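
The core idea of the patch, on both the Python side (Lib/traceback.py) and the C side (Parser/pegen.c), is the same: when positioning the ^ and ~ markers, any character whose unicodedata.east_asian_width() class is "W" (Wide) or "F" (Fullwidth) counts as two terminal columns instead of one. The sketch below is a minimal standalone illustration of that rule, mirroring the new _display_width() helper added to Lib/traceback.py; the names display_width, code, start and end are illustrative only and not part of the patch.

    import unicodedata

    # "W" (Wide) and "F" (Fullwidth) classes occupy two terminal columns;
    # everything else is counted as a single column, matching the patch's
    # _WIDE_CHAR_SPECIFIERS constant.
    _WIDE_CHAR_SPECIFIERS = "WF"

    def display_width(line, offset):
        """Return the number of terminal columns occupied by line[:offset]."""
        if line.isascii():
            # ASCII text never contains double-width characters.
            return offset
        return sum(
            2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
            for char in line[:offset]
        )

    # Hypothetical source line, modeled on the new test case.
    code = "return 说明说明 / şçöğıĤellö"
    start = len("return ")        # character offset where the left operand begins
    end = len("return 说明说明")   # character offset just past the left operand

    print(display_width(code, end))   # 15: 7 ASCII columns + 4 wide characters * 2

    # Build the anchor row from display offsets, the way the patched formatter does,
    # so the tildes span the full rendered width of the CJK operand.
    print(code)
    print(" " * display_width(code, start)
          + "~" * (display_width(code, end) - display_width(code, start)))

Parser/pegen.c adds the same counting in C as _PyPegen_calculate_display_width(), which Python/traceback.c now calls to convert the caret offsets before printing, so tracebacks rendered by the interpreter itself line up the same way.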