summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  Lib/test/test_traceback.py  59
-rw-r--r--  Lib/traceback.py            53
-rw-r--r--  Parser/pegen.c              55
-rw-r--r--  Parser/pegen.h               1
-rw-r--r--  Python/traceback.c          35
5 files changed, 187 insertions, 16 deletions
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index b9b0463..e0ef9e0 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -922,8 +922,63 @@ class TracebackErrorLocationCaretTestBase:
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
- " print(1, www(",
- " ^^^^",
+ f" print(1, www(",
+ f" ^^^^^^^",
+ ]
+ self.assertEqual(actual, expected)
+
+ def test_byte_offset_with_wide_characters_term_highlight(self):
+ def f():
+ 说明说明 = 1
+ şçöğıĤellö = 0 # not wide but still non-ascii
+ return 说明说明 / şçöğıĤellö
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
+ f" return 说明说明 / şçöğıĤellö",
+ f" ~~~~~~~~~^~~~~~~~~~~~",
+ ]
+ self.assertEqual(actual, expected)
+
+ def test_byte_offset_with_emojis_term_highlight(self):
+ def f():
+ return "✨🐍" + func_说明说明("📗🚛",
+ "📗🚛") + "🐍"
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
+ f' return "✨🐍" + func_说明说明("📗🚛",',
+ f" ^^^^^^^^^^^^^",
+ ]
+ self.assertEqual(actual, expected)
+
+ def test_byte_offset_wide_chars_subscript(self):
+ def f():
+ my_dct = {
+ "✨🚛✨": {
+ "说明": {
+ "🐍🐍🐍": None
+ }
+ }
+ }
+ return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
+ f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
+ f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
]
self.assertEqual(actual, expected)
diff --git a/Lib/traceback.py b/Lib/traceback.py
index c1be659..f61d5db 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -470,7 +470,8 @@ class StackSummary(list):
stripped_line = frame_summary.line.strip()
row.append(' {}\n'.format(stripped_line))
- orig_line_len = len(frame_summary._original_line)
+ line = frame_summary._original_line
+ orig_line_len = len(line)
frame_line_len = len(frame_summary.line.lstrip())
stripped_characters = orig_line_len - frame_line_len
if (
@@ -478,31 +479,40 @@ class StackSummary(list):
and frame_summary.end_colno is not None
):
start_offset = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.colno) + 1
+ line, frame_summary.colno)
end_offset = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.end_colno) + 1
+ line, frame_summary.end_colno)
+ code_segment = line[start_offset:end_offset]
anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
- anchors = _extract_caret_anchors_from_line_segment(
- frame_summary._original_line[start_offset - 1:end_offset - 1]
- )
+ anchors = _extract_caret_anchors_from_line_segment(code_segment)
else:
- end_offset = stripped_characters + len(stripped_line)
+ # Don't count the newline since the anchors only need to
+ # go up until the last character of the line.
+ end_offset = len(line.rstrip())
# show indicators if primary char doesn't span the frame line
if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
+ # When showing this on a terminal, some of the non-ASCII characters
+ # might be rendered as double-width characters, so we need to take
+ # that into account when calculating the length of the line.
+ dp_start_offset = _display_width(line, start_offset) + 1
+ dp_end_offset = _display_width(line, end_offset) + 1
+
row.append(' ')
- row.append(' ' * (start_offset - stripped_characters))
+ row.append(' ' * (dp_start_offset - stripped_characters))
if anchors:
- row.append(anchors.primary_char * (anchors.left_end_offset))
- row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
- row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
+ dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
+ dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
+ row.append(anchors.primary_char * dp_left_end_offset)
+ row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
+ row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
else:
- row.append('^' * (end_offset - start_offset))
+ row.append('^' * (dp_end_offset - dp_start_offset))
row.append('\n')
@@ -623,6 +633,25 @@ def _extract_caret_anchors_from_line_segment(segment):
return None
+_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _display_width(line, offset):
+ """Calculate the extra amount of width space the given source
+ code segment might take if it were to be displayed on a fixed
+ width output device. Supports wide unicode characters and emojis."""
+
+ # Fast track for ASCII-only strings
+ if line.isascii():
+ return offset
+
+ import unicodedata
+
+ return sum(
+ 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
+ for char in line[:offset]
+ )
+
+
class _ExceptionPrintContext:
def __init__(self):
diff --git a/Parser/pegen.c b/Parser/pegen.c
index b9894dd..ff02e88 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -38,6 +38,61 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
return size;
}
+// Calculate the extra amount of width space the given source
+// code segment might take if it were to be displayed on a fixed
+// width output device. Supports wide unicode characters and emojis.
+Py_ssize_t
+_PyPegen_calculate_display_width(PyObject *line, Py_ssize_t character_offset)
+{
+ PyObject *segment = PyUnicode_Substring(line, 0, character_offset);
+ if (!segment) {
+ return -1;
+ }
+
+ // Fast track for ascii strings
+ if (PyUnicode_IS_ASCII(segment)) {
+ Py_DECREF(segment);
+ return character_offset;
+ }
+
+ PyObject *width_fn = _PyImport_GetModuleAttrString("unicodedata", "east_asian_width");
+ if (!width_fn) {
+ return -1;
+ }
+
+ Py_ssize_t width = 0;
+ Py_ssize_t len = PyUnicode_GET_LENGTH(segment);
+ for (Py_ssize_t i = 0; i < len; i++) {
+ PyObject *chr = PyUnicode_Substring(segment, i, i + 1);
+ if (!chr) {
+ Py_DECREF(segment);
+ Py_DECREF(width_fn);
+ return -1;
+ }
+
+ PyObject *width_specifier = PyObject_CallOneArg(width_fn, chr);
+ Py_DECREF(chr);
+ if (!width_specifier) {
+ Py_DECREF(segment);
+ Py_DECREF(width_fn);
+ return -1;
+ }
+
+ if (_PyUnicode_EqualToASCIIString(width_specifier, "W") ||
+ _PyUnicode_EqualToASCIIString(width_specifier, "F")) {
+ width += 2;
+ }
+ else {
+ width += 1;
+ }
+ Py_DECREF(width_specifier);
+ }
+
+ Py_DECREF(segment);
+ Py_DECREF(width_fn);
+ return width;
+}
+
// Here, mark is the start of the node, while p->mark is the end.
// If node==NULL, they should be the same.
int
diff --git a/Parser/pegen.h b/Parser/pegen.h
index a8bfa78..268f380 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -151,6 +151,7 @@ expr_ty _PyPegen_name_token(Parser *p);
expr_ty _PyPegen_number_token(Parser *p);
void *_PyPegen_string_token(Parser *p);
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
+Py_ssize_t _PyPegen_calculate_display_width(PyObject *segment, Py_ssize_t character_offset);
// Error handling functions and APIs
typedef enum {
diff --git a/Python/traceback.c b/Python/traceback.c
index 0070f15..4fc4881 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -900,8 +900,39 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
goto done;
}
- if (print_error_location_carets(f, truncation, start_offset, end_offset,
- right_start_offset, left_end_offset,
+ // Convert all offsets to display offsets (e.g. the space they would take up if printed
+ // on the screen).
+ Py_ssize_t dp_start = _PyPegen_calculate_display_width(source_line, start_offset);
+ if (dp_start < 0) {
+ err = ignore_source_errors() < 0;
+ goto done;
+ }
+
+ Py_ssize_t dp_end = _PyPegen_calculate_display_width(source_line, end_offset);
+ if (dp_end < 0) {
+ err = ignore_source_errors() < 0;
+ goto done;
+ }
+
+ Py_ssize_t dp_left_end = -1;
+ Py_ssize_t dp_right_start = -1;
+ if (has_secondary_ranges) {
+ dp_left_end = _PyPegen_calculate_display_width(source_line, left_end_offset);
+ if (dp_left_end < 0) {
+ err = ignore_source_errors() < 0;
+ goto done;
+ }
+
+ dp_right_start = _PyPegen_calculate_display_width(source_line, right_start_offset);
+ if (dp_right_start < 0) {
+ err = ignore_source_errors() < 0;
+ goto done;
+ }
+ }
+
+
+ if (print_error_location_carets(f, truncation, dp_start, dp_end,
+ dp_right_start, dp_left_end,
primary_error_char, secondary_error_char) < 0) {
err = -1;
goto done;