summary | refs | log | tree | commit | diff | stats
path: root/Lib/traceback.py
diff options
context:
space:
mode:
author: Batuhan Taskaya <isidentical@gmail.com> 2023-10-26 07:05:29 (GMT)
committer: GitHub <noreply@github.com> 2023-10-26 07:05:29 (GMT)
commit: 78e6d72e38ef4b490f0098b644454031f20ae361 (patch)
tree: cfc9958fc63af33a501d96995342ed709fa6075e /Lib/traceback.py
parent: 90a1b2859f99a4b07da6c46b99759444e3cefbfa (diff)
download: cpython-78e6d72e38ef4b490f0098b644454031f20ae361.zip
cpython-78e6d72e38ef4b490f0098b644454031f20ae361.tar.gz
cpython-78e6d72e38ef4b490f0098b644454031f20ae361.tar.bz2
bpo-43950: handle wide unicode characters in tracebacks (#28150)
Diffstat (limited to 'Lib/traceback.py')
-rw-r--r-- Lib/traceback.py 53
1 files changed, 41 insertions, 12 deletions
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 4f0dff9..0d41c34 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -485,7 +485,8 @@ class StackSummary(list):
stripped_line = frame_summary.line.strip()
row.append(' {}\n'.format(stripped_line))
- orig_line_len = len(frame_summary._original_line)
+ line = frame_summary._original_line
+ orig_line_len = len(line)
frame_line_len = len(frame_summary.line.lstrip())
stripped_characters = orig_line_len - frame_line_len
if (
@@ -493,31 +494,40 @@ class StackSummary(list):
and frame_summary.end_colno is not None
):
start_offset = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.colno) + 1
+ line, frame_summary.colno)
end_offset = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.end_colno) + 1
+ line, frame_summary.end_colno)
+ code_segment = line[start_offset:end_offset]
anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
- anchors = _extract_caret_anchors_from_line_segment(
- frame_summary._original_line[start_offset - 1:end_offset - 1]
- )
+ anchors = _extract_caret_anchors_from_line_segment(code_segment)
else:
- end_offset = stripped_characters + len(stripped_line)
+ # Don't count the newline since the anchors only need to
+ # go up until the last character of the line.
+ end_offset = len(line.rstrip())
# show indicators if primary char doesn't span the frame line
if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
+ # When showing this on a terminal, some of the non-ASCII characters
+ # might be rendered as double-width characters, so we need to take
+ # that into account when calculating the length of the line.
+ dp_start_offset = _display_width(line, start_offset) + 1
+ dp_end_offset = _display_width(line, end_offset) + 1
+
row.append(' ')
- row.append(' ' * (start_offset - stripped_characters))
+ row.append(' ' * (dp_start_offset - stripped_characters))
if anchors:
- row.append(anchors.primary_char * (anchors.left_end_offset))
- row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
- row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
+ dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
+ dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
+ row.append(anchors.primary_char * dp_left_end_offset)
+ row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
+ row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
else:
- row.append('^' * (end_offset - start_offset))
+ row.append('^' * (dp_end_offset - dp_start_offset))
row.append('\n')
@@ -638,6 +648,25 @@ def _extract_caret_anchors_from_line_segment(segment):
return None
+_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _display_width(line, offset):
+ """Return the number of columns the first *offset* characters of
+ *line* occupy on a fixed width output device. East Asian wide ('W')
+ and fullwidth ('F') characters count as two columns, others as one."""
+
+ # Fast track for ASCII-only strings
+ if line.isascii():
+ return offset
+
+ import unicodedata
+
+ return sum(
+ 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
+ for char in line[:offset]
+ )
+
+
class _ExceptionPrintContext:
def __init__(self):