diff options
author | Batuhan Taskaya <isidentical@gmail.com> | 2023-10-26 07:05:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-26 07:05:29 (GMT) |
commit | 78e6d72e38ef4b490f0098b644454031f20ae361 (patch) | |
tree | cfc9958fc63af33a501d96995342ed709fa6075e /Lib/traceback.py | |
parent | 90a1b2859f99a4b07da6c46b99759444e3cefbfa (diff) | |
download | cpython-78e6d72e38ef4b490f0098b644454031f20ae361.zip cpython-78e6d72e38ef4b490f0098b644454031f20ae361.tar.gz cpython-78e6d72e38ef4b490f0098b644454031f20ae361.tar.bz2 |
bpo-43950: handle wide unicode characters in tracebacks (#28150)
Diffstat (limited to 'Lib/traceback.py')
-rw-r--r-- | Lib/traceback.py | 53 |
1 files changed, 41 insertions, 12 deletions
diff --git a/Lib/traceback.py b/Lib/traceback.py index 4f0dff9..0d41c34 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -485,7 +485,8 @@ class StackSummary(list): stripped_line = frame_summary.line.strip() row.append(' {}\n'.format(stripped_line)) - orig_line_len = len(frame_summary._original_line) + line = frame_summary._original_line + orig_line_len = len(line) frame_line_len = len(frame_summary.line.lstrip()) stripped_characters = orig_line_len - frame_line_len if ( @@ -493,31 +494,40 @@ class StackSummary(list): and frame_summary.end_colno is not None ): start_offset = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.colno) + 1 + line, frame_summary.colno) end_offset = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.end_colno) + 1 + line, frame_summary.end_colno) + code_segment = line[start_offset:end_offset] anchors = None if frame_summary.lineno == frame_summary.end_lineno: with suppress(Exception): - anchors = _extract_caret_anchors_from_line_segment( - frame_summary._original_line[start_offset - 1:end_offset - 1] - ) + anchors = _extract_caret_anchors_from_line_segment(code_segment) else: - end_offset = stripped_characters + len(stripped_line) + # Don't count the newline since the anchors only need to + # go up until the last character of the line. + end_offset = len(line.rstrip()) # show indicators if primary char doesn't span the frame line if end_offset - start_offset < len(stripped_line) or ( anchors and anchors.right_start_offset - anchors.left_end_offset > 0): + # When showing this on a terminal, some of the non-ASCII characters + # might be rendered as double-width characters, so we need to take + # that into account when calculating the length of the line. + dp_start_offset = _display_width(line, start_offset) + 1 + dp_end_offset = _display_width(line, end_offset) + 1 + row.append(' ') - row.append(' ' * (start_offset - stripped_characters)) + row.append(' ' * (dp_start_offset - stripped_characters)) if anchors: - row.append(anchors.primary_char * (anchors.left_end_offset)) - row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset)) - row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset)) + dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset) + dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset) + row.append(anchors.primary_char * dp_left_end_offset) + row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset)) + row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset)) else: - row.append('^' * (end_offset - start_offset)) + row.append('^' * (dp_end_offset - dp_start_offset)) row.append('\n') @@ -638,6 +648,25 @@ def _extract_caret_anchors_from_line_segment(segment): return None +_WIDE_CHAR_SPECIFIERS = "WF" + +def _display_width(line, offset): + """Calculate the extra amount of width space the given source + code segment might take if it were to be displayed on a fixed + width output device. Supports wide unicode characters and emojis.""" + + # Fast track for ASCII-only strings + if line.isascii(): + return offset + + import unicodedata + + return sum( + 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1 + for char in line[:offset] + ) + + class _ExceptionPrintContext: def __init__(self): |