diff options
author | Batuhan Taskaya <isidentical@gmail.com> | 2022-10-29 14:12:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-29 14:12:15 (GMT) |
commit | 751da28febfb0d01ddb8a9b4cb3256386e5f6a81 (patch) | |
tree | 37e977f1d9fb707efd35585e099b2cfd3cef3468 | |
parent | 12957d7cbd07bbbb80e3371f5300aa80f1997cc7 (diff) | |
download | cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.zip cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.tar.gz cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.tar.bz2 |
[3.11] gh-98744: Prevent column-level decoding crashes on traceback module (#98850)
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
-rw-r--r-- | Lib/test/test_traceback.py | 50 | ||||
-rw-r--r-- | Lib/traceback.py | 25 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst | 2 |
3 files changed, 63 insertions, 14 deletions
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 94ccc3f..6d15500 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -778,6 +778,56 @@ class TracebackErrorLocationCaretTests(unittest.TestCase): ] self.assertEqual(actual, expected) + def test_wide_characters_unicode_with_problematic_byte_offset(self): + def f(): + ｗｉｄｔｈ + + actual = self.get_exception(f) + expected = [ + f"Traceback (most recent call last):", + f" File \"{__file__}\", line {self.callable_line}, in get_exception", + f" callable()", + f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f", + f" ｗｉｄｔｈ", + ] + self.assertEqual(actual, expected) + + + def test_byte_offset_with_wide_characters_middle(self): + def f(): + ｗｉｄｔｈ = 1 + raise ValueError(ｗｉｄｔｈ) + + actual = self.get_exception(f) + expected = [ + f"Traceback (most recent call last):", + f" File \"{__file__}\", line {self.callable_line}, in get_exception", + f" callable()", + f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f", + f" raise ValueError(ｗｉｄｔｈ)", + ] + self.assertEqual(actual, expected) + + def test_byte_offset_multiline(self): + def f(): + ｗｗｗ = 1 + ｔｈ = 0 + + print(1, ｗｗｗ( + ｔｈ)) + + actual = self.get_exception(f) + expected = [ + f"Traceback (most recent call last):", + f" File \"{__file__}\", line {self.callable_line}, in get_exception", + f" callable()", + f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f", + f" print(1, ｗｗｗ(", + f" ^^^^", + ] + self.assertEqual(actual, expected) + + @cpython_only @requires_debug_ranges() class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests): diff --git a/Lib/traceback.py b/Lib/traceback.py index 55f8080..fa2cc34 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -475,32 +475,32 @@ class StackSummary(list): frame_summary.colno is not None and frame_summary.end_colno is not None ): - colno = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.colno) - 
end_colno = _byte_offset_to_character_offset( - frame_summary._original_line, frame_summary.end_colno) + start_offset = _byte_offset_to_character_offset( + frame_summary._original_line, frame_summary.colno) + 1 + end_offset = _byte_offset_to_character_offset( + frame_summary._original_line, frame_summary.end_colno) + 1 anchors = None if frame_summary.lineno == frame_summary.end_lineno: with suppress(Exception): anchors = _extract_caret_anchors_from_line_segment( - frame_summary._original_line[colno - 1:end_colno - 1] + frame_summary._original_line[start_offset - 1:end_offset - 1] ) else: - end_colno = stripped_characters + len(stripped_line) + end_offset = stripped_characters + len(stripped_line) # show indicators if primary char doesn't span the frame line - if end_colno - colno < len(stripped_line) or ( + if end_offset - start_offset < len(stripped_line) or ( anchors and anchors.right_start_offset - anchors.left_end_offset > 0): row.append(' ') - row.append(' ' * (colno - stripped_characters)) + row.append(' ' * (start_offset - stripped_characters)) if anchors: row.append(anchors.primary_char * (anchors.left_end_offset)) row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset)) - row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset)) + row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset)) else: - row.append('^' * (end_colno - colno)) + row.append('^' * (end_offset - start_offset)) row.append('\n') @@ -560,10 +560,7 @@ class StackSummary(list): def _byte_offset_to_character_offset(str, offset): as_utf8 = str.encode('utf-8') - if offset > len(as_utf8): - offset = len(as_utf8) - - return len(as_utf8[:offset + 1].decode("utf-8")) + return len(as_utf8[:offset].decode("utf-8", errors="replace")) _Anchors = collections.namedtuple( diff --git a/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst 
b/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst new file mode 100644 index 0000000..cf99ea5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst @@ -0,0 +1,2 @@ +Prevent crashing in :mod:`traceback` when retrieving the byte-offset for +some source files that contain certain unicode characters. |