summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Batuhan Taskaya <isidentical@gmail.com> 2022-10-29 14:12:15 (GMT)
committer GitHub <noreply@github.com> 2022-10-29 14:12:15 (GMT)
commit 751da28febfb0d01ddb8a9b4cb3256386e5f6a81 (patch)
tree 37e977f1d9fb707efd35585e099b2cfd3cef3468
parent 12957d7cbd07bbbb80e3371f5300aa80f1997cc7 (diff)
download cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.zip
cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.tar.gz
cpython-751da28febfb0d01ddb8a9b4cb3256386e5f6a81.tar.bz2
[3.11] gh-98744: Prevent column-level decoding crashes on traceback module (#98850)
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
-rw-r--r-- Lib/test/test_traceback.py 50
-rw-r--r-- Lib/traceback.py 25
-rw-r--r-- Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst 2
3 files changed, 63 insertions, 14 deletions
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 94ccc3f..6d15500 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -778,6 +778,56 @@ class TracebackErrorLocationCaretTests(unittest.TestCase):
]
self.assertEqual(actual, expected)
+ def test_wide_characters_unicode_with_problematic_byte_offset(self):
+ def f():
+ width
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
+ f" width",
+ ]
+ self.assertEqual(actual, expected)
+
+
+ def test_byte_offset_with_wide_characters_middle(self):
+ def f():
+ width = 1
+ raise ValueError(width)
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
+ f" raise ValueError(width)",
+ ]
+ self.assertEqual(actual, expected)
+
+ def test_byte_offset_multiline(self):
+ def f():
+ www = 1
+ th = 0
+
+ print(1, www(
+ th))
+
+ actual = self.get_exception(f)
+ expected = [
+ f"Traceback (most recent call last):",
+ f" File \"{__file__}\", line {self.callable_line}, in get_exception",
+ f" callable()",
+ f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
+ f" print(1, www(",
+ f" ^^^^",
+ ]
+ self.assertEqual(actual, expected)
+
+
@cpython_only
@requires_debug_ranges()
class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests):
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 55f8080..fa2cc34 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -475,32 +475,32 @@ class StackSummary(list):
frame_summary.colno is not None
and frame_summary.end_colno is not None
):
- colno = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.colno)
- end_colno = _byte_offset_to_character_offset(
- frame_summary._original_line, frame_summary.end_colno)
+ start_offset = _byte_offset_to_character_offset(
+ frame_summary._original_line, frame_summary.colno) + 1
+ end_offset = _byte_offset_to_character_offset(
+ frame_summary._original_line, frame_summary.end_colno) + 1
anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
anchors = _extract_caret_anchors_from_line_segment(
- frame_summary._original_line[colno - 1:end_colno - 1]
+ frame_summary._original_line[start_offset - 1:end_offset - 1]
)
else:
- end_colno = stripped_characters + len(stripped_line)
+ end_offset = stripped_characters + len(stripped_line)
# show indicators if primary char doesn't span the frame line
- if end_colno - colno < len(stripped_line) or (
+ if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
row.append(' ')
- row.append(' ' * (colno - stripped_characters))
+ row.append(' ' * (start_offset - stripped_characters))
if anchors:
row.append(anchors.primary_char * (anchors.left_end_offset))
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
- row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
+ row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
else:
- row.append('^' * (end_colno - colno))
+ row.append('^' * (end_offset - start_offset))
row.append('\n')
@@ -560,10 +560,7 @@ class StackSummary(list):
def _byte_offset_to_character_offset(str, offset):
as_utf8 = str.encode('utf-8')
- if offset > len(as_utf8):
- offset = len(as_utf8)
-
- return len(as_utf8[:offset + 1].decode("utf-8"))
+ return len(as_utf8[:offset].decode("utf-8", errors="replace"))
_Anchors = collections.namedtuple(
diff --git a/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst b/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst
new file mode 100644
index 0000000..cf99ea5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-10-28-23-44-17.gh-issue-98744.sGHDWm.rst
@@ -0,0 +1,2 @@
+Prevent crashing in :mod:`traceback` when retrieving the byte-offset for
+some source files that contain certain unicode characters.