diff options
Diffstat (limited to 'Lib/traceback.py')
| -rw-r--r-- | Lib/traceback.py | 368 |
1 files changed, 286 insertions, 82 deletions
diff --git a/Lib/traceback.py b/Lib/traceback.py index 5d83f85..a0485a7 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -274,7 +274,7 @@ class FrameSummary: """ __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno', - 'name', '_line', 'locals') + 'name', '_lines', '_lines_dedented', 'locals') def __init__(self, filename, lineno, name, *, lookup_line=True, locals=None, line=None, @@ -290,15 +290,16 @@ class FrameSummary: """ self.filename = filename self.lineno = lineno + self.end_lineno = lineno if end_lineno is None else end_lineno + self.colno = colno + self.end_colno = end_colno self.name = name - self._line = line + self._lines = line + self._lines_dedented = None if lookup_line: self.line self.locals = {k: _safe_string(v, 'local', func=repr) for k, v in locals.items()} if locals else None - self.end_lineno = end_lineno - self.colno = colno - self.end_colno = end_colno def __eq__(self, other): if isinstance(other, FrameSummary): @@ -323,19 +324,39 @@ class FrameSummary: def __len__(self): return 4 + def _set_lines(self): + if ( + self._lines is None + and self.lineno is not None + and self.end_lineno is not None + ): + lines = [] + for lineno in range(self.lineno, self.end_lineno + 1): + # treat errors (empty string) and empty lines (newline) as the same + lines.append(linecache.getline(self.filename, lineno).rstrip()) + self._lines = "\n".join(lines) + "\n" + @property - def _original_line(self): + def _original_lines(self): # Returns the line as-is from the source, without modifying whitespace. - self.line - return self._line + self._set_lines() + return self._lines + + @property + def _dedented_lines(self): + # Returns _original_lines, but dedented + self._set_lines() + if self._lines_dedented is None and self._lines is not None: + self._lines_dedented = textwrap.dedent(self._lines) + return self._lines_dedented @property def line(self): - if self._line is None: - if self.lineno is None: - return None - self._line = linecache.getline(self.filename, self.lineno) - return self._line.strip() + self._set_lines() + if self._lines is None: + return None + # return only the first line, stripped + return self._lines.partition("\n")[0].strip() def walk_stack(f): @@ -487,56 +508,135 @@ class StackSummary(list): filename = "<stdin>" row.append(' File "{}", line {}, in {}\n'.format( filename, frame_summary.lineno, frame_summary.name)) - if frame_summary.line: - stripped_line = frame_summary.line.strip() - row.append(' {}\n'.format(stripped_line)) - - line = frame_summary._original_line - orig_line_len = len(line) - frame_line_len = len(frame_summary.line.lstrip()) - stripped_characters = orig_line_len - frame_line_len + if frame_summary._dedented_lines and frame_summary._dedented_lines.strip(): if ( - frame_summary.colno is not None - and frame_summary.end_colno is not None + frame_summary.colno is None or + frame_summary.end_colno is None ): - start_offset = _byte_offset_to_character_offset( - line, frame_summary.colno) - end_offset = _byte_offset_to_character_offset( - line, frame_summary.end_colno) - code_segment = line[start_offset:end_offset] + # only output first line if column information is missing + row.append(textwrap.indent(frame_summary.line, ' ') + "\n") + else: + # get first and last line + all_lines_original = frame_summary._original_lines.splitlines() + first_line = all_lines_original[0] + # assume all_lines_original has enough lines (since we constructed it) + last_line = all_lines_original[frame_summary.end_lineno - frame_summary.lineno] + + # character index of the start/end of the instruction + start_offset = _byte_offset_to_character_offset(first_line, frame_summary.colno) + end_offset = _byte_offset_to_character_offset(last_line, frame_summary.end_colno) + + all_lines = frame_summary._dedented_lines.splitlines()[ + :frame_summary.end_lineno - frame_summary.lineno + 1 + ] + # adjust start/end offset based on dedent + dedent_characters = len(first_line) - len(all_lines[0]) + start_offset = max(0, start_offset - dedent_characters) + end_offset = max(0, end_offset - dedent_characters) + + # When showing this on a terminal, some of the non-ASCII characters + # might be rendered as double-width characters, so we need to take + # that into account when calculating the length of the line. + dp_start_offset = _display_width(all_lines[0], offset=start_offset) + dp_end_offset = _display_width(all_lines[-1], offset=end_offset) + + # get exact code segment corresponding to the instruction + segment = "\n".join(all_lines) + segment = segment[start_offset:len(segment) - (len(all_lines[-1]) - end_offset)] + + # attempt to parse for anchors anchors = None - if frame_summary.lineno == frame_summary.end_lineno: - with suppress(Exception): - anchors = _extract_caret_anchors_from_line_segment(code_segment) - else: - # Don't count the newline since the anchors only need to - # go up until the last character of the line. - end_offset = len(line.rstrip()) - - # show indicators if primary char doesn't span the frame line - if end_offset - start_offset < len(stripped_line) or ( - anchors and anchors.right_start_offset - anchors.left_end_offset > 0): - # When showing this on a terminal, some of the non-ASCII characters - # might be rendered as double-width characters, so we need to take - # that into account when calculating the length of the line. - dp_start_offset = _display_width(line, start_offset) + 1 - dp_end_offset = _display_width(line, end_offset) + 1 - - row.append(' ') - row.append(' ' * (dp_start_offset - stripped_characters)) - - if anchors: - dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset) - dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset) - row.append(anchors.primary_char * dp_left_end_offset) - row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset)) - row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset)) - else: - row.append('^' * (dp_end_offset - dp_start_offset)) + with suppress(Exception): + anchors = _extract_caret_anchors_from_line_segment(segment) + + # only use carets if there are anchors or the carets do not span all lines + show_carets = False + if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip(): + show_carets = True + + result = [] + + # only display first line, last line, and lines around anchor start/end + significant_lines = {0, len(all_lines) - 1} + + anchors_left_end_offset = 0 + anchors_right_start_offset = 0 + primary_char = "^" + secondary_char = "^" + if anchors: + anchors_left_end_offset = anchors.left_end_offset + anchors_right_start_offset = anchors.right_start_offset + # computed anchor positions do not take start_offset into account, + # so account for it here + if anchors.left_end_lineno == 0: + anchors_left_end_offset += start_offset + if anchors.right_start_lineno == 0: + anchors_right_start_offset += start_offset + + # account for display width + anchors_left_end_offset = _display_width( + all_lines[anchors.left_end_lineno], offset=anchors_left_end_offset + ) + anchors_right_start_offset = _display_width( + all_lines[anchors.right_start_lineno], offset=anchors_right_start_offset + ) - row.append('\n') + primary_char = anchors.primary_char + secondary_char = anchors.secondary_char + significant_lines.update( + range(anchors.left_end_lineno - 1, anchors.left_end_lineno + 2) + ) + significant_lines.update( + range(anchors.right_start_lineno - 1, anchors.right_start_lineno + 2) + ) + # remove bad line numbers + significant_lines.discard(-1) + significant_lines.discard(len(all_lines)) + + def output_line(lineno): + """output all_lines[lineno] along with carets""" + result.append(all_lines[lineno] + "\n") + if not show_carets: + return + num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip()) + carets = [] + num_carets = dp_end_offset if lineno == len(all_lines) - 1 else _display_width(all_lines[lineno]) + # compute caret character for each position + for col in range(num_carets): + if col < num_spaces or (lineno == 0 and col < dp_start_offset): + # before first non-ws char of the line, or before start of instruction + carets.append(' ') + elif anchors and ( + lineno > anchors.left_end_lineno or + (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset) + ) and ( + lineno < anchors.right_start_lineno or + (lineno == anchors.right_start_lineno and col < anchors_right_start_offset) + ): + # within anchors + carets.append(secondary_char) + else: + carets.append(primary_char) + result.append("".join(carets) + "\n") + + # display significant lines + sig_lines_list = sorted(significant_lines) + for i, lineno in enumerate(sig_lines_list): + if i: + linediff = lineno - sig_lines_list[i - 1] + if linediff == 2: + # 1 line in between - just output it + output_line(lineno - 1) + elif linediff > 2: + # > 1 line in between - abbreviate + result.append(f"...<{linediff - 1} lines>...\n") + output_line(lineno) + + row.append( + textwrap.indent(textwrap.dedent("".join(result)), ' ', lambda line: True) + ) if frame_summary.locals: for name, value in sorted(frame_summary.locals.items()): row.append(' {name} = {value}\n'.format(name=name, value=value)) @@ -599,7 +699,9 @@ def _byte_offset_to_character_offset(str, offset): _Anchors = collections.namedtuple( "_Anchors", [ + "left_end_lineno", "left_end_offset", + "right_start_lineno", "right_start_offset", "primary_char", "secondary_char", @@ -608,59 +710,161 @@ _Anchors = collections.namedtuple( ) def _extract_caret_anchors_from_line_segment(segment): + """ + Given source code `segment` corresponding to a FrameSummary, determine: + - for binary ops, the location of the binary op + - for indexing and function calls, the location of the brackets. + `segment` is expected to be a valid Python expression. + """ import ast try: - tree = ast.parse(segment) + # Without parentheses, `segment` is parsed as a statement. + # Binary ops, subscripts, and calls are expressions, so + # we can wrap them with parentheses to parse them as + # (possibly multi-line) expressions. + # e.g. if we try to highlight the addition in + # x = ( + # a + + # b + # ) + # then we would ast.parse + # a + + # b + # which is not a valid statement because of the newline. + # Adding brackets makes it a valid expression. + # ( + # a + + # b + # ) + # Line locations will be different than the original, + # which is taken into account later on. + tree = ast.parse(f"(\n{segment}\n)") except SyntaxError: return None if len(tree.body) != 1: return None - normalize = lambda offset: _byte_offset_to_character_offset(segment, offset) + lines = segment.splitlines() + + def normalize(lineno, offset): + """Get character index given byte offset""" + return _byte_offset_to_character_offset(lines[lineno], offset) + + def next_valid_char(lineno, col): + """Gets the next valid character index in `lines`, if + the current location is not valid. Handles empty lines. + """ + while lineno < len(lines) and col >= len(lines[lineno]): + col = 0 + lineno += 1 + assert lineno < len(lines) and col < len(lines[lineno]) + return lineno, col + + def increment(lineno, col): + """Get the next valid character index in `lines`.""" + col += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + def nextline(lineno, col): + """Get the next valid character at least on the next line""" + col = 0 + lineno += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + def increment_until(lineno, col, stop): + """Get the next valid non-"\\#" character that satisfies the `stop` predicate""" + while True: + ch = lines[lineno][col] + if ch in "\\#": + lineno, col = nextline(lineno, col) + elif not stop(ch): + lineno, col = increment(lineno, col) + else: + break + return lineno, col + + def setup_positions(expr, force_valid=True): + """Get the lineno/col position of the end of `expr`. If `force_valid` is True, + forces the position to be a valid character (e.g. if the position is beyond the + end of the line, move to the next line) + """ + # -2 since end_lineno is 1-indexed and because we added an extra + # bracket + newline to `segment` when calling ast.parse + lineno = expr.end_lineno - 2 + col = normalize(lineno, expr.end_col_offset) + return next_valid_char(lineno, col) if force_valid else (lineno, col) + statement = tree.body[0] match statement: case ast.Expr(expr): match expr: case ast.BinOp(): - operator_start = normalize(expr.left.end_col_offset) - operator_end = normalize(expr.right.col_offset) - operator_str = segment[operator_start:operator_end] - operator_offset = len(operator_str) - len(operator_str.lstrip()) + # ast gives these locations for BinOp subexpressions + # ( left_expr ) + ( right_expr ) + # left^^^^^ right^^^^^ + lineno, col = setup_positions(expr.left) - left_anchor = expr.left.end_col_offset + operator_offset - right_anchor = left_anchor + 1 + # First operator character is the first non-space/')' character + lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')') + + # binary op is 1 or 2 characters long, on the same line, + # before the right subexpression + right_col = col + 1 if ( - operator_offset + 1 < len(operator_str) - and not operator_str[operator_offset + 1].isspace() + right_col < len(lines[lineno]) + and ( + # operator char should not be in the right subexpression + expr.right.lineno - 2 > lineno or + right_col < normalize(expr.right.lineno - 2, expr.right.col_offset) + ) + and not (ch := lines[lineno][right_col]).isspace() + and ch not in "\\#" ): - right_anchor += 1 + right_col += 1 - while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch in ")#"): - left_anchor += 1 - right_anchor += 1 - return _Anchors(normalize(left_anchor), normalize(right_anchor)) + # right_col can be invalid since it is exclusive + return _Anchors(lineno, col, lineno, right_col) case ast.Subscript(): - left_anchor = normalize(expr.value.end_col_offset) - right_anchor = normalize(expr.slice.end_col_offset + 1) - while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch != "["): - left_anchor += 1 - while right_anchor < len(segment) and ((ch := segment[right_anchor]).isspace() or ch != "]"): - right_anchor += 1 - if right_anchor < len(segment): - right_anchor += 1 - return _Anchors(left_anchor, right_anchor) + # ast gives these locations for value and slice subexpressions + # ( value_expr ) [ slice_expr ] + # value^^^^^ slice^^^^^ + # subscript^^^^^^^^^^^^^^^^^^^^ + + # find left bracket + left_lineno, left_col = setup_positions(expr.value) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[') + # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) + case ast.Call(): + # ast gives these locations for function call expressions + # ( func_expr ) (args, kwargs) + # func^^^^^ + # call^^^^^^^^^^^^^^^^^^^^^^^^ + + # find left bracket + left_lineno, left_col = setup_positions(expr.func) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(') + # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) return None _WIDE_CHAR_SPECIFIERS = "WF" -def _display_width(line, offset): +def _display_width(line, offset=None): """Calculate the extra amount of width space the given source code segment might take if it were to be displayed on a fixed width output device. Supports wide unicode characters and emojis.""" + if offset is None: + offset = len(line) + # Fast track for ASCII-only strings if line.isascii(): return offset |
