diff options
author | Ćukasz Langa <lukasz@langa.pl> | 2022-10-04 22:31:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-04 22:31:16 (GMT) |
commit | bbc7cd649a6ef56eb09278f3e746ca89b9d592c9 (patch) | |
tree | 9b27714a75a9d5e550e68405af984b7c61c3176e /Lib/traceback.py | |
parent | 7acb93f0d44c6fb971fdb09b86f68896e3b1e2f8 (diff) | |
download | cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.zip cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.gz cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.bz2 |
gh-97008: Add a Python implementation of AttributeError and NameError suggestions (#97022)
Relevant tests moved from test_exceptions to test_traceback to be able to
compare both implementations.
Co-authored-by: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>
Diffstat (limited to 'Lib/traceback.py')
-rw-r--r-- | Lib/traceback.py | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/Lib/traceback.py b/Lib/traceback.py index b1a5fd0..c46ddaf 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -707,6 +707,11 @@ class TracebackException: self.offset = exc_value.offset self.end_offset = exc_value.end_offset self.msg = exc_value.msg + elif exc_type and issubclass(exc_type, (NameError, AttributeError)) and \ + getattr(exc_value, "name", None) is not None: + suggestion = _compute_suggestion_error(exc_value, exc_traceback) + if suggestion: + self._str += f". Did you mean: '{suggestion}'?" if lookup_lines: self._load_lines() self.__suppress_context__ = \ @@ -977,3 +982,125 @@ class TracebackException: file = sys.stderr for line in self.format(chain=chain): print(line, file=file, end="") + + +_MAX_CANDIDATE_ITEMS = 750 +_MAX_STRING_SIZE = 40 +_MOVE_COST = 2 +_CASE_COST = 1 + + +def _substitution_cost(ch_a, ch_b): + if ch_a == ch_b: + return 0 + if ch_a.lower() == ch_b.lower(): + return _CASE_COST + return _MOVE_COST + + +def _compute_suggestion_error(exc_value, tb): + wrong_name = getattr(exc_value, "name", None) + if wrong_name is None or not isinstance(wrong_name, str): + return None + if isinstance(exc_value, AttributeError): + obj = exc_value.obj + try: + d = dir(obj) + except Exception: + return None + else: + assert isinstance(exc_value, NameError) + # find most recent frame + if tb is None: + return None + while tb.tb_next is not None: + tb = tb.tb_next + frame = tb.tb_frame + d = ( + list(frame.f_locals) + + list(frame.f_globals) + + list(frame.f_globals['__builtins__']) + ) + if len(d) > _MAX_CANDIDATE_ITEMS: + return None + wrong_name_len = len(wrong_name) + if wrong_name_len > _MAX_STRING_SIZE: + return None + best_distance = wrong_name_len + suggestion = None + for possible_name in d: + if possible_name == wrong_name: + # A missing attribute is "found". Don't suggest it (see GH-88821). + continue + # No more than 1/3 of the involved characters should need changed. + max_distance = (len(possible_name) + wrong_name_len + 3) * _MOVE_COST // 6 + # Don't take matches we've already beaten. + max_distance = min(max_distance, best_distance - 1) + current_distance = _levenshtein_distance(wrong_name, possible_name, max_distance) + if current_distance > max_distance: + continue + if not suggestion or current_distance < best_distance: + suggestion = possible_name + best_distance = current_distance + return suggestion + + +def _levenshtein_distance(a, b, max_cost): + # A Python implementation of Python/suggestions.c:levenshtein_distance. + + # Both strings are the same + if a == b: + return 0 + + # Trim away common affixes + pre = 0 + while a[pre:] and b[pre:] and a[pre] == b[pre]: + pre += 1 + a = a[pre:] + b = b[pre:] + post = 0 + while a[:post or None] and b[:post or None] and a[post-1] == b[post-1]: + post -= 1 + a = a[:post or None] + b = b[:post or None] + if not a or not b: + return _MOVE_COST * (len(a) + len(b)) + if len(a) > _MAX_STRING_SIZE or len(b) > _MAX_STRING_SIZE: + return max_cost + 1 + + # Prefer shorter buffer + if len(b) < len(a): + a, b = b, a + + # Quick fail when a match is impossible + if (len(b) - len(a)) * _MOVE_COST > max_cost: + return max_cost + 1 + + # Instead of producing the whole traditional len(a)-by-len(b) + # matrix, we can update just one row in place. + # Initialize the buffer row + row = list(range(_MOVE_COST, _MOVE_COST * (len(a) + 1), _MOVE_COST)) + + result = 0 + for bindex in range(len(b)): + bchar = b[bindex] + distance = result = bindex * _MOVE_COST + minimum = sys.maxsize + for index in range(len(a)): + # 1) Previous distance in this row is cost(b[:b_index], a[:index]) + substitute = distance + _substitution_cost(bchar, a[index]) + # 2) cost(b[:b_index], a[:index+1]) from previous row + distance = row[index] + # 3) existing result is cost(b[:b_index+1], a[index]) + + insert_delete = min(result, distance) + _MOVE_COST + result = min(insert_delete, substitute) + + # cost(b[:b_index+1], a[:index+1]) + row[index] = result + if result < minimum: + minimum = result + if minimum > max_cost: + # Everything in this row is too big, so bail early. + return max_cost + 1 + return result |