gh-97008: Add a Python implementation of AttributeError and NameError suggestions (#97022)

Relevant tests moved from test_exceptions to test_traceback to be able to compare both implementations.

Co-authored-by: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>
author: Łukasz Langa <lukasz@langa.pl> 2022-10-04 22:31:16 (GMT)
committer: GitHub <noreply@github.com> 2022-10-04 22:31:16 (GMT)
commit: bbc7cd649a6ef56eb09278f3e746ca89b9d592c9 (patch)
tree: 9b27714a75a9d5e550e68405af984b7c61c3176e /Tools/scripts
parent: 7acb93f0d44c6fb971fdb09b86f68896e3b1e2f8 (diff)
download: cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.zip
cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.gz
cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.bz2
1 file changed, 70 insertions, 0 deletions
diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py
new file mode 100644
index 0000000..5a8360f
--- /dev/null
+++ b/Tools/scripts/generate_levenshtein_examples.py
@@ -0,0 +1,70 @@
+"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
+
+import argparse
+from functools import cache
+import json
+import os.path
+from random import choices, randrange
+
+
+# This should be in sync with Lib/traceback.py.  It's not importing those values
+# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
+# build of Python.
+_MOVE_COST = 2
+_CASE_COST = 1
+
+
+def _substitution_cost(ch_a, ch_b):
+    if ch_a == ch_b:
+        return 0
+    if ch_a.lower() == ch_b.lower():
+        return _CASE_COST
+    return _MOVE_COST
+
+
+@cache
+def levenshtein(a, b):
+    if not a or not b:
+        return (len(a) + len(b)) * _MOVE_COST
+    option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
+    option2 = levenshtein(a[:-1], b) + _MOVE_COST
+    option3 = levenshtein(a, b[:-1]) + _MOVE_COST
+    return min(option1, option2, option3)
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('output_path', metavar='FILE', type=str)
+    parser.add_argument('--overwrite', dest='overwrite', action='store_const',
+                        const=True, default=False,
+                        help='overwrite an existing test file')
+
+    args = parser.parse_args()
+    output_path = os.path.realpath(args.output_path)
+    if not args.overwrite and os.path.isfile(output_path):
+        print(f"{output_path} already exists, skipping regeneration.")
+        print(
+            "To force, add --overwrite to the invocation of this tool or"
+            " delete the existing file."
+        )
+        return
+
+    examples = set()
+    # Create a lot of non-empty examples, which should end up with a Gauss-like
+    # distribution for even costs (moves) and odd costs (case substitutions).
+    while len(examples) < 9990:
+        a = ''.join(choices("abcABC", k=randrange(1, 10)))
+        b = ''.join(choices("abcABC", k=randrange(1, 10)))
+        expected = levenshtein(a, b)
+        examples.add((a, b, expected))
+    # Create one empty case each for strings between 0 and 9 in length.
+    for i in range(10):
+        b = ''.join(choices("abcABC", k=i))
+        expected = levenshtein("", b)
+        examples.add(("", b, expected))
+    with open(output_path, "w") as f:
+        json.dump(sorted(examples), f, indent=2)
+
+
+if __name__ == "__main__":
+    main()
author	Łukasz Langa <lukasz@langa.pl>	2022-10-04 22:31:16 (GMT)
committer	GitHub <noreply@github.com>	2022-10-04 22:31:16 (GMT)
commit	bbc7cd649a6ef56eb09278f3e746ca89b9d592c9 (patch)
tree	9b27714a75a9d5e550e68405af984b7c61c3176e /Tools/scripts
parent	7acb93f0d44c6fb971fdb09b86f68896e3b1e2f8 (diff)
download	cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.zip cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.gz cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.bz2