summaryrefslogtreecommitdiffstats
path: root/Tools/scripts
diff options
context:
space:
mode:
authorŁukasz Langa <lukasz@langa.pl>2022-10-04 22:31:16 (GMT)
committerGitHub <noreply@github.com>2022-10-04 22:31:16 (GMT)
commitbbc7cd649a6ef56eb09278f3e746ca89b9d592c9 (patch)
tree9b27714a75a9d5e550e68405af984b7c61c3176e /Tools/scripts
parent7acb93f0d44c6fb971fdb09b86f68896e3b1e2f8 (diff)
downloadcpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.zip
cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.gz
cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.bz2
gh-97008: Add a Python implementation of AttributeError and NameError suggestions (#97022)
Relevant tests moved from test_exceptions to test_traceback to be able to compare both implementations. Co-authored-by: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>
Diffstat (limited to 'Tools/scripts')
-rw-r--r--Tools/scripts/generate_levenshtein_examples.py70
1 files changed, 70 insertions, 0 deletions
diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py
new file mode 100644
index 0000000..5a8360f
--- /dev/null
+++ b/Tools/scripts/generate_levenshtein_examples.py
@@ -0,0 +1,70 @@
+"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
+
+import argparse
+from functools import cache
+import json
+import os.path
+from random import choices, randrange
+
+
+# Keep these in sync with Lib/traceback.py. They are not imported from there
+# because this script is executed by PYTHON_FOR_REGEN and not by the in-tree
+# build of Python.
+_MOVE_COST = 2  # insertion, deletion, or substitution by a different letter
+_CASE_COST = 1  # substitution between characters that differ only by case
+
+
+def _substitution_cost(ch_a, ch_b):
+ if ch_a == ch_b:
+ return 0
+ if ch_a.lower() == ch_b.lower():
+ return _CASE_COST
+ return _MOVE_COST
+
+
+@cache
+def levenshtein(a, b):
+ if not a or not b:
+ return (len(a) + len(b)) * _MOVE_COST
+ option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
+ option2 = levenshtein(a[:-1], b) + _MOVE_COST
+ option3 = levenshtein(a, b[:-1]) + _MOVE_COST
+ return min(option1, option2, option3)
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('output_path', metavar='FILE', type=str)
+ parser.add_argument('--overwrite', dest='overwrite', action='store_const',
+ const=True, default=False,
+ help='overwrite an existing test file')
+
+ args = parser.parse_args()
+ output_path = os.path.realpath(args.output_path)
+ if not args.overwrite and os.path.isfile(output_path):
+ print(f"{output_path} already exists, skipping regeneration.")
+ print(
+ "To force, add --overwrite to the invocation of this tool or"
+ " delete the existing file."
+ )
+ return
+
+ examples = set()
+ # Create a lot of non-empty examples, which should end up with a Gauss-like
+ # distribution for even costs (moves) and odd costs (case substitutions).
+ while len(examples) < 9990:
+ a = ''.join(choices("abcABC", k=randrange(1, 10)))
+ b = ''.join(choices("abcABC", k=randrange(1, 10)))
+ expected = levenshtein(a, b)
+ examples.add((a, b, expected))
+ # Create one empty case each for strings between 0 and 9 in length.
+ for i in range(10):
+ b = ''.join(choices("abcABC", k=i))
+ expected = levenshtein("", b)
+ examples.add(("", b, expected))
+ with open(output_path, "w") as f:
+ json.dump(sorted(examples), f, indent=2)
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+ main()