diff options
author | Ćukasz Langa <lukasz@langa.pl> | 2022-10-04 22:31:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-04 22:31:16 (GMT) |
commit | bbc7cd649a6ef56eb09278f3e746ca89b9d592c9 (patch) | |
tree | 9b27714a75a9d5e550e68405af984b7c61c3176e /Tools/scripts | |
parent | 7acb93f0d44c6fb971fdb09b86f68896e3b1e2f8 (diff) | |
download | cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.zip cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.gz cpython-bbc7cd649a6ef56eb09278f3e746ca89b9d592c9.tar.bz2 |
gh-97008: Add a Python implementation of AttributeError and NameError suggestions (#97022)
Relevant tests moved from test_exceptions to test_traceback to be able to
compare both implementations.
Co-authored-by: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>
Diffstat (limited to 'Tools/scripts')
-rw-r--r-- | Tools/scripts/generate_levenshtein_examples.py | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py new file mode 100644 index 0000000..5a8360f --- /dev/null +++ b/Tools/scripts/generate_levenshtein_examples.py @@ -0,0 +1,70 @@ +"""Generate 10,000 unique examples for the Levenshtein short-circuit tests.""" + +import argparse +from functools import cache +import json +import os.path +from random import choices, randrange + + +# This should be in sync with Lib/traceback.py. It's not importing those values +# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree +# build of Python. +_MOVE_COST = 2 +_CASE_COST = 1 + + +def _substitution_cost(ch_a, ch_b): + if ch_a == ch_b: + return 0 + if ch_a.lower() == ch_b.lower(): + return _CASE_COST + return _MOVE_COST + + +@cache +def levenshtein(a, b): + if not a or not b: + return (len(a) + len(b)) * _MOVE_COST + option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1]) + option2 = levenshtein(a[:-1], b) + _MOVE_COST + option3 = levenshtein(a, b[:-1]) + _MOVE_COST + return min(option1, option2, option3) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('output_path', metavar='FILE', type=str) + parser.add_argument('--overwrite', dest='overwrite', action='store_const', + const=True, default=False, + help='overwrite an existing test file') + + args = parser.parse_args() + output_path = os.path.realpath(args.output_path) + if not args.overwrite and os.path.isfile(output_path): + print(f"{output_path} already exists, skipping regeneration.") + print( + "To force, add --overwrite to the invocation of this tool or" + " delete the existing file." + ) + return + + examples = set() + # Create a lot of non-empty examples, which should end up with a Gauss-like + # distribution for even costs (moves) and odd costs (case substitutions). + while len(examples) < 9990: + a = ''.join(choices("abcABC", k=randrange(1, 10))) + b = ''.join(choices("abcABC", k=randrange(1, 10))) + expected = levenshtein(a, b) + examples.add((a, b, expected)) + # Create one empty case each for strings between 0 and 9 in length. + for i in range(10): + b = ''.join(choices("abcABC", k=i)) + expected = levenshtein("", b) + examples.add(("", b, expected)) + with open(output_path, "w") as f: + json.dump(sorted(examples), f, indent=2) + + +if __name__ == "__main__": + main() |