diff options
author | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2022-08-19 09:20:44 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-19 09:20:44 (GMT) |
commit | 2d9f252c0c08bce0e776b38906c3bbb59a3bd2c5 (patch) | |
tree | 36a6f8b67f68ee593c55ad32d96a9ab46429a3b6 /Tools/unicode | |
parent | ee9f22d3464308566c63e972133ebf71b7664baa (diff) | |
download | cpython-2d9f252c0c08bce0e776b38906c3bbb59a3bd2c5.zip cpython-2d9f252c0c08bce0e776b38906c3bbb59a3bd2c5.tar.gz cpython-2d9f252c0c08bce0e776b38906c3bbb59a3bd2c5.tar.bz2 |
gh-96019: Fix caching of decompositions in makeunicodedata (GH-96020)
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 10 |
1 file changed, 7 insertions, 3 deletions
```diff
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 4894844..f28266f 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -169,6 +169,7 @@ def makeunicodedata(unicode, trace):
 
     # 2) decomposition data
 
+    decomp_data_cache = {}
     decomp_data = [0]
     decomp_prefix = [""]
     decomp_index = [0] * len(unicode.chars)
@@ -207,12 +208,15 @@ def makeunicodedata(unicode, trace):
                     comp_first[l] = 1
                     comp_last[r] = 1
                     comp_pairs.append((l,r,char))
-                try:
-                    i = decomp_data.index(decomp)
-                except ValueError:
+                key = tuple(decomp)
+                i = decomp_data_cache.get(key, -1)
+                if i == -1:
                     i = len(decomp_data)
                     decomp_data.extend(decomp)
                     decomp_size = decomp_size + len(decomp) * 2
+                    decomp_data_cache[key] = i
+                else:
+                    assert decomp_data[i:i+len(decomp)] == decomp
             else:
                 i = 0
             decomp_index[char] = i
```