diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2022-02-15 00:36:51 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-15 00:36:51 (GMT) |
commit | 12360aa159c42c7798fd14225d271e6fd84db7eb (patch) | |
tree | ad2b4f0d67c90a5a0be4ff532082fc3468d6d65b /Tools/scripts/generate_global_objects.py | |
parent | 278fdd3e3a2492665b2c2888fd2f428f7f59a3f5 (diff) | |
download | cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.zip cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.tar.gz cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.tar.bz2 |
bpo-46541: Discover the global strings. (gh-31346)
Instead of manually enumerating the global strings in generate_global_objects.py, we now derive the list by scanning the source files for uses of _Py_ID() and _Py_STR() (string literals are picked up from their _Py_DECLARE_STR() declarations).
This is partly inspired by gh-31261.
https://bugs.python.org/issue46541
Diffstat (limited to 'Tools/scripts/generate_global_objects.py')
-rw-r--r-- | Tools/scripts/generate_global_objects.py | 329 |
1 file changed, 74 insertions, 255 deletions
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 7306889..e989f3c 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal') STRING_LITERALS = { 'empty': '', 'dot': '.', - 'comma_sep': ', ', - 'percent': '%', - 'dbl_percent': '%%', - - '"anonymous" labels': None, - 'anon_dictcomp': '<dictcomp>', - 'anon_genexpr': '<genexpr>', - 'anon_lambda': '<lambda>', - 'anon_listcomp': '<listcomp>', - 'anon_module': '<module>', - 'anon_setcomp': '<setcomp>', - 'anon_string': '<string>', - 'dot_locals': '.<locals>', +} +IGNORED = { + 'ACTION', # Python/_warnings.c + 'ATTR', # Python/_warnings.c and Objects/funcobject.c + 'DUNDER', # Objects/typeobject.c + 'RDUNDER', # Objects/typeobject.c + 'SPECIAL', # Objects/weakrefobject.c } IDENTIFIERS = [ - 'Py_Repr', - 'TextIOWrapper', + # from ADD() Python/_warnings.c + 'default', + 'ignore', + + # from GET_WARNINGS_ATTR() in Python/_warnings.c 'WarningMessage', - '_', - '__IOBase_closed', - '__abc_tpflags__', + '_showwarnmsg', + '_warn_unawaited_coroutine', + 'defaultaction', + 'filters', + 'onceregistry', + + # from WRAP_METHOD() in Objects/weakrefobject.c + '__bytes__', + '__reversed__', + + # from COPY_ATTR() in Objects/funcobject.c + '__module__', + '__name__', + '__qualname__', + '__doc__', + '__annotations__', + + # from SLOT* in Objects/typeobject.c '__abs__', - '__abstractmethods__', '__add__', - '__aenter__', - '__aexit__', - '__aiter__', - '__all__', '__and__', - '__anext__', - '__annotations__', - '__args__', - '__await__', - '__bases__', - '__bool__', - '__build_class__', - '__builtins__', - '__bytes__', - '__call__', - '__cantrace__', - '__class__', - '__class_getitem__', - '__classcell__', - '__complex__', - '__contains__', - '__copy__', - '__del__', - '__delattr__', - '__delete__', - '__delitem__', - '__dict__', - '__dir__', '__divmod__', - '__doc__', - 
'__enter__', - '__eq__', - '__exit__', - '__file__', '__float__', '__floordiv__', - '__format__', - '__fspath__', - '__ge__', - '__get__', - '__getattr__', - '__getattribute__', - '__getinitargs__', '__getitem__', - '__getnewargs__', - '__getnewargs_ex__', - '__getstate__', - '__gt__', - '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', - '__import__', '__imul__', - '__index__', - '__init__', - '__init_subclass__', - '__instancecheck__', '__int__', '__invert__', '__ior__', - '__ipow__', '__irshift__', - '__isabstractmethod__', '__isub__', - '__iter__', '__itruediv__', '__ixor__', - '__le__', - '__len__', - '__length_hint__', - '__loader__', '__lshift__', - '__lt__', - '__ltrace__', - '__main__', '__matmul__', - '__missing__', '__mod__', - '__module__', - '__mro_entries__', '__mul__', - '__name__', - '__ne__', '__neg__', - '__new__', - '__newobj__', - '__newobj_ex__', - '__next__', - '__note__', '__or__', - '__origin__', - '__package__', - '__parameters__', - '__path__', '__pos__', '__pow__', - '__prepare__', - '__qualname__', '__radd__', '__rand__', '__rdivmod__', - '__reduce__', - '__reduce_ex__', - '__repr__', - '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__', '__rmod__', '__rmul__', '__ror__', - '__round__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', - '__set__', - '__set_name__', - '__setattr__', - '__setitem__', - '__setstate__', - '__sizeof__', - '__slotnames__', - '__slots__', - '__spec__', '__str__', '__sub__', - '__subclasscheck__', - '__subclasshook__', '__truediv__', - '__trunc__', - '__warningregistry__', - '__weakref__', '__xor__', - '_abc_impl', - '_blksize', - '_dealloc_warn', - '_finalizing', - '_find_and_load', - '_fix_up_module', - '_get_sourcefile', - '_handle_fromlist', - '_initializing', - '_is_text_encoding', - '_lock_unlock_module', - '_showwarnmsg', - '_shutdown', - '_slotnames', - '_strptime_time', - '_uninitialized_submodules', - 
'_warn_unawaited_coroutine', - '_xoptions', - 'add', - 'append', - 'big', - 'buffer', - 'builtins', - 'clear', - 'close', - 'code', - 'copy', - 'copyreg', - 'decode', - 'default', - 'defaultaction', - 'difference_update', - 'dispatch_table', - 'displayhook', - 'enable', - 'encoding', - 'end_lineno', - 'end_offset', - 'errors', - 'excepthook', - 'extend', - 'filename', - 'fileno', - 'fillvalue', - 'filters', - 'find_class', - 'flush', - 'get', - 'get_source', - 'getattr', - 'ignore', - 'importlib', - 'intersection', - 'isatty', - 'items', - 'iter', - 'keys', - 'last_traceback', - 'last_type', - 'last_value', - 'latin1', - 'lineno', - 'little', - 'match', - 'metaclass', - 'mode', - 'modules', - 'mro', - 'msg', - 'n_fields', - 'n_sequence_fields', - 'n_unnamed_fields', - 'name', - 'obj', - 'offset', - 'onceregistry', - 'open', - 'parent', - 'partial', - 'path', - 'peek', - 'persistent_id', - 'persistent_load', - 'print_file_and_line', - 'ps1', - 'ps2', - 'raw', - 'read', - 'read1', - 'readable', - 'readall', - 'readinto', - 'readinto1', - 'readline', - 'reducer_override', - 'reload', - 'replace', - 'reset', - 'return', - 'reversed', - 'seek', - 'seekable', - 'send', - 'setstate', - 'sort', - 'stderr', - 'stdin', - 'stdout', - 'strict', - 'symmetric_difference_update', - 'tell', - 'text', - 'threading', - 'throw', - 'unraisablehook', - 'values', - 'version', - 'warnings', - 'warnoptions', - 'writable', - 'write', - 'zipimporter', ] ####################################### # helpers +def iter_global_strings(): + id_regex = re.compile(r'\b_Py_ID\((\w+)\)') + str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') + for dirname, _, files in os.walk(ROOT): + if os.path.relpath(dirname, ROOT).startswith('Include'): + continue + for name in files: + if not name.endswith(('.c', '.h')): + continue + filename = os.path.join(dirname, name) + with open(os.path.join(filename), encoding='utf-8') as infile: + for lno, line in enumerate(infile, 1): + for m in 
id_regex.finditer(line): + identifier, = m.groups() + yield identifier, None, filename, lno, line + for m in str_regex.finditer(line): + varname, string = m.groups() + yield varname, string, filename, lno, line + def iter_to_marker(lines, marker): for line in lines: if line.rstrip() == marker: @@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj END = '/* End auto-generated code */' -def generate_global_strings(): +def generate_global_strings(identifiers, strings): filename = os.path.join(INTERNAL, 'pycore_global_strings.h') # Read the non-generated part of the file. @@ -371,22 +185,18 @@ def generate_global_strings(): printer.write(START) with printer.block('struct _Py_global_strings', ';'): with printer.block('struct', ' literals;'): - for name, literal in STRING_LITERALS.items(): - if literal is None: - outfile.write('\n') - printer.write(f'// {name}') - else: - printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') + for name, literal in sorted(strings.items()): + printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') outfile.write('\n') with printer.block('struct', ' identifiers;'): - for name in sorted(IDENTIFIERS): + for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'STRUCT_FOR_ID({name})') printer.write(END) printer.write(after) -def generate_runtime_init(): +def generate_runtime_init(identifiers, strings): # First get some info from the declarations. nsmallposints = None nsmallnegints = None @@ -432,13 +242,10 @@ def generate_runtime_init(): # Global strings. 
with printer.block('.strings =', ','): with printer.block('.literals =', ','): - for name, literal in STRING_LITERALS.items(): - if literal is None: - printer.write('') - else: - printer.write(f'INIT_STR({name}, "{literal}"),') + for name, literal in sorted(strings.items()): + printer.write(f'INIT_STR({name}, "{literal}"),') with printer.block('.identifiers =', ','): - for name in sorted(IDENTIFIERS): + for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'INIT_ID({name}),') printer.write(END) @@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r''' ) ''', re.VERBOSE) -def check_orphan_strings(): +def check_orphan_strings(identifiers): literals = set(n for n, s in STRING_LITERALS.items() if s) - identifiers = set(IDENTIFIERS) + identifiers = set(identifiers) files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True) for i, filename in enumerate(files, start=1): print('.', end='') @@ -586,11 +393,23 @@ def check_orphan_strings(): # the script def main(*, check=False) -> None: - generate_global_strings() - generate_runtime_init() + identifiers = set(IDENTIFIERS) + strings = dict(STRING_LITERALS) + for name, string, filename, lno, _ in iter_global_strings(): + if string is None: + if name not in IGNORED: + identifiers.add(name) + else: + if name not in strings: + strings[name] = string + elif string != strings[name]: + raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') + + generate_global_strings(identifiers, strings) + generate_runtime_init(identifiers, strings) if check: - check_orphan_strings() + check_orphan_strings(identifiers) if __name__ == '__main__': |