summary | refs | log | tree | commit | diff | stats
path: root/Tools/scripts/generate_global_objects.py
diff options
context:
space:
mode:
author: Eric Snow <ericsnowcurrently@gmail.com>, 2022-02-15 00:36:51 (GMT)
committer: GitHub <noreply@github.com>, 2022-02-15 00:36:51 (GMT)
commit: 12360aa159c42c7798fd14225d271e6fd84db7eb (patch)
tree: ad2b4f0d67c90a5a0be4ff532082fc3468d6d65b /Tools/scripts/generate_global_objects.py
parent: 278fdd3e3a2492665b2c2888fd2f428f7f59a3f5 (diff)
download: cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.zip
cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.tar.gz
cpython-12360aa159c42c7798fd14225d271e6fd84db7eb.tar.bz2
bpo-46541: Discover the global strings. (gh-31346)
Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files. This is partly inspired by gh-31261. https://bugs.python.org/issue46541
Diffstat (limited to 'Tools/scripts/generate_global_objects.py')
-rw-r--r-- Tools/scripts/generate_global_objects.py 329
1 files changed, 74 insertions, 255 deletions
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
index 7306889..e989f3c 100644
--- a/Tools/scripts/generate_global_objects.py
+++ b/Tools/scripts/generate_global_objects.py
@@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal')
STRING_LITERALS = {
'empty': '',
'dot': '.',
- 'comma_sep': ', ',
- 'percent': '%',
- 'dbl_percent': '%%',
-
- '"anonymous" labels': None,
- 'anon_dictcomp': '<dictcomp>',
- 'anon_genexpr': '<genexpr>',
- 'anon_lambda': '<lambda>',
- 'anon_listcomp': '<listcomp>',
- 'anon_module': '<module>',
- 'anon_setcomp': '<setcomp>',
- 'anon_string': '<string>',
- 'dot_locals': '.<locals>',
+}
+IGNORED = {
+ 'ACTION', # Python/_warnings.c
+ 'ATTR', # Python/_warnings.c and Objects/funcobject.c
+ 'DUNDER', # Objects/typeobject.c
+ 'RDUNDER', # Objects/typeobject.c
+ 'SPECIAL', # Objects/weakrefobject.c
}
IDENTIFIERS = [
- 'Py_Repr',
- 'TextIOWrapper',
+ # from ADD() Python/_warnings.c
+ 'default',
+ 'ignore',
+
+ # from GET_WARNINGS_ATTR() in Python/_warnings.c
'WarningMessage',
- '_',
- '__IOBase_closed',
- '__abc_tpflags__',
+ '_showwarnmsg',
+ '_warn_unawaited_coroutine',
+ 'defaultaction',
+ 'filters',
+ 'onceregistry',
+
+ # from WRAP_METHOD() in Objects/weakrefobject.c
+ '__bytes__',
+ '__reversed__',
+
+ # from COPY_ATTR() in Objects/funcobject.c
+ '__module__',
+ '__name__',
+ '__qualname__',
+ '__doc__',
+ '__annotations__',
+
+ # from SLOT* in Objects/typeobject.c
'__abs__',
- '__abstractmethods__',
'__add__',
- '__aenter__',
- '__aexit__',
- '__aiter__',
- '__all__',
'__and__',
- '__anext__',
- '__annotations__',
- '__args__',
- '__await__',
- '__bases__',
- '__bool__',
- '__build_class__',
- '__builtins__',
- '__bytes__',
- '__call__',
- '__cantrace__',
- '__class__',
- '__class_getitem__',
- '__classcell__',
- '__complex__',
- '__contains__',
- '__copy__',
- '__del__',
- '__delattr__',
- '__delete__',
- '__delitem__',
- '__dict__',
- '__dir__',
'__divmod__',
- '__doc__',
- '__enter__',
- '__eq__',
- '__exit__',
- '__file__',
'__float__',
'__floordiv__',
- '__format__',
- '__fspath__',
- '__ge__',
- '__get__',
- '__getattr__',
- '__getattribute__',
- '__getinitargs__',
'__getitem__',
- '__getnewargs__',
- '__getnewargs_ex__',
- '__getstate__',
- '__gt__',
- '__hash__',
'__iadd__',
'__iand__',
'__ifloordiv__',
'__ilshift__',
'__imatmul__',
'__imod__',
- '__import__',
'__imul__',
- '__index__',
- '__init__',
- '__init_subclass__',
- '__instancecheck__',
'__int__',
'__invert__',
'__ior__',
- '__ipow__',
'__irshift__',
- '__isabstractmethod__',
'__isub__',
- '__iter__',
'__itruediv__',
'__ixor__',
- '__le__',
- '__len__',
- '__length_hint__',
- '__loader__',
'__lshift__',
- '__lt__',
- '__ltrace__',
- '__main__',
'__matmul__',
- '__missing__',
'__mod__',
- '__module__',
- '__mro_entries__',
'__mul__',
- '__name__',
- '__ne__',
'__neg__',
- '__new__',
- '__newobj__',
- '__newobj_ex__',
- '__next__',
- '__note__',
'__or__',
- '__origin__',
- '__package__',
- '__parameters__',
- '__path__',
'__pos__',
'__pow__',
- '__prepare__',
- '__qualname__',
'__radd__',
'__rand__',
'__rdivmod__',
- '__reduce__',
- '__reduce_ex__',
- '__repr__',
- '__reversed__',
'__rfloordiv__',
'__rlshift__',
'__rmatmul__',
'__rmod__',
'__rmul__',
'__ror__',
- '__round__',
'__rpow__',
'__rrshift__',
'__rshift__',
'__rsub__',
'__rtruediv__',
'__rxor__',
- '__set__',
- '__set_name__',
- '__setattr__',
- '__setitem__',
- '__setstate__',
- '__sizeof__',
- '__slotnames__',
- '__slots__',
- '__spec__',
'__str__',
'__sub__',
- '__subclasscheck__',
- '__subclasshook__',
'__truediv__',
- '__trunc__',
- '__warningregistry__',
- '__weakref__',
'__xor__',
- '_abc_impl',
- '_blksize',
- '_dealloc_warn',
- '_finalizing',
- '_find_and_load',
- '_fix_up_module',
- '_get_sourcefile',
- '_handle_fromlist',
- '_initializing',
- '_is_text_encoding',
- '_lock_unlock_module',
- '_showwarnmsg',
- '_shutdown',
- '_slotnames',
- '_strptime_time',
- '_uninitialized_submodules',
- '_warn_unawaited_coroutine',
- '_xoptions',
- 'add',
- 'append',
- 'big',
- 'buffer',
- 'builtins',
- 'clear',
- 'close',
- 'code',
- 'copy',
- 'copyreg',
- 'decode',
- 'default',
- 'defaultaction',
- 'difference_update',
- 'dispatch_table',
- 'displayhook',
- 'enable',
- 'encoding',
- 'end_lineno',
- 'end_offset',
- 'errors',
- 'excepthook',
- 'extend',
- 'filename',
- 'fileno',
- 'fillvalue',
- 'filters',
- 'find_class',
- 'flush',
- 'get',
- 'get_source',
- 'getattr',
- 'ignore',
- 'importlib',
- 'intersection',
- 'isatty',
- 'items',
- 'iter',
- 'keys',
- 'last_traceback',
- 'last_type',
- 'last_value',
- 'latin1',
- 'lineno',
- 'little',
- 'match',
- 'metaclass',
- 'mode',
- 'modules',
- 'mro',
- 'msg',
- 'n_fields',
- 'n_sequence_fields',
- 'n_unnamed_fields',
- 'name',
- 'obj',
- 'offset',
- 'onceregistry',
- 'open',
- 'parent',
- 'partial',
- 'path',
- 'peek',
- 'persistent_id',
- 'persistent_load',
- 'print_file_and_line',
- 'ps1',
- 'ps2',
- 'raw',
- 'read',
- 'read1',
- 'readable',
- 'readall',
- 'readinto',
- 'readinto1',
- 'readline',
- 'reducer_override',
- 'reload',
- 'replace',
- 'reset',
- 'return',
- 'reversed',
- 'seek',
- 'seekable',
- 'send',
- 'setstate',
- 'sort',
- 'stderr',
- 'stdin',
- 'stdout',
- 'strict',
- 'symmetric_difference_update',
- 'tell',
- 'text',
- 'threading',
- 'throw',
- 'unraisablehook',
- 'values',
- 'version',
- 'warnings',
- 'warnoptions',
- 'writable',
- 'write',
- 'zipimporter',
]
#######################################
# helpers
+def iter_global_strings():
+ id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+ str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+ for dirname, _, files in os.walk(ROOT):
+ if os.path.relpath(dirname, ROOT).startswith('Include'):
+ continue
+ for name in files:
+ if not name.endswith(('.c', '.h')):
+ continue
+ filename = os.path.join(dirname, name)
+ with open(os.path.join(filename), encoding='utf-8') as infile:
+ for lno, line in enumerate(infile, 1):
+ for m in id_regex.finditer(line):
+ identifier, = m.groups()
+ yield identifier, None, filename, lno, line
+ for m in str_regex.finditer(line):
+ varname, string = m.groups()
+ yield varname, string, filename, lno, line
+
def iter_to_marker(lines, marker):
for line in lines:
if line.rstrip() == marker:
@@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
END = '/* End auto-generated code */'
-def generate_global_strings():
+def generate_global_strings(identifiers, strings):
filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
# Read the non-generated part of the file.
@@ -371,22 +185,18 @@ def generate_global_strings():
printer.write(START)
with printer.block('struct _Py_global_strings', ';'):
with printer.block('struct', ' literals;'):
- for name, literal in STRING_LITERALS.items():
- if literal is None:
- outfile.write('\n')
- printer.write(f'// {name}')
- else:
- printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+ for name, literal in sorted(strings.items()):
+ printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n')
with printer.block('struct', ' identifiers;'):
- for name in sorted(IDENTIFIERS):
+ for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'STRUCT_FOR_ID({name})')
printer.write(END)
printer.write(after)
-def generate_runtime_init():
+def generate_runtime_init(identifiers, strings):
# First get some info from the declarations.
nsmallposints = None
nsmallnegints = None
@@ -432,13 +242,10 @@ def generate_runtime_init():
# Global strings.
with printer.block('.strings =', ','):
with printer.block('.literals =', ','):
- for name, literal in STRING_LITERALS.items():
- if literal is None:
- printer.write('')
- else:
- printer.write(f'INIT_STR({name}, "{literal}"),')
+ for name, literal in sorted(strings.items()):
+ printer.write(f'INIT_STR({name}, "{literal}"),')
with printer.block('.identifiers =', ','):
- for name in sorted(IDENTIFIERS):
+ for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'INIT_ID({name}),')
printer.write(END)
@@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r'''
)
''', re.VERBOSE)
-def check_orphan_strings():
+def check_orphan_strings(identifiers):
literals = set(n for n, s in STRING_LITERALS.items() if s)
- identifiers = set(IDENTIFIERS)
+ identifiers = set(identifiers)
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
for i, filename in enumerate(files, start=1):
print('.', end='')
@@ -586,11 +393,23 @@ def check_orphan_strings():
# the script
def main(*, check=False) -> None:
- generate_global_strings()
- generate_runtime_init()
+ identifiers = set(IDENTIFIERS)
+ strings = dict(STRING_LITERALS)
+ for name, string, filename, lno, _ in iter_global_strings():
+ if string is None:
+ if name not in IGNORED:
+ identifiers.add(name)
+ else:
+ if name not in strings:
+ strings[name] = string
+ elif string != strings[name]:
+ raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}')
+
+ generate_global_strings(identifiers, strings)
+ generate_runtime_init(identifiers, strings)
if check:
- check_orphan_strings()
+ check_orphan_strings(identifiers)
if __name__ == '__main__':