summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2022-02-08 20:39:07 (GMT)
committerGitHub <noreply@github.com>2022-02-08 20:39:07 (GMT)
commit81c72044a181dbbfbf689d7a977d0d99090f26a8 (patch)
tree14329746bd6f179cf2ae7c9818e1ae881eb46360 /Tools
parentc018d3037b5b62e6d48d5985d1a37b91762fbffb (diff)
downloadcpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.zip
cpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.tar.gz
cpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.tar.bz2
bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)
We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code. It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime. A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings. That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()). This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable, as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
Diffstat (limited to 'Tools')
-rw-r--r--Tools/c-analyzer/TODO2
-rw-r--r--Tools/scripts/generate_global_objects.py495
2 files changed, 486 insertions, 11 deletions
diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO
index 4b9b285..55338eb 100644
--- a/Tools/c-analyzer/TODO
+++ b/Tools/c-analyzer/TODO
@@ -156,8 +156,6 @@ Objects/codeobject.c:PyCode_NewEmpty():emptystring static PyObject
Objects/exceptions.c:_check_for_legacy_statements():exec_prefix static PyObject *exec_prefix
Objects/exceptions.c:_check_for_legacy_statements():print_prefix static PyObject *print_prefix
Objects/funcobject.c:PyFunction_NewWithQualName():__name__ static PyObject *__name__
-Objects/typeobject.c:object___reduce_ex___impl():objreduce static PyObject *objreduce
-Objects/typeobject.c:resolve_slotdups():pname static PyObject *pname
Objects/unicodeobject.c:unicode_empty static PyObject *unicode_empty
Objects/unicodeobject.c:unicode_latin1 static PyObject *unicode_latin1[256]
Python/_warnings.c:is_internal_frame():bootstrap_string static PyObject *bootstrap_string
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
index a06d201..b184e74 100644
--- a/Tools/scripts/generate_global_objects.py
+++ b/Tools/scripts/generate_global_objects.py
@@ -1,9 +1,7 @@
-import argparse
-import ast
-import builtins
-import collections
import contextlib
+import glob
import os.path
+import re
import sys
@@ -12,6 +10,298 @@ ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
+# Global string literals, keyed by the C field name used in the generated code.
+# A value of None is not a string: its key is a section label, which the
+# generators emit as a "// <key>" comment (struct declaration) or as a blank
+# line (static initializer).  Entries are emitted in the order listed here.
+STRING_LITERALS = {
+ 'empty': '',
+ 'dot': '.',
+ 'comma_sep': ', ',
+ 'percent': '%',
+ 'dbl_percent': '%%',
+
+ '"anonymous" labels': None,
+ 'anon_dictcomp': '<dictcomp>',
+ 'anon_genexpr': '<genexpr>',
+ 'anon_lambda': '<lambda>',
+ 'anon_listcomp': '<listcomp>',
+ 'anon_module': '<module>',
+ 'anon_setcomp': '<setcomp>',
+ 'anon_string': '<string>',
+ 'dot_locals': '.<locals>',
+}
+# Global identifier strings.  Each name becomes a STRUCT_FOR_ID(<name>) field
+# and an INIT_ID(<name>) initializer, so every entry must satisfy
+# str.isidentifier() (the generators assert this).  The generators iterate
+# over sorted(IDENTIFIERS), so the order here does not affect the output.
+IDENTIFIERS = [
+ 'Py_Repr',
+ 'TextIOWrapper',
+ 'WarningMessage',
+ '_',
+ '__IOBase_closed',
+ '__abc_tpflags__',
+ '__abs__',
+ '__abstractmethods__',
+ '__add__',
+ '__aenter__',
+ '__aexit__',
+ '__aiter__',
+ '__all__',
+ '__and__',
+ '__anext__',
+ '__annotations__',
+ '__args__',
+ '__await__',
+ '__bases__',
+ '__bool__',
+ '__build_class__',
+ '__builtins__',
+ '__bytes__',
+ '__call__',
+ '__cantrace__',
+ '__class__',
+ '__class_getitem__',
+ '__classcell__',
+ '__complex__',
+ '__contains__',
+ '__copy__',
+ '__del__',
+ '__delattr__',
+ '__delete__',
+ '__delitem__',
+ '__dict__',
+ '__dir__',
+ '__divmod__',
+ '__doc__',
+ '__enter__',
+ '__eq__',
+ '__exit__',
+ '__file__',
+ '__float__',
+ '__floordiv__',
+ '__format__',
+ '__fspath__',
+ '__ge__',
+ '__get__',
+ '__getattr__',
+ '__getattribute__',
+ '__getinitargs__',
+ '__getitem__',
+ '__getnewargs__',
+ '__getnewargs_ex__',
+ '__getstate__',
+ '__gt__',
+ '__hash__',
+ '__iadd__',
+ '__iand__',
+ '__ifloordiv__',
+ '__ilshift__',
+ '__imatmul__',
+ '__imod__',
+ '__import__',
+ '__imul__',
+ '__index__',
+ '__init__',
+ '__init_subclass__',
+ '__instancecheck__',
+ '__int__',
+ '__invert__',
+ '__ior__',
+ '__ipow__',
+ '__irshift__',
+ '__isabstractmethod__',
+ '__isub__',
+ '__iter__',
+ '__itruediv__',
+ '__ixor__',
+ '__le__',
+ '__len__',
+ '__length_hint__',
+ '__loader__',
+ '__lshift__',
+ '__lt__',
+ '__ltrace__',
+ '__main__',
+ '__matmul__',
+ '__missing__',
+ '__mod__',
+ '__module__',
+ '__mro_entries__',
+ '__mul__',
+ '__name__',
+ '__ne__',
+ '__neg__',
+ '__new__',
+ '__newobj__',
+ '__newobj_ex__',
+ '__next__',
+ '__note__',
+ '__or__',
+ '__origin__',
+ '__package__',
+ '__parameters__',
+ '__path__',
+ '__pos__',
+ '__pow__',
+ '__prepare__',
+ '__qualname__',
+ '__radd__',
+ '__rand__',
+ '__rdivmod__',
+ '__reduce__',
+ '__reduce_ex__',
+ '__repr__',
+ '__reversed__',
+ '__rfloordiv__',
+ '__rlshift__',
+ '__rmatmul__',
+ '__rmod__',
+ '__rmul__',
+ '__ror__',
+ '__round__',
+ '__rpow__',
+ '__rrshift__',
+ '__rshift__',
+ '__rsub__',
+ '__rtruediv__',
+ '__rxor__',
+ '__set__',
+ '__set_name__',
+ '__setattr__',
+ '__setitem__',
+ '__setstate__',
+ '__sizeof__',
+ '__slotnames__',
+ '__slots__',
+ '__spec__',
+ '__str__',
+ '__sub__',
+ '__subclasscheck__',
+ '__subclasshook__',
+ '__truediv__',
+ '__trunc__',
+ '__warningregistry__',
+ '__weakref__',
+ '__xor__',
+ '_abc_impl',
+ '_blksize',
+ '_dealloc_warn',
+ '_finalizing',
+ '_find_and_load',
+ '_fix_up_module',
+ '_get_sourcefile',
+ '_handle_fromlist',
+ '_initializing',
+ '_is_text_encoding',
+ '_lock_unlock_module',
+ '_showwarnmsg',
+ '_shutdown',
+ '_slotnames',
+ '_strptime_time',
+ '_uninitialized_submodules',
+ '_warn_unawaited_coroutine',
+ '_xoptions',
+ 'add',
+ 'append',
+ 'big',
+ 'buffer',
+ 'builtins',
+ 'clear',
+ 'close',
+ 'code',
+ 'copy',
+ 'copyreg',
+ 'decode',
+ 'default',
+ 'defaultaction',
+ 'difference_update',
+ 'dispatch_table',
+ 'displayhook',
+ 'enable',
+ 'encoding',
+ 'end_lineno',
+ 'end_offset',
+ 'errors',
+ 'excepthook',
+ 'extend',
+ 'filename',
+ 'fileno',
+ 'fillvalue',
+ 'filters',
+ 'find_class',
+ 'flush',
+ 'get',
+ 'get_source',
+ 'getattr',
+ 'ignore',
+ 'importlib',
+ 'intersection',
+ 'isatty',
+ 'items',
+ 'iter',
+ 'keys',
+ 'last_traceback',
+ 'last_type',
+ 'last_value',
+ 'latin1',
+ 'lineno',
+ 'little',
+ 'match',
+ 'metaclass',
+ 'mode',
+ 'modules',
+ 'mro',
+ 'msg',
+ 'n_fields',
+ 'n_sequence_fields',
+ 'n_unnamed_fields',
+ 'name',
+ 'obj',
+ 'offset',
+ 'onceregistry',
+ 'open',
+ 'parent',
+ 'partial',
+ 'path',
+ 'peek',
+ 'persistent_id',
+ 'persistent_load',
+ 'print_file_and_line',
+ 'ps1',
+ 'ps2',
+ 'raw',
+ 'read',
+ 'read1',
+ 'readable',
+ 'readall',
+ 'readinto',
+ 'readinto1',
+ 'readline',
+ 'reducer_override',
+ 'reload',
+ 'replace',
+ 'reset',
+ 'return',
+ 'reversed',
+ 'seek',
+ 'seekable',
+ 'send',
+ 'setstate',
+ 'sort',
+ 'stderr',
+ 'stdin',
+ 'stdout',
+ 'strict',
+ 'symmetric_difference_update',
+ 'tell',
+ 'text',
+ 'threading',
+ 'throw',
+ 'unraisablehook',
+ 'values',
+ 'version',
+ 'warnings',
+ 'warnoptions',
+ 'writable',
+ 'write',
+ 'zipimporter',
+]
+
+
#######################################
# helpers
@@ -64,6 +354,38 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
END = '/* End auto-generated code */'
+def generate_global_strings():
+    """Regenerate the auto-generated part of pycore_global_strings.h.
+
+    The text before the START marker and the text after the END marker
+    are preserved.  Everything in between is replaced with a
+    "struct _Py_global_strings" declaration holding one STRUCT_FOR_STR()
+    entry per string literal and one STRUCT_FOR_ID() entry per identifier.
+    """
+    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
+
+    # Read the non-generated part of the file.  The file is written back
+    # as UTF-8 below, so it is read as UTF-8 too; otherwise the preserved
+    # text would not round-trip on a non-UTF-8 locale.
+    with open(filename, encoding='utf-8') as infile:
+        # Drop the final character of each preserved chunk (presumably
+        # the trailing newline that printer.write() adds back -- confirm
+        # against Printer).
+        before = ''.join(iter_to_marker(infile, START))[:-1]
+        # Skip the previously generated section.
+        for _ in iter_to_marker(infile, END):
+            pass
+        after = infile.read()[:-1]
+
+    # Generate the file.
+    with open(filename, 'w', encoding='utf-8') as outfile:
+        printer = Printer(outfile)
+        printer.write(before)
+        printer.write(START)
+        with printer.block('struct _Py_global_strings', ';'):
+            with printer.block('struct', ' literals;'):
+                for name, literal in STRING_LITERALS.items():
+                    if literal is None:
+                        # A None value marks a section label, emitted as
+                        # a comment preceded by an empty line.
+                        outfile.write('\n')
+                        printer.write(f'// {name}')
+                    else:
+                        printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+            outfile.write('\n')
+            with printer.block('struct', ' identifiers;'):
+                for name in sorted(IDENTIFIERS):
+                    assert name.isidentifier(), name
+                    printer.write(f'STRUCT_FOR_ID({name})')
+        printer.write(END)
+        printer.write(after)
+
+
def generate_runtime_init():
# First get some info from the declarations.
nsmallposints = None
@@ -106,19 +428,174 @@ def generate_runtime_init():
with printer.block('.bytes_characters =', ','):
for i in range(256):
printer.write(f'_PyBytes_CHAR_INIT({i}),')
+ printer.write('')
+ # Global strings.
+ with printer.block('.strings =', ','):
+ with printer.block('.literals =', ','):
+ for name, literal in STRING_LITERALS.items():
+ if literal is None:
+ printer.write('')
+ else:
+ printer.write(f'INIT_STR({name}, "{literal}"),')
+ with printer.block('.identifiers =', ','):
+ for name in sorted(IDENTIFIERS):
+ assert name.isidentifier(), name
+ printer.write(f'INIT_ID({name}),')
printer.write(END)
printer.write(after)
#######################################
+# checks
+
+def err(msg):
+    """Write *msg*, followed by a newline, to standard error."""
+    sys.stderr.write(str(msg) + '\n')
+
+
+# Matches the first "_Py_ID(<name>)" or "_Py_STR(<name>)" found on a line.
+# Groups, in order: (identifier, literal); the one that did not match is None.
+GETTER_RE = re.compile(r'''
+ ^
+ .*?
+ (?:
+ (?:
+ _Py_ID
+ [(]
+ ( \w+ ) # <identifier>
+ [)]
+ )
+ |
+ (?:
+ _Py_STR
+ [(]
+ ( \w+ ) # <literal>
+ [)]
+ )
+ )
+''', re.VERBOSE)
+# Matches the first SLOT0/SLOT1/SLOT1BIN/SLOT1BINFULL macro use on a line and
+# captures the name argument(s) passed to it.  Groups, in order:
+# (slot0, slot1, slot1bin, reverse, slot1binfull, fullreverse, wrapped).
+# The last alternative, <wrapped>, captures a "SLOT<digit>..." line that does
+# not end with ")"; check_orphan_strings() prepends that text to the next line
+# before matching again.
+TYPESLOTS_RE = re.compile(r'''
+ ^
+ .*?
+ (?:
+ (?:
+ SLOT0 [(] .*?, \s*
+ ( \w+ ) # <slot0>
+ [)]
+ )
+ |
+ (?:
+ SLOT1 [(] .*?, \s*
+ ( \w+ ) # <slot1>
+ , .* [)]
+ )
+ |
+ (?:
+ SLOT1BIN [(] .*?, .*?, \s*
+ ( \w+ ) # <slot1bin>
+ , \s*
+ ( \w+ ) # <reverse>
+ [)]
+ )
+ |
+ (?:
+ SLOT1BINFULL [(] .*?, .*?, .*?, \s*
+ ( \w+ ) # <slot1binfull>
+ , \s*
+ ( \w+ ) # <fullreverse>
+ [)]
+ )
+ |
+ ( SLOT \d .* [^)] $ ) # <wrapped>
+ )
+''', re.VERBOSE)
+
+def check_orphan_strings():
+    """Look for global strings that no C source file refers to.
+
+    Every "*.c" and "*.h" file under ROOT is scanned line by line for
+    _Py_ID(<name>) / _Py_STR(<name>) uses, for GET_WARNINGS_ATTR() uses
+    in _warnings.c and for the SLOT* macros in typeobject.c.  Names that
+    are never seen are reported on stderr.
+
+    A progress dot is printed to stdout for each file scanned.
+
+    Returns:
+        True if every global string is used, False if any are unused.
+    """
+    # Section labels have the value None.  The empty string is a real
+    # literal ('empty'), so compare against None rather than testing
+    # truthiness.
+    literals = {n for n, s in STRING_LITERALS.items() if s is not None}
+    identifiers = set(IDENTIFIERS)
+    files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
+    i = 0  # Stays 0 (and defined) if the glob matches nothing.
+    for i, filename in enumerate(files, start=1):
+        # Progress output: dots in groups of 5, 20 per row, and an empty
+        # line after every 100 files.
+        print('.', end='')
+        if i % 5 == 0:
+            print(' ', end='')
+        if i % 20 == 0:
+            print()
+        if i % 100 == 0:
+            print()
+        basename = os.path.basename(filename)
+        # Only ASCII macro names matter here, so do not let one
+        # undecodable byte in a source file abort the whole scan.
+        with open(filename, encoding='utf-8',
+                  errors='surrogateescape') as infile:
+            wrapped = None
+            for line in infile:
+                identifier = literal = reverse = None
+
+                line = line.splitlines()[0]
+                if wrapped:
+                    # Re-join a SLOT*() call that was split across lines.
+                    line = f'{wrapped.rstrip()} {line}'
+                    wrapped = None
+
+                if basename == '_warnings.c':
+                    m = re.match(
+                        r'^.* = GET_WARNINGS_ATTR[(][^,]*, (\w+),', line)
+                    if m:
+                        identifier, = m.groups()
+                elif basename == 'typeobject.c':
+                    m = TYPESLOTS_RE.match(line)
+                    if m:
+                        (slot0,
+                         slot1,
+                         slot1bin, reverse,
+                         slot1binfull, fullreverse,
+                         wrapped,
+                         ) = m.groups()
+                        identifier = slot0 or slot1 or slot1bin or slot1binfull
+                        reverse = reverse or fullreverse
+
+                if not identifier and not literal:
+                    m = GETTER_RE.match(line)
+                    if not m:
+                        continue
+                    identifier, literal = m.groups()
+
+                if literal:
+                    literals.discard(literal)
+                if identifier:
+                    identifiers.discard(identifier)
+                if reverse:
+                    identifiers.discard(reverse)
+                if not literals and not identifiers:
+                    break
+        if not literals and not identifiers:
+            # Everything has been accounted for; skip the remaining files.
+            break
+    if i % 20:
+        # Finish the partial row of progress dots.
+        print()
+    if not literals and not identifiers:
+        return True
+    # Send the whole report to stderr, through the same helper.
+    err('ERROR:')
+    if literals:
+        err(' unused global string literals:')
+        for name in sorted(literals):
+            err(f' {name}')
+    if identifiers:
+        if literals:
+            err('')
+        err(' unused global identifiers:')
+        for name in sorted(identifiers):
+            err(f' {name}')
+    return False
+
+
+#######################################
 # the script
-def main() -> None:
+def main(*, check=False) -> None:
+    """Regenerate the global-object headers and optionally check them.
+
+    With check=True the C sources are also scanned for unused global
+    strings; if any are found the process exits with status 1, so that
+    the check can fail a CI run.
+    """
+    generate_global_strings()
     generate_runtime_init()
+    if check and not check_orphan_strings():
+        sys.exit(1)
+
if __name__ == '__main__':
- argv = sys.argv[1:]
- if argv:
- sys.exit(f'ERROR: got unexpected args {argv}')
- main()
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--check', action='store_true')
+ args = parser.parse_args()
+ main(**vars(args))