diff options
author | Brandt Bucher <brandtbucher@microsoft.com> | 2022-03-21 11:11:17 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-21 11:11:17 (GMT) |
commit | 2bde6827ea4f136297b2d882480b981ff26262b6 (patch) | |
tree | 8ad0569c15e0f516eaf8547581c6de2ca702b349 /Tools | |
parent | 08eb754d840696914928355014c2d424131f8835 (diff) | |
download | cpython-2bde6827ea4f136297b2d882480b981ff26262b6.zip cpython-2bde6827ea4f136297b2d882480b981ff26262b6.tar.gz cpython-2bde6827ea4f136297b2d882480b981ff26262b6.tar.bz2 |
bpo-46841: Quicken code in-place (GH-31888)
* Moves the bytecode to the end of the corresponding PyCodeObject, and quickens it in-place.
* Removes the almost-always-unused co_varnames, co_freevars, and co_cellvars member caches.
* _PyOpcode_Deopt is a new mapping from all opcodes to their un-quickened forms.
* _PyOpcode_InlineCacheEntries is renamed to _PyOpcode_Caches.
* _Py_IncrementCountAndMaybeQuicken is renamed to _PyCode_Warmup.
* _Py_Quicken is renamed to _PyCode_Quicken.
* _co_quickened is renamed to _co_code_adaptive (and is now a read-only memoryview).
* The compiler no longer emits unused nonzero opargs.
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/scripts/deepfreeze.py | 47 | ||||
-rw-r--r-- | Tools/scripts/generate_opcode_h.py | 17 |
2 files changed, 36 insertions, 28 deletions
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 954fca8..d208258 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -229,12 +229,8 @@ class Printer: def generate_code(self, name: str, code: types.CodeType) -> str: # The ordering here matches PyCode_NewWithPosOnlyArgs() # (but see below). - co_code = self.generate(name + "_code", code.co_code) co_consts = self.generate(name + "_consts", code.co_consts) co_names = self.generate(name + "_names", code.co_names) - co_varnames = self.generate(name + "_varnames", code.co_varnames) - co_freevars = self.generate(name + "_freevars", code.co_freevars) - co_cellvars = self.generate(name + "_cellvars", code.co_cellvars) co_filename = self.generate(name + "_filename", code.co_filename) co_name = self.generate(name + "_name", code.co_name) co_qualname = self.generate(name + "_qualname", code.co_qualname) @@ -249,14 +245,17 @@ class Printer: # Derived values nlocals, nplaincellvars, ncellvars, nfreevars = \ get_localsplus_counts(code, localsplusnames, localspluskinds) - with self.block(f"static struct PyCodeObject {name} =", ";"): - self.object_head("PyCode_Type") + co_code_adaptive = make_string_literal(code.co_code) + self.write("static") + with self.indent(): + self.write(f"struct _PyCode_DEF({len(code.co_code)})") + with self.block(f"{name} =", ";"): + self.object_var_head("PyCode_Type", len(code.co_code) // 2) # But the ordering here must match that in cpython/code.h # (which is a pain because we tend to reorder those for perf) # otherwise MSVC doesn't like it. 
self.write(f".co_consts = {co_consts},") self.write(f".co_names = {co_names},") - self.write(f".co_firstinstr = (_Py_CODEUNIT *) {removesuffix(co_code, '.ob_base.ob_base')}.ob_sval,") self.write(f".co_exceptiontable = {co_exceptiontable},") self.field(code, "co_flags") self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") @@ -265,7 +264,11 @@ class Printer: self.field(code, "co_kwonlyargcount") self.field(code, "co_stacksize") self.field(code, "co_firstlineno") - self.write(f".co_code = {co_code},") + self.write(f".co_nlocalsplus = {len(localsplusnames)},") + self.field(code, "co_nlocals") + self.write(f".co_nplaincellvars = {nplaincellvars},") + self.write(f".co_ncellvars = {ncellvars},") + self.write(f".co_nfreevars = {nfreevars},") self.write(f".co_localsplusnames = {co_localsplusnames},") self.write(f".co_localspluskinds = {co_localspluskinds},") self.write(f".co_filename = {co_filename},") @@ -274,17 +277,11 @@ class Printer: self.write(f".co_linetable = {co_linetable},") self.write(f".co_endlinetable = {co_endlinetable},") self.write(f".co_columntable = {co_columntable},") - self.write(f".co_nlocalsplus = {len(localsplusnames)},") - self.field(code, "co_nlocals") - self.write(f".co_nplaincellvars = {nplaincellvars},") - self.write(f".co_ncellvars = {ncellvars},") - self.write(f".co_nfreevars = {nfreevars},") - self.write(f".co_varnames = {co_varnames},") - self.write(f".co_cellvars = {co_cellvars},") - self.write(f".co_freevars = {co_freevars},") - self.deallocs.append(f"_PyStaticCode_Dealloc(&{name});") - self.interns.append(f"_PyStaticCode_InternStrings(&{name})") - return f"& {name}.ob_base" + self.write(f".co_code_adaptive = {co_code_adaptive},") + name_as_code = f"(PyCodeObject *)&{name}" + self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") + self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})") + return f"& {name}.ob_base.ob_base" def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: if len(t) == 0: @@ 
-450,13 +447,13 @@ def generate(args: list[str], output: TextIO) -> None: code = compile(fd.read(), f"<frozen {modname}>", "exec") printer.generate_file(modname, code) with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"): - for p in printer.deallocs: - printer.write(p) + for p in printer.deallocs: + printer.write(p) with printer.block(f"int\n_Py_Deepfreeze_Init(void)"): - for p in printer.interns: - with printer.block(f"if ({p} < 0)"): - printer.write("return -1;") - printer.write("return 0;") + for p in printer.interns: + with printer.block(f"if ({p} < 0)"): + printer.write("return -1;") + printer.write("return 0;") if verbose: print(f"Cache hits: {printer.hits}, misses: {printer.misses}") diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py index 75a9c3f..3b79dc6 100644 --- a/Tools/scripts/generate_opcode_h.py +++ b/Tools/scripts/generate_opcode_h.py @@ -28,7 +28,7 @@ footer = """ #endif /* !Py_OPCODE_H */ """ -DEFINE = "#define {:<31} {:>3}\n" +DEFINE = "#define {:<38} {:>3}\n" UINT32_MASK = (1<<32)-1 @@ -75,16 +75,27 @@ def main(opcode_py, outfile='Include/opcode.h'): fobj.write(DEFINE.format(name, next_op)) used[next_op] = True fobj.write(DEFINE.format('DO_TRACING', 255)) - fobj.write("\nextern const uint8_t _PyOpcode_InlineCacheEntries[256];\n") + fobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n") + fobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n") fobj.write("\n#ifdef NEED_OPCODE_TABLES\n") write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], fobj) write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], fobj) - fobj.write("\nconst uint8_t _PyOpcode_InlineCacheEntries[256] = {\n") + fobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n") for i, entries in enumerate(opcode["_inline_cache_entries"]): if entries: fobj.write(f" [{opname[i]}] = {entries},\n") fobj.write("};\n") + deoptcodes = {} + for basic in opmap: + deoptcodes[basic] = basic + for basic, family 
in opcode["_specializations"].items(): + for specialized in family: + deoptcodes[specialized] = basic + fobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n") + for opt, deopt in sorted(deoptcodes.items()): + fobj.write(f" [{opt}] = {deopt},\n") + fobj.write("};\n") fobj.write("#endif /* OPCODE_TABLES */\n") fobj.write("\n") |