summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorBrandt Bucher <brandtbucher@microsoft.com>2022-03-21 11:11:17 (GMT)
committerGitHub <noreply@github.com>2022-03-21 11:11:17 (GMT)
commit2bde6827ea4f136297b2d882480b981ff26262b6 (patch)
tree8ad0569c15e0f516eaf8547581c6de2ca702b349 /Tools
parent08eb754d840696914928355014c2d424131f8835 (diff)
downloadcpython-2bde6827ea4f136297b2d882480b981ff26262b6.zip
cpython-2bde6827ea4f136297b2d882480b981ff26262b6.tar.gz
cpython-2bde6827ea4f136297b2d882480b981ff26262b6.tar.bz2
bpo-46841: Quicken code in-place (GH-31888)
* Moves the bytecode to the end of the corresponding PyCodeObject, and quickens it in-place. * Removes the almost-always-unused co_varnames, co_freevars, and co_cellvars member caches * _PyOpcode_Deopt is a new mapping from all opcodes to their un-quickened forms. * _PyOpcode_InlineCacheEntries is renamed to _PyOpcode_Caches * _Py_IncrementCountAndMaybeQuicken is renamed to _PyCode_Warmup * _Py_Quicken is renamed to _PyCode_Quicken * _co_quickened is renamed to _co_code_adaptive (and is now a read-only memoryview). * Do not emit unused nonzero opargs anymore in the compiler.
Diffstat (limited to 'Tools')
-rw-r--r--Tools/scripts/deepfreeze.py47
-rw-r--r--Tools/scripts/generate_opcode_h.py17
2 files changed, 36 insertions, 28 deletions
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index 954fca8..d208258 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -229,12 +229,8 @@ class Printer:
def generate_code(self, name: str, code: types.CodeType) -> str:
# The ordering here matches PyCode_NewWithPosOnlyArgs()
# (but see below).
- co_code = self.generate(name + "_code", code.co_code)
co_consts = self.generate(name + "_consts", code.co_consts)
co_names = self.generate(name + "_names", code.co_names)
- co_varnames = self.generate(name + "_varnames", code.co_varnames)
- co_freevars = self.generate(name + "_freevars", code.co_freevars)
- co_cellvars = self.generate(name + "_cellvars", code.co_cellvars)
co_filename = self.generate(name + "_filename", code.co_filename)
co_name = self.generate(name + "_name", code.co_name)
co_qualname = self.generate(name + "_qualname", code.co_qualname)
@@ -249,14 +245,17 @@ class Printer:
# Derived values
nlocals, nplaincellvars, ncellvars, nfreevars = \
get_localsplus_counts(code, localsplusnames, localspluskinds)
- with self.block(f"static struct PyCodeObject {name} =", ";"):
- self.object_head("PyCode_Type")
+ co_code_adaptive = make_string_literal(code.co_code)
+ self.write("static")
+ with self.indent():
+ self.write(f"struct _PyCode_DEF({len(code.co_code)})")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyCode_Type", len(code.co_code) // 2)
# But the ordering here must match that in cpython/code.h
# (which is a pain because we tend to reorder those for perf)
# otherwise MSVC doesn't like it.
self.write(f".co_consts = {co_consts},")
self.write(f".co_names = {co_names},")
- self.write(f".co_firstinstr = (_Py_CODEUNIT *) {removesuffix(co_code, '.ob_base.ob_base')}.ob_sval,")
self.write(f".co_exceptiontable = {co_exceptiontable},")
self.field(code, "co_flags")
self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
@@ -265,7 +264,11 @@ class Printer:
self.field(code, "co_kwonlyargcount")
self.field(code, "co_stacksize")
self.field(code, "co_firstlineno")
- self.write(f".co_code = {co_code},")
+ self.write(f".co_nlocalsplus = {len(localsplusnames)},")
+ self.field(code, "co_nlocals")
+ self.write(f".co_nplaincellvars = {nplaincellvars},")
+ self.write(f".co_ncellvars = {ncellvars},")
+ self.write(f".co_nfreevars = {nfreevars},")
self.write(f".co_localsplusnames = {co_localsplusnames},")
self.write(f".co_localspluskinds = {co_localspluskinds},")
self.write(f".co_filename = {co_filename},")
@@ -274,17 +277,11 @@ class Printer:
self.write(f".co_linetable = {co_linetable},")
self.write(f".co_endlinetable = {co_endlinetable},")
self.write(f".co_columntable = {co_columntable},")
- self.write(f".co_nlocalsplus = {len(localsplusnames)},")
- self.field(code, "co_nlocals")
- self.write(f".co_nplaincellvars = {nplaincellvars},")
- self.write(f".co_ncellvars = {ncellvars},")
- self.write(f".co_nfreevars = {nfreevars},")
- self.write(f".co_varnames = {co_varnames},")
- self.write(f".co_cellvars = {co_cellvars},")
- self.write(f".co_freevars = {co_freevars},")
- self.deallocs.append(f"_PyStaticCode_Dealloc(&{name});")
- self.interns.append(f"_PyStaticCode_InternStrings(&{name})")
- return f"& {name}.ob_base"
+ self.write(f".co_code_adaptive = {co_code_adaptive},")
+ name_as_code = f"(PyCodeObject *)&{name}"
+ self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
+ self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
+ return f"& {name}.ob_base.ob_base"
def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
if len(t) == 0:
@@ -450,13 +447,13 @@ def generate(args: list[str], output: TextIO) -> None:
code = compile(fd.read(), f"<frozen {modname}>", "exec")
printer.generate_file(modname, code)
with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
- for p in printer.deallocs:
- printer.write(p)
+ for p in printer.deallocs:
+ printer.write(p)
with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
- for p in printer.interns:
- with printer.block(f"if ({p} < 0)"):
- printer.write("return -1;")
- printer.write("return 0;")
+ for p in printer.interns:
+ with printer.block(f"if ({p} < 0)"):
+ printer.write("return -1;")
+ printer.write("return 0;")
if verbose:
print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py
index 75a9c3f..3b79dc6 100644
--- a/Tools/scripts/generate_opcode_h.py
+++ b/Tools/scripts/generate_opcode_h.py
@@ -28,7 +28,7 @@ footer = """
#endif /* !Py_OPCODE_H */
"""
-DEFINE = "#define {:<31} {:>3}\n"
+DEFINE = "#define {:<38} {:>3}\n"
UINT32_MASK = (1<<32)-1
@@ -75,16 +75,27 @@ def main(opcode_py, outfile='Include/opcode.h'):
fobj.write(DEFINE.format(name, next_op))
used[next_op] = True
fobj.write(DEFINE.format('DO_TRACING', 255))
- fobj.write("\nextern const uint8_t _PyOpcode_InlineCacheEntries[256];\n")
+ fobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
+ fobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
fobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], fobj)
write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], fobj)
- fobj.write("\nconst uint8_t _PyOpcode_InlineCacheEntries[256] = {\n")
+ fobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
for i, entries in enumerate(opcode["_inline_cache_entries"]):
if entries:
fobj.write(f" [{opname[i]}] = {entries},\n")
fobj.write("};\n")
+ deoptcodes = {}
+ for basic in opmap:
+ deoptcodes[basic] = basic
+ for basic, family in opcode["_specializations"].items():
+ for specialized in family:
+ deoptcodes[specialized] = basic
+ fobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
+ for opt, deopt in sorted(deoptcodes.items()):
+ fobj.write(f" [{opt}] = {deopt},\n")
+ fobj.write("};\n")
fobj.write("#endif /* OPCODE_TABLES */\n")
fobj.write("\n")