diff options
author | mpage <mpage@cs.stanford.edu> | 2024-11-04 19:13:32 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-04 19:13:32 (GMT) |
commit | 2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7 (patch) | |
tree | de32ac52ed5ffcb9460dfc062effc6b4b662ee5d /Lib | |
parent | e5a4b402ae55f5eeeb44d3e7bc3f3ec39b249846 (diff) | |
download | cpython-2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7.zip cpython-2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7.tar.gz cpython-2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7.tar.bz2 |
gh-115999: Implement thread-local bytecode and enable specialization for `BINARY_OP` (#123926)
In free-threaded builds, each thread specializes a thread-local copy of the bytecode, created on the first RESUME. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. Each thread reserves a globally unique index identifying its copy of the bytecode in all co_tlbc arrays at thread creation and releases the index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode that is stored at the end of the code object. This ensures that no bytecode is copied for programs that do not use threads.
Thread-local bytecode can be disabled at runtime by providing either -X tlbc=0 or PYTHON_TLBC=0. Disabling thread-local bytecode also disables specialization.
Concurrent modifications to the bytecode made by the specializing interpreter and instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/support/__init__.py | 5 | ||||
-rw-r--r-- | Lib/test/test_capi/test_config.py | 1 | ||||
-rw-r--r-- | Lib/test/test_capi/test_opt.py | 7 | ||||
-rw-r--r-- | Lib/test/test_cmd_line.py | 52 | ||||
-rw-r--r-- | Lib/test/test_dis.py | 8 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 1 | ||||
-rw-r--r-- | Lib/test/test_sys.py | 14 | ||||
-rw-r--r-- | Lib/test/test_thread_local_bytecode.py | 198 |
8 files changed, 281 insertions, 5 deletions
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 7c1ef42..2ad267e 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1274,6 +1274,11 @@ def requires_specialization(test): _opcode.ENABLE_SPECIALIZATION, "requires specialization")(test) +def requires_specialization_ft(test): + return unittest.skipUnless( + _opcode.ENABLE_SPECIALIZATION_FT, "requires specialization")(test) + + #======================================================================= # Check for the presence of docstrings. diff --git a/Lib/test/test_capi/test_config.py b/Lib/test/test_capi/test_config.py index 71fb9ae..77730ad 100644 --- a/Lib/test/test_capi/test_config.py +++ b/Lib/test/test_capi/test_config.py @@ -100,6 +100,7 @@ class CAPITests(unittest.TestCase): options.append(("run_presite", str | None, None)) if sysconfig.get_config_var('Py_GIL_DISABLED'): options.append(("enable_gil", int, None)) + options.append(("tlbc_enabled", int, None)) if support.MS_WINDOWS: options.extend(( ("legacy_windows_stdio", bool, None), diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f1ab721..c352325 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -7,7 +7,8 @@ import os import _opcode -from test.support import script_helper, requires_specialization, import_helper +from test.support import (script_helper, requires_specialization, + import_helper, Py_GIL_DISABLED) _testinternalcapi = import_helper.import_module("_testinternalcapi") @@ -34,6 +35,7 @@ def clear_executors(func): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") class TestOptimizerAPI(unittest.TestCase): @@ -138,6 +140,7 @@ def get_opnames(ex): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") 
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") class TestExecutorInvalidation(unittest.TestCase): @@ -219,6 +222,7 @@ class TestExecutorInvalidation(unittest.TestCase): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") @@ -586,6 +590,7 @@ class TestUops(unittest.TestCase): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index eca9adf..634efda 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -12,6 +12,7 @@ import unittest from test import support from test.support import os_helper from test.support import force_not_colorized +from test.support import threading_helper from test.support.script_helper import ( spawn_python, kill_python, assert_python_ok, assert_python_failure, interpreter_requires_environment @@ -1068,6 +1069,57 @@ class CmdLineTest(unittest.TestCase): out = res.out.strip().decode("utf-8") return tuple(int(i) for i in out.split()) + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + @threading_helper.requires_working_threading() + def test_disable_thread_local_bytecode(self): + code = """if 1: + import threading + def test(x, y): + return x + y + t = threading.Thread(target=test, args=(1,2)) + t.start() + t.join()""" + assert_python_ok("-W", "always", "-X", "tlbc=0", "-c", code) + assert_python_ok("-W", "always", "-c", code, 
PYTHON_TLBC="0") + + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + @threading_helper.requires_working_threading() + def test_enable_thread_local_bytecode(self): + code = """if 1: + import threading + def test(x, y): + return x + y + t = threading.Thread(target=test, args=(1,2)) + t.start() + t.join()""" + # The functionality of thread-local bytecode is tested more extensively + # in test_thread_local_bytecode + assert_python_ok("-W", "always", "-X", "tlbc=1", "-c", code) + assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="1") + + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + def test_invalid_thread_local_bytecode(self): + rc, out, err = assert_python_failure("-X", "tlbc") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=foo") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=-1") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=2") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="foo") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="-1") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="2") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 3c6570a..a991c67 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -10,7 +10,8 @@ import sys import types import unittest from test.support import (captured_stdout, requires_debug_ranges, - 
requires_specialization, cpython_only) + requires_specialization, requires_specialization_ft, + cpython_only) from test.support.bytecode_helper import BytecodeTestCase import opcode @@ -1261,7 +1262,7 @@ class DisTests(DisTestBase): self.do_disassembly_compare(got, dis_load_test_quickened_code) @cpython_only - @requires_specialization + @requires_specialization_ft def test_binary_specialize(self): binary_op_quicken = """\ 0 RESUME_CHECK 0 @@ -1281,6 +1282,9 @@ class DisTests(DisTestBase): got = self.get_disassembly(co_unicode, adaptive=True) self.do_disassembly_compare(got, binary_op_quicken % "BINARY_OP_ADD_UNICODE 0 (+)") + @cpython_only + @requires_specialization + def test_binary_subscr_specialize(self): binary_subscr_quicken = """\ 0 RESUME_CHECK 0 diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 5e886b6..bf861ef 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -644,6 +644,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): CONFIG_COMPAT['run_presite'] = None if support.Py_GIL_DISABLED: CONFIG_COMPAT['enable_gil'] = -1 + CONFIG_COMPAT['tlbc_enabled'] = GET_DEFAULT_CONFIG if MS_WINDOWS: CONFIG_COMPAT.update({ 'legacy_windows_stdio': False, diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index c0862d7..d839893 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1094,7 +1094,14 @@ class SysModuleTest(unittest.TestCase): # While we could imagine a Python session where the number of # multiple buffer objects would exceed the sharing of references, # it is unlikely to happen in a normal test run. - self.assertLess(a, sys.gettotalrefcount()) + # + # In free-threaded builds each code object owns an array of + # pointers to copies of the bytecode. When the number of + # code objects is a large fraction of the total number of + # references, this can cause the total number of allocated + # blocks to exceed the total number of references. 
+ if not support.Py_GIL_DISABLED: + self.assertLess(a, sys.gettotalrefcount()) except AttributeError: # gettotalrefcount() not available pass @@ -1613,7 +1620,10 @@ class SizeofTest(unittest.TestCase): def func(): return sys._getframe() x = func() - INTERPRETER_FRAME = '9PhcP' + if support.Py_GIL_DISABLED: + INTERPRETER_FRAME = '10PhcP' + else: + INTERPRETER_FRAME = '9PhcP' check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P')) # function def func(): pass diff --git a/Lib/test/test_thread_local_bytecode.py b/Lib/test/test_thread_local_bytecode.py new file mode 100644 index 0000000..7a8809c --- /dev/null +++ b/Lib/test/test_thread_local_bytecode.py @@ -0,0 +1,198 @@ +"""Tests for thread-local bytecode.""" +import dis +import textwrap +import unittest + +from test import support +from test.support import cpython_only, import_helper, requires_specialization_ft +from test.support.script_helper import assert_python_ok +from test.support.threading_helper import requires_working_threading + +# Skip this test if the _testinternalcapi module isn't available +_testinternalcapi = import_helper.import_module("_testinternalcapi") + + +@cpython_only +@requires_working_threading() +@unittest.skipUnless(support.Py_GIL_DISABLED, "only in free-threaded builds") +class TLBCTests(unittest.TestCase): + @requires_specialization_ft + def test_new_threads_start_with_unspecialized_code(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(bc): + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc(f)) + return a + b + + for _ in range(100): + # specialize + f(1, 2) + + q = queue.Queue() + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + t.join() + + assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f)) + assert "BINARY_OP_ADD_INT" not in all_opnames(q.get()) + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + 
@requires_specialization_ft + def test_threads_specialize_independently(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(bc): + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b): + return a + b + + def g(a, b, q=None): + for _ in range(100): + f(a, b) + if q is not None: + q.put(get_tlbc(f)) + + # specialize in main thread + g(1, 2) + + # specialize in other thread + q = queue.Queue() + t = threading.Thread(target=g, args=('a', 'b', q)) + t.start() + t.join() + + assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f)) + t_opnames = all_opnames(q.get()) + assert "BINARY_OP_ADD_INT" not in t_opnames + assert "BINARY_OP_ADD_UNICODE" in t_opnames + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + def test_reuse_tlbc_across_threads_different_lifetimes(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc_id(f)) + return a + b + + q = queue.Queue() + tlbc_ids = [] + for _ in range(3): + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + t.join() + tlbc_ids.append(q.get()) + + assert tlbc_ids[0] == tlbc_ids[1] + assert tlbc_ids[1] == tlbc_ids[2] + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + def test_no_copies_if_tlbc_disabled(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc_id(f)) + return a + b + + q = queue.Queue() + threads = [] + for _ in range(3): + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + threads.append(t) + + tlbc_ids = [] + for t in threads: + t.join() + tlbc_ids.append(q.get()) + + main_tlbc_id = get_tlbc_id(f) + assert main_tlbc_id is not None + assert tlbc_ids[0] == main_tlbc_id + assert tlbc_ids[1] == main_tlbc_id + assert tlbc_ids[2] == 
main_tlbc_id + """) + assert_python_ok("-X", "tlbc=0", "-c", code) + + def test_no_specialization_if_tlbc_disabled(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(f): + bc = get_tlbc(f) + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b): + return a + b + + for _ in range(100): + f(1, 2) + + assert "BINARY_OP_ADD_INT" not in all_opnames(f) + """) + assert_python_ok("-X", "tlbc=0", "-c", code) + + def test_generator_throw(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def g(): + try: + yield + except: + yield get_tlbc_id(g) + + def f(q): + gen = g() + next(gen) + q.put(gen.throw(ValueError)) + + q = queue.Queue() + t = threading.Thread(target=f, args=(q,)) + t.start() + t.join() + + gen = g() + next(gen) + main_id = gen.throw(ValueError) + assert main_id != q.get() + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + +if __name__ == "__main__": + unittest.main() |