diff options
author | Victor Stinner <vstinner@python.org> | 2024-06-26 11:54:03 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-26 11:54:03 (GMT) |
commit | 9e4a81f00fef689c6e18a64245aa064eaadc7ac7 (patch) | |
tree | a0e7efaf59d8bb90911830b230909659b1176904 /Include | |
parent | 9e45fd9858a059950f7387b4fda2b00df0e8e537 (diff) | |
download | cpython-9e4a81f00fef689c6e18a64245aa064eaadc7ac7.zip cpython-9e4a81f00fef689c6e18a64245aa064eaadc7ac7.tar.gz cpython-9e4a81f00fef689c6e18a64245aa064eaadc7ac7.tar.bz2 |
gh-120642: Move private PyCode APIs to the internal C API (#120643)
* Move _Py_CODEUNIT and related functions to pycore_code.h.
* Move _Py_BackoffCounter to pycore_backoff.h.
* Move Include/cpython/optimizer.h content to pycore_optimizer.h.
* Remove Include/cpython/optimizer.h.
* Remove PyUnstable_Replace_Executor().
Rename functions:
* PyUnstable_GetExecutor() => _Py_GetExecutor()
* PyUnstable_GetOptimizer() => _Py_GetOptimizer()
* PyUnstable_SetOptimizer() => _Py_SetTier2Optimizer()
* PyUnstable_Optimizer_NewCounter() => _PyOptimizer_NewCounter()
* PyUnstable_Optimizer_NewUOpOptimizer() => _PyOptimizer_NewUOpOptimizer()
Diffstat (limited to 'Include')
-rw-r--r-- | Include/Python.h | 1 | ||||
-rw-r--r-- | Include/cpython/code.h | 52 | ||||
-rw-r--r-- | Include/cpython/optimizer.h | 135 | ||||
-rw-r--r-- | Include/internal/pycore_backoff.h | 12 | ||||
-rw-r--r-- | Include/internal/pycore_code.h | 44 | ||||
-rw-r--r-- | Include/internal/pycore_interp.h | 1 | ||||
-rw-r--r-- | Include/internal/pycore_optimizer.h | 127 |
7 files changed, 182 insertions, 190 deletions
diff --git a/Include/Python.h b/Include/Python.h index d4a254f..8fffa22 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -132,6 +132,5 @@ #include "fileutils.h" #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" -#include "cpython/optimizer.h" #endif /* !Py_PYTHON_H */ diff --git a/Include/cpython/code.h b/Include/cpython/code.h index ef8f930..07ed520 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -24,58 +24,6 @@ typedef struct _Py_GlobalMonitors { uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS]; } _Py_GlobalMonitors; -typedef struct { - union { - struct { - uint16_t backoff : 4; - uint16_t value : 12; - }; - uint16_t as_counter; // For printf("%#x", ...) - }; -} _Py_BackoffCounter; - -/* Each instruction in a code object is a fixed-width value, - * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG - * opcode allows for larger values but the current limit is 3 uses - * of EXTENDED_ARG (see Python/compile.c), for a maximum - * 32-bit value. This aligns with the note in Python/compile.c - * (compiler_addop_i_line) indicating that the max oparg value is - * 2**32 - 1, rather than INT_MAX. - */ - -typedef union { - uint16_t cache; - struct { - uint8_t code; - uint8_t arg; - } op; - _Py_BackoffCounter counter; // First cache entry of specializable op -} _Py_CODEUNIT; - - -/* These macros only remain defined for compatibility. */ -#define _Py_OPCODE(word) ((word).op.code) -#define _Py_OPARG(word) ((word).op.arg) - -static inline _Py_CODEUNIT -_py_make_codeunit(uint8_t opcode, uint8_t oparg) -{ - // No designated initialisers because of C++ compat - _Py_CODEUNIT word; - word.op.code = opcode; - word.op.arg = oparg; - return word; -} - -static inline void -_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode) -{ - word->op.code = opcode; -} - -#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg)) -#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode)) - typedef struct { PyObject *_co_code; diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h deleted file mode 100644 index f2093a1..0000000 --- a/Include/cpython/optimizer.h +++ /dev/null @@ -1,135 +0,0 @@ - -#ifndef Py_LIMITED_API -#ifndef Py_OPTIMIZER_H -#define Py_OPTIMIZER_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _PyExecutorLinkListNode { - struct _PyExecutorObject *next; - struct _PyExecutorObject *previous; -} _PyExecutorLinkListNode; - - -/* Bloom filter with m = 256 - * https://en.wikipedia.org/wiki/Bloom_filter */ -#define _Py_BLOOM_FILTER_WORDS 8 - -typedef struct { - uint32_t bits[_Py_BLOOM_FILTER_WORDS]; -} _PyBloomFilter; - -typedef struct { - uint8_t opcode; - uint8_t oparg; - uint8_t valid; - uint8_t linked; - int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). - _PyBloomFilter bloom; - _PyExecutorLinkListNode links; - PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). -} _PyVMData; - -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_EXIT - * uint16_t exit_index; - * uint16_t error_target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:14; - uint16_t format:2; - uint16_t oparg; - union { - uint32_t target; - struct { - union { - uint16_t exit_index; - uint16_t jump_target; - }; - uint16_t error_target; - }; - }; - uint64_t operand; // A cache entry -} _PyUOpInstruction; - -typedef struct { - uint32_t target; - _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; -} _PyExitData; - -typedef struct _PyExecutorObject { - PyObject_VAR_HEAD - const _PyUOpInstruction *trace; - _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - uint32_t exit_count; - uint32_t code_size; - size_t jit_size; - void *jit_code; - void *jit_side_entry; - _PyExitData exits[1]; -} _PyExecutorObject; - -typedef struct _PyOptimizerObject _PyOptimizerObject; - -/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */ -typedef int (*_Py_optimize_func)( - _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int curr_stackentries); - -struct _PyOptimizerObject { - PyObject_HEAD - _Py_optimize_func optimize; - /* Data needed by the optimizer goes here, but is opaque to the VM */ -}; - -/** Test support **/ -typedef struct { - _PyOptimizerObject base; - int64_t count; -} _PyCounterOptimizerObject; - -PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); - -_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); - -PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); - -PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); - -PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); - -void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); -void _Py_ExecutorDetach(_PyExecutorObject *); -void _Py_BloomFilter_Init(_PyBloomFilter *); -void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); -PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -/* For testing */ -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); -PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); - -#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 -#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 - -#ifdef _Py_TIER2 -PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); -PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); -#else -# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) -# define _Py_Executors_InvalidateAll(A, B) ((void)0) -#endif - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPTIMIZER_H */ -#endif /* Py_LIMITED_API */ diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 90735b2..0bcca1e 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -13,6 +13,18 @@ extern "C" { #include <stdbool.h> #include <stdint.h> + +typedef struct { + union { + struct { + uint16_t backoff : 4; + uint16_t value : 12; + }; + uint16_t as_counter; // For printf("%#x", ...) + }; +} _Py_BackoffCounter; + + /* 16-bit countdown counters using exponential backoff. These are used by the adaptive specializer to count down until diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index bcbaf60..48ff270 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -9,6 +9,50 @@ extern "C" { #endif #include "pycore_lock.h" // PyMutex +#include "pycore_backoff.h" // _Py_BackoffCounter + + +/* Each instruction in a code object is a fixed-width value, + * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG + * opcode allows for larger values but the current limit is 3 uses + * of EXTENDED_ARG (see Python/compile.c), for a maximum + * 32-bit value. This aligns with the note in Python/compile.c + * (compiler_addop_i_line) indicating that the max oparg value is + * 2**32 - 1, rather than INT_MAX. + */ + +typedef union { + uint16_t cache; + struct { + uint8_t code; + uint8_t arg; + } op; + _Py_BackoffCounter counter; // First cache entry of specializable op +} _Py_CODEUNIT; + + +/* These macros only remain defined for compatibility. */ +#define _Py_OPCODE(word) ((word).op.code) +#define _Py_OPARG(word) ((word).op.arg) + +static inline _Py_CODEUNIT +_py_make_codeunit(uint8_t opcode, uint8_t oparg) +{ + // No designated initialisers because of C++ compat + _Py_CODEUNIT word; + word.op.code = opcode; + word.op.arg = oparg; + return word; +} + +static inline void +_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode) +{ + word->op.code = opcode; +} + +#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg)) +#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode)) // We hide some of the newer PyCodeObject fields behind macros. diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b40b63a..4a83862 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -30,6 +30,7 @@ extern "C" { #include "pycore_list.h" // struct _Py_list_state #include "pycore_mimalloc.h" // struct _mimalloc_interp_state #include "pycore_object_state.h" // struct _py_object_state +#include "pycore_optimizer.h" // _PyOptimizerObject #include "pycore_obmalloc.h" // struct _obmalloc_state #include "pycore_qsbr.h" // struct _qsbr_state #include "pycore_tstate.h" // _PyThreadStateImpl diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index fd7833f..04a029d 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -11,12 +11,135 @@ extern "C" { #include "pycore_uop_ids.h" #include <stdbool.h> + +typedef struct _PyExecutorLinkListNode { + struct _PyExecutorObject *next; + struct _PyExecutorObject *previous; +} _PyExecutorLinkListNode; + + +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ +#define _Py_BLOOM_FILTER_WORDS 8 + +typedef struct { + uint32_t bits[_Py_BLOOM_FILTER_WORDS]; +} _PyBloomFilter; + +typedef struct { + uint8_t opcode; + uint8_t oparg; + uint8_t valid; + uint8_t linked; + int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). + _PyBloomFilter bloom; + _PyExecutorLinkListNode links; + PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). +} _PyVMData; + +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_EXIT + * uint16_t exit_index; + * uint16_t error_target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct { + uint16_t opcode:14; + uint16_t format:2; + uint16_t oparg; + union { + uint32_t target; + struct { + union { + uint16_t exit_index; + uint16_t jump_target; + }; + uint16_t error_target; + }; + }; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + +typedef struct { + uint32_t target; + _Py_BackoffCounter temperature; + const struct _PyExecutorObject *executor; +} _PyExitData; + +typedef struct _PyExecutorObject { + PyObject_VAR_HEAD + const _PyUOpInstruction *trace; + _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + uint32_t exit_count; + uint32_t code_size; + size_t jit_size; + void *jit_code; + void *jit_side_entry; + _PyExitData exits[1]; +} _PyExecutorObject; + +typedef struct _PyOptimizerObject _PyOptimizerObject; + +/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */ +typedef int (*_Py_optimize_func)( + _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, + _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, + int curr_stackentries); + +struct _PyOptimizerObject { + PyObject_HEAD + _Py_optimize_func optimize; + /* Data needed by the optimizer goes here, but is opaque to the VM */ +}; + +/** Test support **/ +typedef struct { + _PyOptimizerObject base; + int64_t count; +} _PyCounterOptimizerObject; + +_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); + + +// Export for '_opcode' shared extension (JIT compiler). +PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); + +void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); +void _Py_ExecutorDetach(_PyExecutorObject *); +void _Py_BloomFilter_Init(_PyBloomFilter *); +void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); + +// For testing +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void); +PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer); +PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void); +PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); + +#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 +#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 + +#ifdef _Py_TIER2 +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +#else +# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) +# define _Py_Executors_InvalidateAll(A, B) ((void)0) +#endif + + // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 800 #define TRACE_STACK_SIZE 5 -int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, +int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); @@ -148,7 +271,7 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); -PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); +PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); #ifdef __cplusplus } |