author     Guido van Rossum <guido@python.org>   2024-04-04 15:03:27 (GMT)
committer  GitHub <noreply@github.com>           2024-04-04 15:03:27 (GMT)
commit     060a96f1a9a901b01ed304aa82b886d248ca1cb6 (patch)
tree       cb3e95ecac1f90440b7d3752c4aad015ea734bf0 /Include
parent     63bbe77d9bb2be4db83ed09b96dd22f2a44ef55b (diff)
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.
The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
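
To make the counter scheme concrete before reading the diff: the sketch below is a standalone illustration, not CPython source; ``Counter``, ``advance``, and ``restart`` are hypothetical stand-ins for the ``_Py_BackoffCounter`` helpers introduced in ``pycore_backoff.h``. It models a 16-bit counter as a 12-bit countdown value plus a 4-bit backoff exponent and shows how the retry interval grows.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for _Py_BackoffCounter: a 4-bit backoff
 * exponent and a 12-bit countdown value sharing one uint16_t. */
typedef struct {
    union {
        struct {
            uint16_t backoff : 4;
            uint16_t value : 12;
        };
        uint16_t as_counter;
    };
} Counter;

static Counter
make_counter(uint16_t value, uint16_t backoff)
{
    assert(backoff <= 15 && value <= 0xFFF);
    return (Counter){.value = value, .backoff = backoff};
}

/* Each event (e.g. a JUMP_BACKWARD execution) counts the value down;
 * reaching zero triggers an optimization attempt. */
static Counter
advance(Counter c)
{
    return make_counter((c.value - 1) & 0xFFF, c.backoff);
}

/* After a failed attempt, restart with a longer waiting period:
 * bump the exponent (capped at 12) and reload value = 2**backoff - 1. */
static Counter
restart(Counter c)
{
    uint16_t b = (uint16_t)(c.backoff < 12 ? c.backoff + 1 : 12);
    return make_counter((uint16_t)((1 << b) - 1), b);
}

int main(void)
{
    Counter c = make_counter(16, 4);  /* JUMP_BACKWARD starting point */
    for (int attempt = 0; attempt < 4; attempt++) {
        int waited = 0;
        while (c.value != 0) {
            c = advance(c);
            waited++;
        }
        printf("attempt %d after %d events\n", attempt, waited);
        c = restart(c);  /* pretend the attempt failed */
    }
    return 0;
}

Starting from the ``JUMP_BACKWARD`` initial state (value 16, backoff 4), this prints triggers after 16, 31, 63, and 127 events: each failed attempt roughly doubles the wait before the next one, which is the exponential backoff the commit message describes.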
Diffstat (limited to 'Include')
-rw-r--r--  Include/cpython/code.h              11
-rw-r--r--  Include/cpython/optimizer.h         15
-rw-r--r--  Include/internal/pycore_backoff.h  128
-rw-r--r--  Include/internal/pycore_code.h      64
-rw-r--r--  Include/internal/pycore_interp.h     6
5 files changed, 167 insertions, 57 deletions
diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index d5dac17..b0e226e 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors {
     uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
 } _Py_GlobalMonitors;
 
+typedef struct {
+    union {
+        struct {
+            uint16_t backoff : 4;
+            uint16_t value : 12;
+        };
+        uint16_t as_counter;  // For printf("%#x", ...)
+    };
+} _Py_BackoffCounter;
+
 /* Each instruction in a code object is a fixed-width value,
  * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
  * opcode allows for larger values but the current limit is 3 uses
@@ -39,6 +49,7 @@ typedef union {
         uint8_t code;
         uint8_t arg;
     } op;
+    _Py_BackoffCounter counter;  // First cache entry of specializable op
 } _Py_CODEUNIT;

diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index bc960c5..819251a 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
 
 typedef struct _exit_data {
     uint32_t target;
-    int16_t temperature;
+    _Py_BackoffCounter temperature;
     const struct _PyExecutorObject *executor;
 } _PyExitData;
 
@@ -115,11 +115,6 @@ typedef int (*optimize_func)(
 struct _PyOptimizerObject {
     PyObject_HEAD
    optimize_func optimize;
-    /* These thresholds are treated as signed so do not exceed INT16_MAX
-     * Use INT16_MAX to indicate that the optimizer should never be called */
-    uint16_t resume_threshold;
-    uint16_t side_threshold;
-    uint16_t backedge_threshold;
     /* Data needed by the optimizer goes here, but is opaque to the VM */
 };
 
@@ -151,14 +146,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
 
-#define OPTIMIZER_BITS_IN_COUNTER 4
-/* Minimum of 16 additional executions before retry */
-#define MIN_TIER2_BACKOFF 4
-#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
-#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
-/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
-#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
-
 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6

diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h
new file mode 100644
index 0000000..5d93c88
--- /dev/null
+++ b/Include/internal/pycore_backoff.h
@@ -0,0 +1,128 @@
+
+#ifndef Py_INTERNAL_BACKOFF_H
+#define Py_INTERNAL_BACKOFF_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* 16-bit countdown counters using exponential backoff.
+
+   These are used by the adaptive specializer to count down until
+   it is time to specialize an instruction. If specialization fails
+   the counter is reset using exponential backoff.
+
+   Another use is for the Tier 2 optimizer to decide when to create
+   a new Tier 2 trace (executor). Again, exponential backoff is used.
+
+   The 16-bit counter is structured as a 12-bit unsigned 'value'
+   and a 4-bit 'backoff' field. When resetting the counter, the
+   backoff field is incremented (until it reaches a limit) and the
+   value is set to a bit mask representing the value 2**backoff - 1.
+   The maximum backoff is 12 (the number of value bits).
+
+   There is an exceptional value which must not be updated, 0xFFFF.
+*/
+
+#define UNREACHABLE_BACKOFF 0xFFFF
+
+static inline bool
+is_unreachable_backoff_counter(_Py_BackoffCounter counter)
+{
+    return counter.as_counter == UNREACHABLE_BACKOFF;
+}
+
+static inline _Py_BackoffCounter
+make_backoff_counter(uint16_t value, uint16_t backoff)
+{
+    assert(backoff <= 15);
+    assert(value <= 0xFFF);
+    return (_Py_BackoffCounter){.value = value, .backoff = backoff};
+}
+
+static inline _Py_BackoffCounter
+forge_backoff_counter(uint16_t counter)
+{
+    return (_Py_BackoffCounter){.as_counter = counter};
+}
+
+static inline _Py_BackoffCounter
+restart_backoff_counter(_Py_BackoffCounter counter)
+{
+    assert(!is_unreachable_backoff_counter(counter));
+    if (counter.backoff < 12) {
+        return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1);
+    }
+    else {
+        return make_backoff_counter((1 << 12) - 1, 12);
+    }
+}
+
+static inline _Py_BackoffCounter
+pause_backoff_counter(_Py_BackoffCounter counter)
+{
+    return make_backoff_counter(counter.value | 1, counter.backoff);
+}
+
+static inline _Py_BackoffCounter
+advance_backoff_counter(_Py_BackoffCounter counter)
+{
+    if (!is_unreachable_backoff_counter(counter)) {
+        return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff);
+    }
+    else {
+        return counter;
+    }
+}
+
+static inline bool
+backoff_counter_triggers(_Py_BackoffCounter counter)
+{
+    return counter.value == 0;
+}
+
+/* Initial JUMP_BACKWARD counter.
+ * This determines when we create a trace for a loop.
+ * Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
+#define JUMP_BACKWARD_INITIAL_VALUE 16
+#define JUMP_BACKWARD_INITIAL_BACKOFF 4
+static inline _Py_BackoffCounter
+initial_jump_backoff_counter(void)
+{
+    return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
+                                JUMP_BACKWARD_INITIAL_BACKOFF);
+}
+
+/* Initial exit temperature.
+ * Must be larger than ADAPTIVE_COOLDOWN_VALUE,
+ * otherwise when a side exit warms up we may construct
+ * a new trace before the Tier 1 code has properly re-specialized.
+ * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
+#define COLD_EXIT_INITIAL_VALUE 64
+#define COLD_EXIT_INITIAL_BACKOFF 6
+
+static inline _Py_BackoffCounter
+initial_temperature_backoff_counter(void)
+{
+    return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
+                                COLD_EXIT_INITIAL_BACKOFF);
+}
+
+/* Unreachable backoff counter.
+ */
+static inline _Py_BackoffCounter
+initial_unreachable_backoff_counter(void)
+{
+    return forge_backoff_counter(UNREACHABLE_BACKOFF);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_BACKOFF_H */

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 6c90c9e..688051b 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -31,7 +31,7 @@ extern "C" {
 #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t module_keys_version;
     uint16_t builtin_keys_version;
     uint16_t index;
@@ -40,44 +40,44 @@ typedef struct {
 
 #define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinaryOpCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyUnpackSequenceCache;
 
 #define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
     CACHE_ENTRIES(_PyUnpackSequenceCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyCompareOpCache;
 
 #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinarySubscrCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySuperAttrCache;
 
 #define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
     uint16_t index;
 } _PyAttrCache;
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t type_version[2];
     union {
         uint16_t keys_version[2];
@@ -93,39 +93,39 @@ typedef struct {
 
 #define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t func_version[2];
 } _PyCallCache;
 
 #define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyStoreSubscrCache;
 
 #define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyForIterCache;
 
 #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySendCache;
 
 #define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
 } _PyToBoolCache;
 
 #define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyContainsOpCache;
 
 #define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
 
@@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 
 /** Counters
  * The first 16-bit value in each inline cache is a counter.
- * When counting misses, the counter is treated as a simple unsigned value.
  *
  * When counting executions until the next specialization attempt,
  * exponential backoff is used to reduce the number of specialization failures.
- * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
- * On a specialization failure, the backoff exponent is incremented and the
- * counter set to (2**backoff - 1).
- * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ * See pycore_backoff.h for more details.
+ * On a specialization failure, the backoff counter is restarted.
  */
 
-/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
-#define ADAPTIVE_BACKOFF_BITS 4
+#include "pycore_backoff.h"
 
 // A value of 1 means that we attempt to specialize the *second* time each
 // instruction is executed. Executing twice is a much better indicator of
@@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 #define ADAPTIVE_COOLDOWN_VALUE 52
 #define ADAPTIVE_COOLDOWN_BACKOFF 0
 
-#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+// Can't assert this in pycore_backoff.h because of header order dependencies
+static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
+              "Cold exit value should be larger than adaptive cooldown value");
 
-
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_bits(uint16_t value, uint16_t backoff) {
-    return ((value << ADAPTIVE_BACKOFF_BITS)
-            | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
+    return make_backoff_counter(value, backoff);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_warmup(void) {
     return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
                                  ADAPTIVE_WARMUP_BACKOFF);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_cooldown(void) {
     return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
                                  ADAPTIVE_COOLDOWN_BACKOFF);
 }
 
-static inline uint16_t
-adaptive_counter_backoff(uint16_t counter) {
-    uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
-    backoff++;
-    if (backoff > MAX_BACKOFF_VALUE) {
-        backoff = MAX_BACKOFF_VALUE;
-    }
-    uint16_t value = (uint16_t)(1 << backoff) - 1;
-    return adaptive_counter_bits(value, backoff);
+static inline _Py_BackoffCounter
+adaptive_counter_backoff(_Py_BackoffCounter counter) {
+    return restart_backoff_counter(counter);
 }

diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index b8d0fdc..b5cea86 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -239,12 +239,6 @@ struct _is {
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
 
-    /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
-    uint32_t optimizer_resume_threshold;
-    uint32_t optimizer_backedge_threshold;
-
-    uint16_t optimizer_side_threshold;
-
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
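
A closing note on why the Tier 1 behavior is "(supposed to be) identical": the removed ``adaptive_counter_bits`` packed the counter as ``(value << 4) | backoff``, and on the common ABIs where the first-declared bit-field occupies the least significant bits, the new struct (``backoff : 4`` declared before ``value : 12``) reproduces exactly that layout; the ``as_counter`` union member exposes the raw 16 bits. The check below is a standalone illustration under that assumption (bit-field placement is implementation-defined), not CPython code.

#include <stdint.h>
#include <stdio.h>

/* Stand-in mirroring the new _Py_BackoffCounter layout. */
typedef struct {
    union {
        struct {
            uint16_t backoff : 4;
            uint16_t value : 12;
        };
        uint16_t as_counter;
    };
} Counter;

/* The packed encoding removed from pycore_code.h by this commit:
 * counter value in the high 12 bits, backoff exponent in the low 4. */
static uint16_t
old_adaptive_counter_bits(uint16_t value, uint16_t backoff)
{
    return (uint16_t)((value << 4) | (backoff & 0xF));
}

int main(void)
{
    /* ADAPTIVE_COOLDOWN_VALUE = 52 with backoff 0, per the diff above. */
    Counter c = {.value = 52, .backoff = 0};
    uint16_t old = old_adaptive_counter_bits(52, 0);
    printf("new %#x, old %#x -> %s\n", c.as_counter, old,
           c.as_counter == old ? "same bits" : "layout differs!");
    return 0;
}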