author     Guido van Rossum <guido@python.org>    2024-04-04 15:03:27 (GMT)
committer  GitHub <noreply@github.com>            2024-04-04 15:03:27 (GMT)
commit     060a96f1a9a901b01ed304aa82b886d248ca1cb6 (patch)
tree       cb3e95ecac1f90440b7d3752c4aad015ea734bf0 /Include
parent     63bbe77d9bb2be4db83ed09b96dd22f2a44ef55b (diff)
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer.

The API used for adaptive specialization counters is changed but the
behavior is (supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
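To make the counting scheme concrete before the diff, here is a minimal standalone C sketch (not part of the commit; the demo_* names are invented for illustration). It models a 16-bit counter as a 12-bit countdown value plus a 4-bit backoff exponent, and shows how each reset roughly doubles the wait before the next attempt:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative layout: 4-bit backoff exponent + 12-bit countdown value. */
typedef struct {
    uint16_t backoff : 4;
    uint16_t value : 12;
} demo_counter;

/* The counter "triggers" (time to attempt specialization/optimization)
   when the value reaches zero. */
static bool demo_triggers(demo_counter c) { return c.value == 0; }
static demo_counter demo_advance(demo_counter c) { c.value -= 1; return c; }

/* Exponential backoff on reset: bump the exponent (capped at 12) and
   set the value to 2**backoff - 1. */
static demo_counter demo_restart(demo_counter c)
{
    if (c.backoff < 12) {
        c.backoff += 1;
    }
    c.value = (uint16_t)((1 << c.backoff) - 1);
    return c;
}

int main(void)
{
    demo_counter c = {.backoff = 4, .value = 16};   /* like JUMP_BACKWARD's start */
    for (int attempt = 1; attempt <= 4; attempt++) {
        while (!demo_triggers(c)) {
            c = demo_advance(c);
        }
        c = demo_restart(c);
        printf("after restart %d: value=%u backoff=%u\n",
               attempt, (unsigned)c.value, (unsigned)c.backoff);
    }
    return 0;
}

Running this prints restart values of 31, 63, 127, 255: each reset roughly doubles the countdown, which is the shape of behavior the counters introduced below implement.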
Diffstat (limited to 'Include')
-rw-r--r--   Include/cpython/code.h               11
-rw-r--r--   Include/cpython/optimizer.h          15
-rw-r--r--   Include/internal/pycore_backoff.h   128
-rw-r--r--   Include/internal/pycore_code.h       64
-rw-r--r--   Include/internal/pycore_interp.h      6
5 files changed, 167 insertions, 57 deletions
diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index d5dac17..b0e226e 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors {
     uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
 } _Py_GlobalMonitors;
 
+typedef struct {
+    union {
+        struct {
+            uint16_t backoff : 4;
+            uint16_t value : 12;
+        };
+        uint16_t as_counter;  // For printf("%#x", ...)
+    };
+} _Py_BackoffCounter;
+
 /* Each instruction in a code object is a fixed-width value,
  * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
  * opcode allows for larger values but the current limit is 3 uses
@@ -39,6 +49,7 @@ typedef union {
         uint8_t code;
         uint8_t arg;
     } op;
+    _Py_BackoffCounter counter;  // First cache entry of specializable op
 } _Py_CODEUNIT;
diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index bc960c5..819251a 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
 typedef struct _exit_data {
     uint32_t target;
-    int16_t temperature;
+    _Py_BackoffCounter temperature;
     const struct _PyExecutorObject *executor;
 } _PyExitData;
@@ -115,11 +115,6 @@ typedef int (*optimize_func)(
 struct _PyOptimizerObject {
     PyObject_HEAD
     optimize_func optimize;
-    /* These thresholds are treated as signed so do not exceed INT16_MAX
-     * Use INT16_MAX to indicate that the optimizer should never be called */
-    uint16_t resume_threshold;
-    uint16_t side_threshold;
-    uint16_t backedge_threshold;
     /* Data needed by the optimizer goes here, but is opaque to the VM */
 };
@@ -151,14 +146,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
-#define OPTIMIZER_BITS_IN_COUNTER 4
-/* Minimum of 16 additional executions before retry */
-#define MIN_TIER2_BACKOFF 4
-#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
-#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
-/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
-#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
-
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h
new file mode 100644
index 0000000..5d93c88
--- /dev/null
+++ b/Include/internal/pycore_backoff.h
@@ -0,0 +1,128 @@
+
+#ifndef Py_INTERNAL_BACKOFF_H
+#define Py_INTERNAL_BACKOFF_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* 16-bit countdown counters using exponential backoff.
+
+ These are used by the adaptive specializer to count down until
+ it is time to specialize an instruction. If specialization fails
+ the counter is reset using exponential backoff.
+
+ Another use is for the Tier 2 optimizer to decide when to create
+ a new Tier 2 trace (executor). Again, exponential backoff is used.
+
+ The 16-bit counter is structured as a 12-bit unsigned 'value'
+ and a 4-bit 'backoff' field. When resetting the counter, the
+ backoff field is incremented (until it reaches a limit) and the
+ value is set to a bit mask representing the value 2**backoff - 1.
+ The maximum backoff is 12 (the number of value bits).
+
+ There is an exceptional value which must not be updated, 0xFFFF.
+*/
+
+#define UNREACHABLE_BACKOFF 0xFFFF
+
+static inline bool
+is_unreachable_backoff_counter(_Py_BackoffCounter counter)
+{
+    return counter.as_counter == UNREACHABLE_BACKOFF;
+}
+
+static inline _Py_BackoffCounter
+make_backoff_counter(uint16_t value, uint16_t backoff)
+{
+    assert(backoff <= 15);
+    assert(value <= 0xFFF);
+    return (_Py_BackoffCounter){.value = value, .backoff = backoff};
+}
+
+static inline _Py_BackoffCounter
+forge_backoff_counter(uint16_t counter)
+{
+    return (_Py_BackoffCounter){.as_counter = counter};
+}
+
+static inline _Py_BackoffCounter
+restart_backoff_counter(_Py_BackoffCounter counter)
+{
+    assert(!is_unreachable_backoff_counter(counter));
+    if (counter.backoff < 12) {
+        return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1);
+    }
+    else {
+        return make_backoff_counter((1 << 12) - 1, 12);
+    }
+}
+
+static inline _Py_BackoffCounter
+pause_backoff_counter(_Py_BackoffCounter counter)
+{
+    return make_backoff_counter(counter.value | 1, counter.backoff);
+}
+
+static inline _Py_BackoffCounter
+advance_backoff_counter(_Py_BackoffCounter counter)
+{
+    if (!is_unreachable_backoff_counter(counter)) {
+        return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff);
+    }
+    else {
+        return counter;
+    }
+}
+
+static inline bool
+backoff_counter_triggers(_Py_BackoffCounter counter)
+{
+    return counter.value == 0;
+}
+
+/* Initial JUMP_BACKWARD counter.
+ * This determines when we create a trace for a loop.
+ * Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
+#define JUMP_BACKWARD_INITIAL_VALUE 16
+#define JUMP_BACKWARD_INITIAL_BACKOFF 4
+static inline _Py_BackoffCounter
+initial_jump_backoff_counter(void)
+{
+    return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
+                                JUMP_BACKWARD_INITIAL_BACKOFF);
+}
+
+/* Initial exit temperature.
+ * Must be larger than ADAPTIVE_COOLDOWN_VALUE,
+ * otherwise when a side exit warms up we may construct
+ * a new trace before the Tier 1 code has properly re-specialized.
+ * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
+#define COLD_EXIT_INITIAL_VALUE 64
+#define COLD_EXIT_INITIAL_BACKOFF 6
+
+static inline _Py_BackoffCounter
+initial_temperature_backoff_counter(void)
+{
+    return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
+                                COLD_EXIT_INITIAL_BACKOFF);
+}
+
+/* Unreachable backoff counter. */
+static inline _Py_BackoffCounter
+initial_unreachable_backoff_counter(void)
+{
+    return forge_backoff_counter(UNREACHABLE_BACKOFF);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_BACKOFF_H */
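Because this header is internal (it errors out without Py_BUILD_CORE), it cannot simply be included from a toy program, but the intended call pattern can be sketched by condensing the definitions above. The loop below is illustrative only — the real call sites live in the interpreter, not in this diff — and it drops the assertions, pause_backoff_counter(), and the unreachable-counter handling for brevity:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Condensed copy of the type from Include/cpython/code.h above. */
typedef struct {
    union {
        struct {
            uint16_t backoff : 4;
            uint16_t value : 12;
        };
        uint16_t as_counter;
    };
} _Py_BackoffCounter;

/* Condensed copies of the helpers from pycore_backoff.h above. */
static inline _Py_BackoffCounter
make_backoff_counter(uint16_t value, uint16_t backoff)
{
    return (_Py_BackoffCounter){.value = value, .backoff = backoff};
}

static inline _Py_BackoffCounter
restart_backoff_counter(_Py_BackoffCounter c)
{
    if (c.backoff < 12) {
        return make_backoff_counter((1 << (c.backoff + 1)) - 1, c.backoff + 1);
    }
    return make_backoff_counter((1 << 12) - 1, 12);
}

static inline _Py_BackoffCounter
advance_backoff_counter(_Py_BackoffCounter c)
{
    return make_backoff_counter((c.value - 1) & 0xFFF, c.backoff);
}

static inline bool
backoff_counter_triggers(_Py_BackoffCounter c)
{
    return c.value == 0;
}

int main(void)
{
    /* Start the way JUMP_BACKWARD does: value 16, backoff 4. */
    _Py_BackoffCounter counter = make_backoff_counter(16, 4);
    for (long i = 0; i < 200; i++) {
        counter = advance_backoff_counter(counter);
        if (backoff_counter_triggers(counter)) {
            /* A real call site would try to create a Tier 2 trace here;
               if that fails, restarting makes it wait ~twice as long. */
            printf("trigger at iteration %ld\n", i);
            counter = restart_backoff_counter(counter);
        }
    }
    return 0;
}

The triggers land further and further apart as the backoff exponent grows, which is the "wait roughly twice as long after each failure" policy described in the header comment.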
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 6c90c9e..688051b 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -31,7 +31,7 @@ extern "C" {
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
uint16_t module_keys_version;
uint16_t builtin_keys_version;
uint16_t index;
@@ -40,44 +40,44 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyBinaryOpCache;
#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyUnpackSequenceCache;
#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
CACHE_ENTRIES(_PyUnpackSequenceCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyCompareOpCache;
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyBinarySubscrCache;
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PySuperAttrCache;
#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
uint16_t version[2];
uint16_t index;
} _PyAttrCache;
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
uint16_t type_version[2];
union {
uint16_t keys_version[2];
@@ -93,39 +93,39 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
uint16_t func_version[2];
} _PyCallCache;
#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyStoreSubscrCache;
#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyForIterCache;
#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PySendCache;
#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
uint16_t version[2];
} _PyToBoolCache;
#define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
typedef struct {
- uint16_t counter;
+ _Py_BackoffCounter counter;
} _PyContainsOpCache;
#define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
@@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
/** Counters
* The first 16-bit value in each inline cache is a counter.
- * When counting misses, the counter is treated as a simple unsigned value.
*
* When counting executions until the next specialization attempt,
* exponential backoff is used to reduce the number of specialization failures.
- * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
- * On a specialization failure, the backoff exponent is incremented and the
- * counter set to (2**backoff - 1).
- * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ * See pycore_backoff.h for more details.
+ * On a specialization failure, the backoff counter is restarted.
*/
-/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
-#define ADAPTIVE_BACKOFF_BITS 4
+#include "pycore_backoff.h"
// A value of 1 means that we attempt to specialize the *second* time each
// instruction is executed. Executing twice is a much better indicator of
@@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
#define ADAPTIVE_COOLDOWN_VALUE 52
#define ADAPTIVE_COOLDOWN_BACKOFF 0
-#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+// Can't assert this in pycore_backoff.h because of header order dependencies
+static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
+              "Cold exit value should be larger than adaptive cooldown value");
-
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_bits(uint16_t value, uint16_t backoff) {
-    return ((value << ADAPTIVE_BACKOFF_BITS)
-            | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
+    return make_backoff_counter(value, backoff);
 }

-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_warmup(void) {
     return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
                                  ADAPTIVE_WARMUP_BACKOFF);
 }

-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_cooldown(void) {
     return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
                                  ADAPTIVE_COOLDOWN_BACKOFF);
 }

-static inline uint16_t
-adaptive_counter_backoff(uint16_t counter) {
-    uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
-    backoff++;
-    if (backoff > MAX_BACKOFF_VALUE) {
-        backoff = MAX_BACKOFF_VALUE;
-    }
-    uint16_t value = (uint16_t)(1 << backoff) - 1;
-    return adaptive_counter_bits(value, backoff);
+static inline _Py_BackoffCounter
+adaptive_counter_backoff(_Py_BackoffCounter counter) {
+    return restart_backoff_counter(counter);
 }
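As a worked numeric example of the values in this hunk: ADAPTIVE_COOLDOWN_VALUE is 52 with backoff 0, and adaptive_counter_backoff() is now just restart_backoff_counter(), so each successive restart yields a value of 2**backoff - 1. The small helper below mirrors that arithmetic on plain integers (it is not CPython code) to show the resulting sequence:

#include <stdint.h>
#include <stdio.h>

/* Mirrors restart_backoff_counter(): bump the exponent (capped at 12)
   and set the countdown value to 2**backoff - 1. */
static void restart(uint16_t *value, uint16_t *backoff)
{
    if (*backoff < 12) {
        *backoff += 1;
    }
    *value = (uint16_t)((1 << *backoff) - 1);
}

int main(void)
{
    uint16_t value = 52, backoff = 0;   /* ADAPTIVE_COOLDOWN_VALUE / _BACKOFF */
    printf("cooldown start: value=%u backoff=%u\n", (unsigned)value, (unsigned)backoff);
    for (int failure = 1; failure <= 6; failure++) {
        restart(&value, &backoff);
        printf("after restart %d: value=%u backoff=%u\n",
               failure, (unsigned)value, (unsigned)backoff);
    }
    /* Prints 1, 3, 7, 15, 31, 63: successive restarts roughly double the
       number of executions before the next specialization attempt. */
    return 0;
}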
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index b8d0fdc..b5cea86 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -239,12 +239,6 @@ struct _is {
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
-    /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
-    uint32_t optimizer_resume_threshold;
-    uint32_t optimizer_backedge_threshold;
-
-    uint16_t optimizer_side_threshold;
-
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;