From c66ffcf8e3ab889a30aae43520aa29c167344bd3 Mon Sep 17 00:00:00 2001
From: "T. Wouters"
Date: Tue, 15 Apr 2025 11:39:32 +0200
Subject: gh-129987: Selectively re-enable SLP autovectorization of _PyEval_EvalFrameDefault (#132530)

Only disable SLP autovectorization of `_PyEval_EvalFrameDefault` on newer
GCCs, as the optimization bug seems to exist only on GCC 12 and later, and
before GCC 9 disabling the optimization has a dramatic performance impact.
---
 Python/ceval.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Python/ceval.c b/Python/ceval.c
index 278d9e3..e534c7e 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -948,11 +948,15 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
 #include "generated_cases.c.h"
 #endif
 
-#if (defined(__GNUC__) && !defined(__clang__)) && defined(__x86_64__)
+#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
 /*
- * gh-129987: The SLP autovectorizer can cause poor code generation for opcode
- * dispatch, negating any benefit we get from vectorization elsewhere in the
- * interpreter loop.
+ * gh-129987: The SLP autovectorizer can cause poor code generation for
+ * opcode dispatch in some GCC versions (observed in GCCs 12 through 15,
+ * probably caused by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115777),
+ * negating any benefit we get from vectorization elsewhere in the
+ * interpreter loop. Disabling it significantly affected older GCC versions
+ * (prior to GCC 9, 40% performance drop), so we have to selectively disable
+ * it.
  */
 #define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize")))
 #else
-- 
cgit v0.12