summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/using/configure.rst21
-rw-r--r--Doc/whatsnew/3.12.rst4
-rw-r--r--Makefile.pre.in10
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst2
-rwxr-xr-xconfigure261
-rw-r--r--configure.ac53
7 files changed, 351 insertions, 1 deletions
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst
index 4e50e73..ef770d7 100644
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@@ -191,7 +191,8 @@ Performance options
-------------------
Configuring Python using ``--enable-optimizations --with-lto`` (PGO + LTO) is
-recommended for best performance.
+recommended for best performance. The experimental ``--enable-bolt`` flag can
+also be used to improve performance.
.. cmdoption:: --enable-optimizations
@@ -231,6 +232,24 @@ recommended for best performance.
.. versionadded:: 3.11
To use ThinLTO feature, use ``--with-lto=thin`` on Clang.
+.. cmdoption:: --enable-bolt
+
+ Enable usage of the `BOLT post-link binary optimizer
+ <https://github.com/llvm/llvm-project/tree/main/bolt>` (disabled by
+ default).
+
+ BOLT is part of the LLVM project but is not always included in their binary
+ distributions. This flag requires that ``llvm-bolt`` and ``merge-fdata``
+ are available.
+
+ BOLT is still a fairly new project so this flag should be considered
+ experimental for now. Because this tool operates on machine code its success
+ is dependent on a combination of the build environment + the other
+ optimization configure args + the CPU architecture, and not all combinations
+ are supported.
+
+ .. versionadded:: 3.12
+
.. cmdoption:: --with-computed-gotos
Enable computed gotos in evaluation loop (enabled by default on supported
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 9689d9d..f9fa8ac 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -133,6 +133,10 @@ Optimizations
It reduces object size by 8 or 16 bytes on 64bit platform. (:pep:`623`)
(Contributed by Inada Naoki in :gh:`92536`.)
+* Added experimental support for using the BOLT binary optimizer in the build
+ process, which improves performance by 1-5%.
+ (Contributed by Kevin Modzelewski in :gh:`90536`.)
+
CPython bytecode changes
========================
diff --git a/Makefile.pre.in b/Makefile.pre.in
index c647853..ae7735c 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -640,6 +640,16 @@ profile-opt: profile-run-stamp
-rm -f profile-clean-stamp
$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"
+bolt-opt: @PREBOLT_RULE@
+ rm -f *.fdata
+ @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst
+ ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
+ @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata
+ @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot
+ rm -f *.fdata
+ rm -f $(BUILDPYTHON).bolt_inst
+ mv $(BUILDPYTHON).bolt $(BUILDPYTHON)
+
# Compile and run with gcov
.PHONY=coverage coverage-lcov coverage-report
coverage:
diff --git a/Misc/ACKS b/Misc/ACKS
index c1f570a..16a482e 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1212,6 +1212,7 @@ Gideon Mitchell
Tim Mitchell
Zubin Mithra
Florian Mladitsch
+Kevin Modzelewski
Doug Moen
Jakub Molinski
Juliette Monsel
diff --git a/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst b/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst
new file mode 100644
index 0000000..4605e03
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2022-08-12-13-06-03.gh-issue-90536.qMpF6p.rst
@@ -0,0 +1,2 @@
+Use the BOLT post-link optimizer to improve performance, particularly on
+medium-to-large applications.
diff --git a/configure b/configure
index 82b55a3..fb3a3c3 100755
--- a/configure
+++ b/configure
@@ -887,6 +887,9 @@ LLVM_PROF_FILE
LLVM_PROF_MERGER
PGO_PROF_USE_FLAG
PGO_PROF_GEN_FLAG
+MERGE_FDATA
+LLVM_BOLT
+PREBOLT_RULE
LLVM_AR_FOUND
LLVM_AR
PROFILE_TASK
@@ -1049,6 +1052,7 @@ enable_pystats
with_assertions
enable_optimizations
with_lto
+enable_bolt
with_address_sanitizer
with_memory_sanitizer
with_undefined_behavior_sanitizer
@@ -1774,6 +1778,8 @@ Optional Features:
--enable-pystats enable internal statistics gathering (default is no)
--enable-optimizations enable expensive, stable optimizations (PGO, etc.)
(default is no)
+ --enable-bolt enable usage of the llvm-bolt post-link optimizer
+ (default is no)
--enable-loadable-sqlite-extensions
support loadable extensions in the sqlite3 module,
see Doc/library/sqlite3.rst (default is no)
@@ -7878,6 +7884,261 @@ $as_echo "$as_me: llvm-ar found via xcrun: ${LLVM_AR}" >&6;}
LDFLAGS_NODIST="$LDFLAGS_NODIST $LTOFLAGS"
fi
+# Enable bolt flags
+Py_BOLT='false'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-bolt" >&5
+$as_echo_n "checking for --enable-bolt... " >&6; }
+# Check whether --enable-bolt was given.
+if test "${enable_bolt+set}" = set; then :
+ enableval=$enable_bolt;
+if test "$enableval" != no
+then
+ Py_BOLT='true'
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; };
+else
+ Py_BOLT='false'
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; };
+fi
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+if test "$Py_BOLT" = 'true' ; then
+ PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
+ DEF_MAKE_ALL_RULE="bolt-opt"
+ DEF_MAKE_RULE="build_all"
+
+ # These flags are required for bolt to work:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-reorder-blocks-and-partition"
+ LDFLAGS_NODIST="$LDFLAGS_NODIST -Wl,--emit-relocs"
+
+ # These flags are required to get good performance from bolt:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-pie"
+ # We want to add these no-pie flags to linking executables but not shared libraries:
+ LINKCC="$LINKCC -fno-pie -no-pie"
+ # Designate the DWARF version into 4 since the LLVM-BOLT does not support DWARF5 yet.
+ CFLAGS="$CFLAGS -gdwarf-4"
+ LDFLAGS="$LDFLAGS -gdwarf-4"
+
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}llvm-bolt", so it can be a program name with args.
+set dummy ${ac_tool_prefix}llvm-bolt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_LLVM_BOLT+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $LLVM_BOLT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_LLVM_BOLT="$LLVM_BOLT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+LLVM_BOLT=$ac_cv_path_LLVM_BOLT
+if test -n "$LLVM_BOLT"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LLVM_BOLT" >&5
+$as_echo "$LLVM_BOLT" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_LLVM_BOLT"; then
+ ac_pt_LLVM_BOLT=$LLVM_BOLT
+ # Extract the first word of "llvm-bolt", so it can be a program name with args.
+set dummy llvm-bolt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_LLVM_BOLT+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ac_pt_LLVM_BOLT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ac_pt_LLVM_BOLT="$ac_pt_LLVM_BOLT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ac_pt_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ac_pt_LLVM_BOLT=$ac_cv_path_ac_pt_LLVM_BOLT
+if test -n "$ac_pt_LLVM_BOLT"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_LLVM_BOLT" >&5
+$as_echo "$ac_pt_LLVM_BOLT" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_pt_LLVM_BOLT" = x; then
+ LLVM_BOLT="''"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ LLVM_BOLT=$ac_pt_LLVM_BOLT
+ fi
+else
+ LLVM_BOLT="$ac_cv_path_LLVM_BOLT"
+fi
+
+ if test -n "${LLVM_BOLT}" -a -x "${LLVM_BOLT}"
+ then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found llvm-bolt\"" >&5
+$as_echo "\"Found llvm-bolt\"" >&6; }
+ else
+ as_fn_error $? "llvm-bolt is required for a --enable-bolt build but could not be found." "$LINENO" 5
+ fi
+
+
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}merge-fdata", so it can be a program name with args.
+set dummy ${ac_tool_prefix}merge-fdata; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_MERGE_FDATA+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $MERGE_FDATA in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MERGE_FDATA="$MERGE_FDATA" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+MERGE_FDATA=$ac_cv_path_MERGE_FDATA
+if test -n "$MERGE_FDATA"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MERGE_FDATA" >&5
+$as_echo "$MERGE_FDATA" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_MERGE_FDATA"; then
+ ac_pt_MERGE_FDATA=$MERGE_FDATA
+ # Extract the first word of "merge-fdata", so it can be a program name with args.
+set dummy merge-fdata; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_MERGE_FDATA+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ac_pt_MERGE_FDATA in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ac_pt_MERGE_FDATA="$ac_pt_MERGE_FDATA" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ac_pt_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ac_pt_MERGE_FDATA=$ac_cv_path_ac_pt_MERGE_FDATA
+if test -n "$ac_pt_MERGE_FDATA"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_MERGE_FDATA" >&5
+$as_echo "$ac_pt_MERGE_FDATA" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_pt_MERGE_FDATA" = x; then
+ MERGE_FDATA="''"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ MERGE_FDATA=$ac_pt_MERGE_FDATA
+ fi
+else
+ MERGE_FDATA="$ac_cv_path_MERGE_FDATA"
+fi
+
+ if test -n "${MERGE_FDATA}" -a -x "${MERGE_FDATA}"
+ then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found merge-fdata\"" >&5
+$as_echo "\"Found merge-fdata\"" >&6; }
+ else
+ as_fn_error $? "merge-fdata is required for a --enable-bolt build but could not be found." "$LINENO" 5
+ fi
+fi
+
# Enable PGO flags.
diff --git a/configure.ac b/configure.ac
index 85d9e80..bab405e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1917,6 +1917,59 @@ if test "$Py_LTO" = 'true' ; then
LDFLAGS_NODIST="$LDFLAGS_NODIST $LTOFLAGS"
fi
+# Enable bolt flags
+Py_BOLT='false'
+AC_MSG_CHECKING(for --enable-bolt)
+AC_ARG_ENABLE(bolt, AS_HELP_STRING(
+ [--enable-bolt],
+ [enable usage of the llvm-bolt post-link optimizer (default is no)]),
+[
+if test "$enableval" != no
+then
+ Py_BOLT='true'
+ AC_MSG_RESULT(yes);
+else
+ Py_BOLT='false'
+ AC_MSG_RESULT(no);
+fi],
+[AC_MSG_RESULT(no)])
+
+AC_SUBST(PREBOLT_RULE)
+if test "$Py_BOLT" = 'true' ; then
+ PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
+ DEF_MAKE_ALL_RULE="bolt-opt"
+ DEF_MAKE_RULE="build_all"
+
+ # These flags are required for bolt to work:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-reorder-blocks-and-partition"
+ LDFLAGS_NODIST="$LDFLAGS_NODIST -Wl,--emit-relocs"
+
+ # These flags are required to get good performance from bolt:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-pie"
+ # We want to add these no-pie flags to linking executables but not shared libraries:
+ LINKCC="$LINKCC -fno-pie -no-pie"
+ # Designate the DWARF version into 4 since the LLVM-BOLT does not support DWARF5 yet.
+ CFLAGS="$CFLAGS -gdwarf-4"
+ LDFLAGS="$LDFLAGS -gdwarf-4"
+ AC_SUBST(LLVM_BOLT)
+ AC_PATH_TOOL(LLVM_BOLT, llvm-bolt, '', ${llvm_path})
+ if test -n "${LLVM_BOLT}" -a -x "${LLVM_BOLT}"
+ then
+ AC_MSG_RESULT("Found llvm-bolt")
+ else
+ AC_MSG_ERROR([llvm-bolt is required for a --enable-bolt build but could not be found.])
+ fi
+
+ AC_SUBST(MERGE_FDATA)
+ AC_PATH_TOOL(MERGE_FDATA, merge-fdata, '', ${llvm_path})
+ if test -n "${MERGE_FDATA}" -a -x "${MERGE_FDATA}"
+ then
+ AC_MSG_RESULT("Found merge-fdata")
+ else
+ AC_MSG_ERROR([merge-fdata is required for a --enable-bolt build but could not be found.])
+ fi
+fi
+
# Enable PGO flags.
AC_SUBST(PGO_PROF_GEN_FLAG)
AC_SUBST(PGO_PROF_USE_FLAG)