summaryrefslogtreecommitdiffstats
path: root/configure
diff options
context:
space:
mode:
authorKevin Modzelewski <kmod@users.noreply.github.com>2022-08-18 21:33:54 (GMT)
committerGitHub <noreply@github.com>2022-08-18 21:33:54 (GMT)
commit214eb2cce5caa99f476ae8abd406077e2c293a3c (patch)
treeb3b6dc69e4b8ccc67bd0fdef9a7a1804a4ddee6f /configure
parent22a95cb5114891e87f6933482dc6eaa00e6a11ad (diff)
downloadcpython-214eb2cce5caa99f476ae8abd406077e2c293a3c.zip
cpython-214eb2cce5caa99f476ae8abd406077e2c293a3c.tar.gz
cpython-214eb2cce5caa99f476ae8abd406077e2c293a3c.tar.bz2
gh-90536: Add support for the BOLT post-link binary optimizer (gh-95908)
* Add support for the BOLT post-link binary optimizer. Using [bolt](https://github.com/llvm/llvm-project/tree/main/bolt) provides a fairly large speedup without any code or functionality changes. It provides roughly a 1% speedup on pyperformance, and a 4% improvement on the Pyston web macrobenchmarks. It is gated behind an `--enable-bolt` configure arg because not all toolchains and environments are supported. It has been tested on a Linux x86_64 toolchain, using llvm-bolt built from the LLVM 14.0.6 sources (their binary distribution of this version did not include bolt). Compared to [a previous attempt](https://github.com/faster-cpython/ideas/issues/224), this commit uses bolt's preferred "instrumentation" approach, and also adds some non-PIE flags that enable much better optimizations from bolt. The effects of this change are a bit more dependent on CPU microarchitecture than other changes, since it optimizes i-cache behavior, which seems to be a bit more variable between architectures. The 1%/4% numbers were collected on an Intel Skylake CPU, and on an AMD Zen 3 CPU I got a slightly larger speedup (2%/4%), and on a c6i.xlarge EC2 instance I got a slightly lower speedup (1%/3%). The low speedup on pyperformance is not entirely unexpected, because BOLT improves i-cache behavior, and the benchmarks in the pyperformance suite are small and tend to fit in i-cache. This change uses the existing pgo profiling task (`python -m test --pgo`), though I was able to measure about a 1% macrobenchmark improvement by using the macrobenchmarks as the training task. I personally think that both the PGO and BOLT tasks should be updated to use macrobenchmarks, but for the sake of splitting up the work this PR uses the existing pgo task. 
* Simplify the build flags * Add a NEWS entry * Update Makefile.pre.in Co-authored-by: Dong-hee Na <donghee.na92@gmail.com> * Update configure.ac Co-authored-by: Dong-hee Na <donghee.na92@gmail.com> * Add myself to ACKS * Add docs * Other review comments * fix tab/space issue * Make it more clear that --enable-bolt is experimental * Add link to bolt's github page Co-authored-by: Dong-hee Na <donghee.na92@gmail.com>
Diffstat (limited to 'configure')
-rwxr-xr-xconfigure261
1 files changed, 261 insertions, 0 deletions
diff --git a/configure b/configure
index 82b55a3..fb3a3c3 100755
--- a/configure
+++ b/configure
@@ -887,6 +887,9 @@ LLVM_PROF_FILE
LLVM_PROF_MERGER
PGO_PROF_USE_FLAG
PGO_PROF_GEN_FLAG
+MERGE_FDATA
+LLVM_BOLT
+PREBOLT_RULE
LLVM_AR_FOUND
LLVM_AR
PROFILE_TASK
@@ -1049,6 +1052,7 @@ enable_pystats
with_assertions
enable_optimizations
with_lto
+enable_bolt
with_address_sanitizer
with_memory_sanitizer
with_undefined_behavior_sanitizer
@@ -1774,6 +1778,8 @@ Optional Features:
--enable-pystats enable internal statistics gathering (default is no)
--enable-optimizations enable expensive, stable optimizations (PGO, etc.)
(default is no)
+ --enable-bolt enable usage of the llvm-bolt post-link optimizer
+ (default is no)
--enable-loadable-sqlite-extensions
support loadable extensions in the sqlite3 module,
see Doc/library/sqlite3.rst (default is no)
@@ -7878,6 +7884,261 @@ $as_echo "$as_me: llvm-ar found via xcrun: ${LLVM_AR}" >&6;}
LDFLAGS_NODIST="$LDFLAGS_NODIST $LTOFLAGS"
fi
+# Enable bolt flags
+Py_BOLT='false'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-bolt" >&5
+$as_echo_n "checking for --enable-bolt... " >&6; }
+# Check whether --enable-bolt was given.
+if test "${enable_bolt+set}" = set; then :
+ enableval=$enable_bolt;
+if test "$enableval" != no
+then
+ Py_BOLT='true'
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; };
+else
+ Py_BOLT='false'
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; };
+fi
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+if test "$Py_BOLT" = 'true' ; then
+ PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
+ DEF_MAKE_ALL_RULE="bolt-opt"
+ DEF_MAKE_RULE="build_all"
+
+ # These flags are required for bolt to work:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-reorder-blocks-and-partition"
+ LDFLAGS_NODIST="$LDFLAGS_NODIST -Wl,--emit-relocs"
+
+ # These flags are required to get good performance from bolt:
+ CFLAGS_NODIST="$CFLAGS_NODIST -fno-pie"
+ # Add these no-pie flags when linking executables, but not when linking shared libraries:
+ LINKCC="$LINKCC -fno-pie -no-pie"
+ # Pin the DWARF version to 4 because LLVM-BOLT does not support DWARF 5 yet.
+ CFLAGS="$CFLAGS -gdwarf-4"
+ LDFLAGS="$LDFLAGS -gdwarf-4"
+
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}llvm-bolt", so it can be a program name with args.
+set dummy ${ac_tool_prefix}llvm-bolt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_LLVM_BOLT+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $LLVM_BOLT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_LLVM_BOLT="$LLVM_BOLT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+LLVM_BOLT=$ac_cv_path_LLVM_BOLT
+if test -n "$LLVM_BOLT"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LLVM_BOLT" >&5
+$as_echo "$LLVM_BOLT" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_LLVM_BOLT"; then
+ ac_pt_LLVM_BOLT=$LLVM_BOLT
+ # Extract the first word of "llvm-bolt", so it can be a program name with args.
+set dummy llvm-bolt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_LLVM_BOLT+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ac_pt_LLVM_BOLT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ac_pt_LLVM_BOLT="$ac_pt_LLVM_BOLT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ac_pt_LLVM_BOLT="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ac_pt_LLVM_BOLT=$ac_cv_path_ac_pt_LLVM_BOLT
+if test -n "$ac_pt_LLVM_BOLT"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_LLVM_BOLT" >&5
+$as_echo "$ac_pt_LLVM_BOLT" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_pt_LLVM_BOLT" = x; then
+ LLVM_BOLT="''"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ LLVM_BOLT=$ac_pt_LLVM_BOLT
+ fi
+else
+ LLVM_BOLT="$ac_cv_path_LLVM_BOLT"
+fi
+
+ if test -n "${LLVM_BOLT}" -a -x "${LLVM_BOLT}"
+ then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found llvm-bolt\"" >&5
+$as_echo "\"Found llvm-bolt\"" >&6; }
+ else
+ as_fn_error $? "llvm-bolt is required for a --enable-bolt build but could not be found." "$LINENO" 5
+ fi
+
+
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}merge-fdata", so it can be a program name with args.
+set dummy ${ac_tool_prefix}merge-fdata; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_MERGE_FDATA+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $MERGE_FDATA in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MERGE_FDATA="$MERGE_FDATA" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+MERGE_FDATA=$ac_cv_path_MERGE_FDATA
+if test -n "$MERGE_FDATA"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MERGE_FDATA" >&5
+$as_echo "$MERGE_FDATA" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_MERGE_FDATA"; then
+ ac_pt_MERGE_FDATA=$MERGE_FDATA
+ # Extract the first word of "merge-fdata", so it can be a program name with args.
+set dummy merge-fdata; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_MERGE_FDATA+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $ac_pt_MERGE_FDATA in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ac_pt_MERGE_FDATA="$ac_pt_MERGE_FDATA" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in ${llvm_path}
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_ac_pt_MERGE_FDATA="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ac_pt_MERGE_FDATA=$ac_cv_path_ac_pt_MERGE_FDATA
+if test -n "$ac_pt_MERGE_FDATA"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_MERGE_FDATA" >&5
+$as_echo "$ac_pt_MERGE_FDATA" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_pt_MERGE_FDATA" = x; then
+ MERGE_FDATA="''"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ MERGE_FDATA=$ac_pt_MERGE_FDATA
+ fi
+else
+ MERGE_FDATA="$ac_cv_path_MERGE_FDATA"
+fi
+
+ if test -n "${MERGE_FDATA}" -a -x "${MERGE_FDATA}"
+ then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"Found merge-fdata\"" >&5
+$as_echo "\"Found merge-fdata\"" >&6; }
+ else
+ as_fn_error $? "merge-fdata is required for a --enable-bolt build but could not be found." "$LINENO" 5
+ fi
+fi
+
# Enable PGO flags.