summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 5
-rw-r--r-- Doc/using/configure.rst 7
-rw-r--r-- Makefile.pre.in 65
-rw-r--r-- Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst 4
-rwxr-xr-x configure 147
-rw-r--r-- configure.ac 55
6 files changed, 153 insertions, 130 deletions
diff --git a/.gitignore b/.gitignore
index d9c4a79..ef7642b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,10 @@
*.gc??
*.profclang?
*.profraw
+# Copies of binaries before BOLT optimizations.
+*.prebolt
+# BOLT profile data.
+*.fdata
*.dyn
.gdb_history
.purify
@@ -124,6 +128,7 @@ Tools/unicode/data/
/platform
/profile-clean-stamp
/profile-run-stamp
+/profile-bolt-stamp
/Python/deepfreeze/*.c
/pybuilddir.txt
/pyconfig.h
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst
index ce858ab..fbe280d 100644
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@@ -314,6 +314,13 @@ also be used to improve performance.
is dependent on a combination of the build environment + the other
optimization configure args + the CPU architecture, and not all combinations
are supported.
+ BOLT versions before LLVM 16 are known to crash under some scenarios.
+ Use of LLVM 16 or newer for BOLT optimization is strongly encouraged.
+
+ The :envvar:`!BOLT_INSTRUMENT_FLAGS` and :envvar:`!BOLT_APPLY_FLAGS`
+ :program:`configure` variables can be defined to override the default set of
+ arguments for :program:`llvm-bolt` to instrument and apply BOLT data to
+ binaries, respectively.
.. versionadded:: 3.12
diff --git a/Makefile.pre.in b/Makefile.pre.in
index da3a8f6..eb79c9c 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -672,21 +672,55 @@ profile-opt: profile-run-stamp
-rm -f profile-clean-stamp
$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"
-.PHONY: bolt-opt
-bolt-opt: @PREBOLT_RULE@
+# List of binaries that BOLT runs on.
+BOLT_BINARIES := @BOLT_BINARIES@
+
+BOLT_INSTRUMENT_FLAGS := @BOLT_INSTRUMENT_FLAGS@
+BOLT_APPLY_FLAGS := @BOLT_APPLY_FLAGS@
+
+.PHONY: clean-bolt
+clean-bolt:
+ # Profile data.
rm -f *.fdata
- @if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\
- echo "skip: $(BUILDPYTHON) is already BOLTed."; \
- else \
- @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \
- ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \
- @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \
- @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \
- rm -f *.fdata; \
- rm -f $(BUILDPYTHON).bolt_inst; \
- mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \
- fi
+ # Pristine binaries before BOLT optimization.
+ rm -f *.prebolt
+ # BOLT instrumented binaries.
+ rm -f *.bolt_inst
+
+profile-bolt-stamp: $(BUILDPYTHON)
+ # Ensure a pristine, pre-BOLT copy of the binary and no profile data from last run.
+ for bin in $(BOLT_BINARIES); do \
+ prebolt="$${bin}.prebolt"; \
+ if [ -e "$${prebolt}" ]; then \
+ echo "Restoring pre-BOLT binary $${prebolt}"; \
+ mv "$${bin}.prebolt" "$${bin}"; \
+ fi; \
+ cp "$${bin}" "$${prebolt}"; \
+ rm -f $${bin}.bolt.*.fdata $${bin}.fdata; \
+ done
+ # Instrument each binary.
+ for bin in $(BOLT_BINARIES); do \
+ @LLVM_BOLT@ "$${bin}" -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $${bin}.bolt) -o $${bin}.bolt_inst $(BOLT_INSTRUMENT_FLAGS); \
+ mv "$${bin}.bolt_inst" "$${bin}"; \
+ done
+ # Run instrumented binaries to collect data.
+ $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
+ # Merge all the data files together.
+ for bin in $(BOLT_BINARIES); do \
+ @MERGE_FDATA@ $${bin}.*.fdata > "$${bin}.fdata"; \
+ rm -f $${bin}.*.fdata; \
+ done
+ # Run bolt against the merged data to produce an optimized binary.
+ for bin in $(BOLT_BINARIES); do \
+ @LLVM_BOLT@ "$${bin}.prebolt" -o "$${bin}.bolt" -data="$${bin}.fdata" $(BOLT_APPLY_FLAGS); \
+ mv "$${bin}.bolt" "$${bin}"; \
+ done
+ touch $@
+.PHONY: bolt-opt
+bolt-opt:
+ $(MAKE) @PREBOLT_RULE@
+ $(MAKE) profile-bolt-stamp
# Compile and run with gcov
.PHONY: coverage
@@ -2623,10 +2657,11 @@ profile-removal:
rm -f $(COVERAGE_INFO)
rm -rf $(COVERAGE_REPORT)
rm -f profile-run-stamp
+ rm -f profile-bolt-stamp
.PHONY: clean
-clean: clean-retain-profile
- @if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \
+clean: clean-retain-profile clean-bolt
+ @if test @DEF_MAKE_ALL_RULE@ = profile-opt -o @DEF_MAKE_ALL_RULE@ = bolt-opt; then \
rm -f profile-gen-stamp profile-clean-stamp; \
$(MAKE) profile-removal; \
fi
diff --git a/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst b/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst
new file mode 100644
index 0000000..cc70d47
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst
@@ -0,0 +1,4 @@
+BOLT optimization is now applied to the libpython shared library if building
+a shared library. BOLT instrumentation and application settings can now be
+influenced via the ``BOLT_INSTRUMENT_FLAGS`` and ``BOLT_APPLY_FLAGS``
+configure variables.
diff --git a/configure b/configure
index 7aad4fe..2b863be 100755
--- a/configure
+++ b/configure
@@ -883,10 +883,11 @@ CFLAGS_NODIST
BASECFLAGS
CFLAGS_ALIASING
OPT
+BOLT_APPLY_FLAGS
+BOLT_INSTRUMENT_FLAGS
+BOLT_BINARIES
MERGE_FDATA
LLVM_BOLT
-ac_ct_READELF
-READELF
PREBOLT_RULE
LLVM_PROF_FOUND
LLVM_PROFDATA
@@ -1105,6 +1106,8 @@ CPPFLAGS
CPP
HOSTRUNNER
PROFILE_TASK
+BOLT_INSTRUMENT_FLAGS
+BOLT_APPLY_FLAGS
LIBUUID_CFLAGS
LIBUUID_LIBS
LIBFFI_CFLAGS
@@ -1916,6 +1919,10 @@ Some influential environment variables:
HOSTRUNNER Program to run CPython for the host platform
PROFILE_TASK
Python args for PGO generation task
+ BOLT_INSTRUMENT_FLAGS
+ Arguments to llvm-bolt when instrumenting binaries
+ BOLT_APPLY_FLAGS
+ Arguments to llvm-bolt when creating a BOLT optimized binary
LIBUUID_CFLAGS
C compiler flags for LIBUUID, overriding pkg-config
LIBUUID_LIBS
@@ -8106,112 +8113,6 @@ if test "$Py_BOLT" = 'true' ; then
DEF_MAKE_ALL_RULE="bolt-opt"
DEF_MAKE_RULE="build_all"
-
- if test -n "$ac_tool_prefix"; then
- for ac_prog in readelf
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_READELF+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$READELF"; then
- ac_cv_prog_READELF="$READELF" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_READELF="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-READELF=$ac_cv_prog_READELF
-if test -n "$READELF"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $READELF" >&5
-$as_echo "$READELF" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$READELF" && break
- done
-fi
-if test -z "$READELF"; then
- ac_ct_READELF=$READELF
- for ac_prog in readelf
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_READELF+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_READELF"; then
- ac_cv_prog_ac_ct_READELF="$ac_ct_READELF" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_READELF="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_READELF=$ac_cv_prog_ac_ct_READELF
-if test -n "$ac_ct_READELF"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_READELF" >&5
-$as_echo "$ac_ct_READELF" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$ac_ct_READELF" && break
-done
-
- if test "x$ac_ct_READELF" = x; then
- READELF=""notfound""
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- READELF=$ac_ct_READELF
- fi
-fi
-
- if test "$READELF" == "notfound"
- then
- as_fn_error $? "readelf is required for a --enable-bolt build but could not be found." "$LINENO" 5
- fi
-
# -fno-reorder-blocks-and-partition is required for bolt to work.
# Possibly GCC only.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-reorder-blocks-and-partition" >&5
@@ -8474,6 +8375,36 @@ $as_echo "\"Found merge-fdata\"" >&6; }
fi
fi
+
+BOLT_BINARIES='$(BUILDPYTHON)'
+if test "x$enable_shared" = xyes; then :
+
+ BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_INSTRUMENT_FLAGS" >&5
+$as_echo_n "checking BOLT_INSTRUMENT_FLAGS... " >&6; }
+if test -z "${BOLT_INSTRUMENT_FLAGS}"
+then
+ BOLT_INSTRUMENT_FLAGS=
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_INSTRUMENT_FLAGS" >&5
+$as_echo "$BOLT_INSTRUMENT_FLAGS" >&6; }
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_APPLY_FLAGS" >&5
+$as_echo_n "checking BOLT_APPLY_FLAGS... " >&6; }
+if test -z "${BOLT_APPLY_FLAGS}"
+then # Quote the default: unquoted, the words after the first would be run as a command.
+ BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot"
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_APPLY_FLAGS" >&5
+$as_echo "$BOLT_APPLY_FLAGS" >&6; }
+
# XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be
# merged with this chunk of code?
diff --git a/configure.ac b/configure.ac
index 115998e..786d341 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2028,13 +2028,6 @@ if test "$Py_BOLT" = 'true' ; then
DEF_MAKE_ALL_RULE="bolt-opt"
DEF_MAKE_RULE="build_all"
- AC_SUBST(READELF)
- AC_CHECK_TOOLS(READELF, [readelf], "notfound")
- if test "$READELF" == "notfound"
- then
- AC_MSG_ERROR([readelf is required for a --enable-bolt build but could not be found.])
- fi
-
# -fno-reorder-blocks-and-partition is required for bolt to work.
# Possibly GCC only.
AX_CHECK_COMPILE_FLAG([-fno-reorder-blocks-and-partition],[
@@ -2067,6 +2060,54 @@ if test "$Py_BOLT" = 'true' ; then
fi
fi
+dnl Enable BOLT of libpython if built.
+AC_SUBST(BOLT_BINARIES)
+BOLT_BINARIES='$(BUILDPYTHON)'
+AS_VAR_IF([enable_shared], [yes], [
+ BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
+])
+
+AC_ARG_VAR(
+ [BOLT_INSTRUMENT_FLAGS],
+ [Arguments to llvm-bolt when instrumenting binaries]
+)
+AC_MSG_CHECKING([BOLT_INSTRUMENT_FLAGS])
+if test -z "${BOLT_INSTRUMENT_FLAGS}"
+then
+ BOLT_INSTRUMENT_FLAGS=
+fi
+AC_MSG_RESULT([$BOLT_INSTRUMENT_FLAGS])
+
+AC_ARG_VAR(
+ [BOLT_APPLY_FLAGS],
+ [Arguments to llvm-bolt when creating a BOLT optimized binary]
+)
+AC_MSG_CHECKING([BOLT_APPLY_FLAGS])
+if test -z "${BOLT_APPLY_FLAGS}"
+then # Quote the default: unquoted, the words after the first would be run as a command.
+ AS_VAR_SET(
+ [BOLT_APPLY_FLAGS],
+ ["m4_join([ ],
+ [-update-debug-sections],
+ [-reorder-blocks=ext-tsp],
+ [-reorder-functions=hfsort+],
+ [-split-functions],
+ [-icf=1],
+ [-inline-all],
+ [-split-eh],
+ [-reorder-functions-use-hot-size],
+ [-peepholes=none],
+ [-jump-tables=aggressive],
+ [-inline-ap],
+ [-indirect-call-promotion=all],
+ [-dyno-stats],
+ [-use-gnu-stack],
+ [-frame-opt=hot]
+ )"]
+ )
+fi
+AC_MSG_RESULT([$BOLT_APPLY_FLAGS])
+
# XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be
# merged with this chunk of code?