From 8eb0b506c637d6a26fdb5651b31df3c2135f82f7 Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 09:50:57 -0800
Subject: [PATCH 1/6] gh-104523: inline minimal PGO rules

Various rules were only ever invoked once and had minimal bodies.
I don't see a benefit to the indirection.

So this commit inlines rules to simplify the PGO logic.

skip news
---
 Makefile.pre.in | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 7c44b7be5dbe67..2f9f4a1c5870ca 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -642,7 +642,7 @@ profile-gen-stamp: profile-clean-stamp
 		exit 1;\
 	fi
 	@echo "Building with support for profile generation:"
-	$(MAKE) build_all_generate_profile
+	$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)"
 	touch $@
 
 # Run task with profile generation build to create profile information.
@@ -652,8 +652,9 @@ profile-run-stamp:
 	# enabled.
 	$(MAKE) profile-gen-stamp
 	# Next, run the profile task to generate the profile information.
-	$(MAKE) run_profile_task
-	$(MAKE) build_all_merge_profile
+	@ # FIXME: can't run for a cross build
+	$(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
+	$(LLVM_PROF_MERGER)
 	# Remove profile generation binary since we are done with it.
 	$(MAKE) clean-retain-profile
 	# This is an expensive target to build and it does not have proper
@@ -661,19 +662,6 @@ profile-run-stamp:
 	# to record its completion and avoid re-running it.
 	touch $@
 
-.PHONY: build_all_generate_profile
-build_all_generate_profile:
-	$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)"
-
-.PHONY: run_profile_task
-run_profile_task:
-	@ # FIXME: can't run for a cross build
-	$(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
-
-.PHONY: build_all_merge_profile
-build_all_merge_profile:
-	$(LLVM_PROF_MERGER)
-
 # Compile Python binary with profile guided optimization.
 # To force re-running of the profile task, remove the profile-run-stamp file.
 .PHONY: profile-opt

From f215844ca3971c8fd1ec46a4c294f940d69543c9 Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 10:20:49 -0800
Subject: [PATCH 2/6] gh-xxxx: overhaul build rules for optimized binaries

This commit overhauls the make-based build system's rules for
building optimized binaries. Along the way it fixes a myriad of
bugs and shortcomings with the prior approach.

The old way of producing optimized binaries had various limitations:

* `make [all]` would do work when PGO was enabled because the phony
  `profile-opt` rule was non-empty. This prevented no-op PGO builds
  from working at all. This meant workflows like `make; make install`
  either incurred extra work or failed due to race conditions.
* Same thing for BOLT, as its `bolt-opt` rule was also non-empty
  and always ran during `make [all]`.
* BOLT could not be run multiple times without a full rebuild because
  `llvm-bolt` can't instrument binaries that have already received
  BOLT optimizations.
* It was difficult to run BOLT on its own because of how various make
  targets and their dependencies were structured.
* I found the old way that configure and make communicated the default
  targets to be confusing and hard to understand.

There are essentially 2 major changes going on in this commit:

1. A rework of the high-level make targets for performing a build and
   how they are defined.
2. A rework of all the make logic related to profile-based optimization
   (read: PGO and BOLT).

Build Target Rework
======================

Before, we essentially had `build_all`, `profile-opt`, `bolt-opt` and
`build_wasm` as our 4 targets for performing a build. `all` would alias
to one of these, as appropriate.

And there was another definition for which _simple_ make target to
evaluate for non-optimized builds. This was likely `build_all` or
`all`.

In the rework, we introduce 2 new high-level targets:

* `build-plain` - Perform a build without optimizations.
* `build-optimized` - Perform a build with optimizations.

`build-plain` is aliased to `build_all` in all configurations except
WASM, where it is `build_wasm`.

`build-optimized` by default is aliased to a target that prints an error
message when optimizations aren't enabled. If PGO or BOLT are enabled,
it is aliased to their respective target.

`build-optimized` is the logical successor to `profile-opt`.

I felt it best to delete `profile-opt` completely, as the new `build-*`
high-level targets feel more friendly to use. But if people lament its
loss, we can add a `profile-opt: build-optimized` to achieve almost the
same result.

Profile-Based Optimization Rework
=================================

Most of the make logic related to profile-based optimization (read: PGO
and BOLT) has been touched in this change.

A major issue with the old way of doing things was we used phony,
always-executed make rules. This is a bad practice in make because it
undermines no-op builds.

Another issue is that the separation between the rules and what order
they ran in wasn't always clear. Both PGO and BOLT consist of the same
4 phase solution: instrument, run, analyze, and apply. However, these
steps weren't clearly expressed in the make logic. This is especially
true for BOLT, which only had 1 make rule.

Another issue with BOLT is that it was really easy to get things into
a bad state. e.g. if you applied BOLT to `pythonX.Y` you could not
run BOLT again unless you rebuilt `pythonX.Y` from source.

In the new world, we have separate `profile-<tool>-<stage>-stamp`
rules defining the 4 distinct `instrument`, `run`, `analyze`, and
`apply` stages for both PGO and BOLT. Each of these stages is tracked
by a _stamp_ semaphore file so progress can be captured. This should
all be pretty straightforward.

There is some minimal complexity here to handle BOLT's optional
dependency on PGO, as BOLT either depends on `build_all` or
`profile-pgo-apply-stamp`.

As part of the refactor to BOLT we also preserve the original input
binary before BOLT is applied. This original file is restored if
BOLT runs again. This greatly simplifies repeated BOLT invocations,
as make doesn't perform needless work. However, this is all best
effort, as it is possible for some make target evaluations to still
get things in a bad state.

Other Remarks
=============

If this change perturbs any bugs, they are likely around cleaning
behavior. The cleaning rules are a bit complicated and not clearly
documented. And I'm unsure which targets CPython developers often
iterate on. It is highly possible that state cleanup of PGO and/or
BOLT files isn't as robust as it needs to be.

I explicitly deleted some calls to PGO cleanup because those calls
prevented no-op `make [all]` from working. It is certainly possible
something somewhere (release automation?) relied on these files being
deleted when they no longer are. We still have targets to purge profile
files and it should be trivial to add these to appropriate make rules.
---
 .gitignore      |   3 +-
 Makefile.pre.in | 178 ++++++++++++++++++++++++++++++++++--------------
 configure       |  60 ++++++++++------
 configure.ac    |  58 ++++++++++------
 4 files changed, 204 insertions(+), 95 deletions(-)

diff --git a/.gitignore b/.gitignore
index d9c4a7972f076d..547e6746e1d91b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,8 +122,7 @@ Tools/unicode/data/
 # hendrikmuhs/ccache-action@v1
 /.ccache
 /platform
-/profile-clean-stamp
-/profile-run-stamp
+/profile-*-stamp
 /Python/deepfreeze/*.c
 /pybuilddir.txt
 /pyconfig.h
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 2f9f4a1c5870ca..9d2aaaae7fcdf6 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -601,13 +601,27 @@ LIBHACL_SHA2_HEADERS= \
 #########################################################################
 # Rules
 
-# Default target
-all:		@DEF_MAKE_ALL_RULE@
+# Default target.
+# Likely either `build-plain` or `build-optimized`.
+all: @MAKE_TARGET_ALL@
 
 # First target in Makefile is implicit default. So .PHONY needs to come after
 # all.
 .PHONY: all
 
+# Build without any optimizations or instrumented binaries.
+.PHONY: build-plain
+build-plain: @MAKE_TARGET_BUILD_PLAIN@
+
+# Build with optimizations (PGO, BOLT, etc).
+.PHONY: build-optimized
+build-optimized: @MAKE_TARGET_BUILD_OPTIMIZED@
+
+.PHONY: build-optimized-not-enabled
+build-optimized-not-enabled:
+	@echo "build-optimized requires --enable-optimizations in configure; aborting"
+	@exit 1
+
 .PHONY: build_all
 build_all:	check-clean-src $(BUILDPYTHON) platform sharedmods \
 		gdbhooks Programs/_testembed scripts checksharedmods rundsymutil
@@ -629,69 +643,145 @@ check-clean-src:
 		exit 1; \
 	fi
 
-# Profile generation build must start from a clean tree.
+# Profile-based optimization.
+#
+# PGO and BOLT profile-based optimization is supported. For each optimization,
+# roughly the following steps are done:
+#
+# 1. "Instrument" binaries with run-time data collection (e.g. build or modify
+#    a variant of the binary.)
+# 2. "Run" instrumented binaries (via subset of test suite) to collect data.
+# 3. "Analyze" / collect / merge data files from previous step.
+# 4. "Apply" collected data from above. (e.g. rebuild or modify a binary).
+#
+# 0, 1, or multiple profile based optimizations can be enabled.
+#
+# We track the progress of profile-based optimization using various "stamp"
+# files. An empty stamp file tracks the stage of optimization we're in.
+# Each *-stamp rule that follows is defined in execution / dependency order.
+
+# Remove files produced by or used for tracking profile-guided optimization.
+.PHONY: profile-remove
+profile-remove: clean-bolt
+	find . -name '*.gc??' -exec rm -f {} ';'
+	find . -name '*.profclang?' -exec rm -f {} ';'
+	find . -name '*.dyn' -exec rm -f {} ';'
+	rm -f $(COVERAGE_INFO)
+	rm -rf $(COVERAGE_REPORT)
+	# Remove all progress tracking stamps to ensure a clean slate.
+	rm -f profile-*-stamp
+
+# Profile-based optimization requires a fresh build environment.
 profile-clean-stamp:
-	$(MAKE) clean
+	$(MAKE) clean profile-remove
 	touch $@
 
-# Compile with profile generation enabled.
-profile-gen-stamp: profile-clean-stamp
+# Build with PGO instrumentation enabled.
+profile-pgo-instrument-stamp: profile-clean-stamp
 	@if [ $(LLVM_PROF_ERR) = yes ]; then \
 		echo "Error: Cannot perform PGO build because llvm-profdata was not found in PATH" ;\
 		echo "Please add it to PATH and run ./configure again" ;\
 		exit 1;\
 	fi
 	@echo "Building with support for profile generation:"
-	$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)"
+	$(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)"
 	touch $@
 
-# Run task with profile generation build to create profile information.
-profile-run-stamp:
+# Run PGO instrumented binaries and collect profile data.
+profile-pgo-run-stamp: profile-pgo-instrument-stamp
 	@echo "Running code to generate profile data (this can take a while):"
-	# First, we need to create a clean build with profile generation
-	# enabled.
-	$(MAKE) profile-gen-stamp
-	# Next, run the profile task to generate the profile information.
 	@ # FIXME: can't run for a cross build
 	$(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
+	touch $@
+
+# Collect data files produced by running PGO instrumented binaries.
+profile-pgo-analyze-stamp: profile-pgo-run-stamp
 	$(LLVM_PROF_MERGER)
 	# Remove profile generation binary since we are done with it.
 	$(MAKE) clean-retain-profile
-	# This is an expensive target to build and it does not have proper
-	# makefile dependency information.  So, we create a "stamp" file
-	# to record its completion and avoid re-running it.
 	touch $@
 
-# Compile Python binary with profile guided optimization.
-# To force re-running of the profile task, remove the profile-run-stamp file.
-.PHONY: profile-opt
-profile-opt: profile-run-stamp
+# Use collected PGO data to influence rebuild of binaries.
+profile-pgo-apply-stamp: profile-pgo-analyze-stamp
 	@echo "Rebuilding with profile guided optimizations:"
-	-rm -f profile-clean-stamp
-	$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"
-
-.PHONY: bolt-opt
-bolt-opt: @PREBOLT_RULE@
-	rm -f *.fdata
-	@if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\
-		echo "skip: $(BUILDPYTHON) is already BOLTed."; \
-	else \
-		@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \
-		./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \
-		@MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \
-		@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \
-		rm -f *.fdata; \
-		rm -f $(BUILDPYTHON).bolt_inst; \
-		mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \
+	# Need to purge PGO instrumented build to force a rebuild.
+	$(MAKE) clean-retain-profile
+	$(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"
+	touch $@
+
+# BOLT supports instrumenting and applying changes to standalone binaries
+# without having to recompile.
+#
+# BOLT can run independently or in addition to PGO. If running with PGO,
+# it always runs after PGO. Care needs to be taken to preserve PGO state
+# when running BOLT so make doesn't re-apply PGO.
+#
+# BOLT also can't instrument binaries that have already had BOLT applied
+# to them. So we make an attempt to preserve and re-use the pristine
+# pre-BOLT binaries so developers can iterate on just BOLT optimization
+# passes.
+
+# List of binaries that BOLT runs on.
+BOLT_BINARIES = $(BUILDPYTHON)
+
+# Remove traces of bolt.
+.PHONY: clean-bolt
+clean-bolt:
+	# Instrumented binaries.
+	find . -name '*.bolt_inst' -exec rm -f {} ';'
+	# The data files they produce.
+	find . -name '*.fdata' -exec rm -f {} ';'
+	# Copied of binaries before BOLT application.
+	find . -name '*.prebolt' -exec rm -f {} ';'
+
+# BOLTs dependencies are a bit wonky.
+#
+# If PGO is enabled, we can take a native rule dependency on a stamp file.
+# If PGO isn't enabled, we don't have a stamp to key off of and the phony
+# target (e.g. build_all) will always force rebuilds. So we call out to
+# make externally to sidestep the dependency.
+#
+# We can simplify this hack if we ever get stamp files for plain builds.
+profile-bolt-prebuild-stamp: @MAKE_BOLT_NATIVE_DEPENDENCY@
+	if [ -n "@MAKE_BOLT_MAKE_DEPENDENCY@" ]; then \
+	    $(MAKE) @MAKE_BOLT_MAKE_DEPENDENCY@; \
 	fi
+	touch $@
 
+profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp
+	for bin in $(BOLT_BINARIES); do \
+	    if [ -e "$${bin}.prebolt" ]; then \
+	        echo "Restoring pre-BOLT binary $${bin}.prebolt"; \
+	        mv "$${bin}.prebolt" "$${bin}"; \
+	    fi \
+	done
+	# Ensure prior BOLT state is purged.
+	$(MAKE) clean-bolt
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst
+	touch $@
+
+profile-bolt-run-stamp: profile-bolt-instrument-stamp
+	./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
+	touch $@
+
+profile-bolt-analyze-stamp: profile-bolt-run-stamp
+	@MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata
+	touch $@
+
+profile-bolt-apply-stamp: profile-bolt-analyze-stamp
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot
+	mv $(BUILDPYTHON) $(BUILDPYTHON).prebolt
+	mv $(BUILDPYTHON).bolt $(BUILDPYTHON)
+	touch $@
+
+# End of profile-based optimization rules.
 
 # Compile and run with gcov
 .PHONY: coverage
 coverage:
 	@echo "Building with support for coverage checking:"
 	$(MAKE) clean
-	$(MAKE) @DEF_MAKE_RULE@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage"
+	$(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage"
 
 .PHONY: coverage-lcov
 coverage-lcov:
@@ -2610,23 +2700,9 @@ clean-retain-profile: pycremoval
 	-rm -f Python/frozen_modules/MANIFEST
 	-find build -type f -a ! -name '*.gc??' -exec rm -f {} ';'
 	-rm -f Include/pydtrace_probes.h
-	-rm -f profile-gen-stamp
-
-.PHONY: profile-removal
-profile-removal:
-	find . -name '*.gc??' -exec rm -f {} ';'
-	find . -name '*.profclang?' -exec rm -f {} ';'
-	find . -name '*.dyn' -exec rm -f {} ';'
-	rm -f $(COVERAGE_INFO)
-	rm -rf $(COVERAGE_REPORT)
-	rm -f profile-run-stamp
 
 .PHONY: clean
 clean: clean-retain-profile
-	@if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \
-		rm -f profile-gen-stamp profile-clean-stamp; \
-		$(MAKE) profile-removal; \
-	fi
 
 .PHONY: clobber
 clobber: clean
diff --git a/configure b/configure
index 7aad4fe89e3cbf..d15dbe64bff0cb 100755
--- a/configure
+++ b/configure
@@ -887,7 +887,8 @@ MERGE_FDATA
 LLVM_BOLT
 ac_ct_READELF
 READELF
-PREBOLT_RULE
+MAKE_BOLT_MAKE_DEPENDENCY
+MAKE_BOLT_NATIVE_DEPENDENCY
 LLVM_PROF_FOUND
 LLVM_PROFDATA
 LLVM_PROF_ERR
@@ -898,8 +899,9 @@ PGO_PROF_GEN_FLAG
 LLVM_AR_FOUND
 LLVM_AR
 PROFILE_TASK
-DEF_MAKE_RULE
-DEF_MAKE_ALL_RULE
+MAKE_TARGET_BUILD_OPTIMIZED
+MAKE_TARGET_BUILD_PLAIN
+MAKE_TARGET_ALL
 ABIFLAGS
 LN
 MKDIR_P
@@ -7448,7 +7450,17 @@ $as_echo "no" >&6; }
 fi
 
 # Enable optimization flags
+# Which target `all` (the default make target) depends on.
 
+# Which target to evaluate for non-optimized builds.
+
+
+
+# Do a non-optimized generic build by default. Build configurations below
+# can override as appropriate.
+MAKE_TARGET_ALL="build-plain"
+MAKE_TARGET_BUILD_PLAIN="build_all"
+MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled"
 
 Py_OPT='false'
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-optimizations" >&5
@@ -7473,13 +7485,15 @@ fi
 
 
 if test "$Py_OPT" = 'true' ; then
+  # PGO is implied by optimizations mode.
+  PGO_ENABLED=1
+  MAKE_TARGET_ALL="build-optimized"
+  MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp"
+
   # Intentionally not forcing Py_LTO='true' here.  Too many toolchains do not
   # compile working code using it and both test_distutils and test_gdb are
   # broken when you do manage to get a toolchain that works with it.  People
   # who want LTO need to use --with-lto themselves.
-  DEF_MAKE_ALL_RULE="profile-opt"
-  REQUIRE_PGO="yes"
-  DEF_MAKE_RULE="build_all"
   case $CC in
     *gcc*)
       { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-semantic-interposition" >&5
@@ -7523,13 +7537,7 @@ fi
       ;;
   esac
 elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then
-      DEF_MAKE_ALL_RULE="build_wasm"
-  REQUIRE_PGO="no"
-  DEF_MAKE_RULE="all"
-else
-  DEF_MAKE_ALL_RULE="build_all"
-  REQUIRE_PGO="no"
-  DEF_MAKE_RULE="all"
+      MAKE_TARGET_BUILD_PLAIN="build_wasm"
 fi
 
 
@@ -8039,7 +8047,8 @@ case $CC in
     if test $LLVM_PROF_FOUND = not-found
     then
       LLVM_PROF_ERR=yes
-      if test "${REQUIRE_PGO}" = "yes"
+
+      if test -n "${PGO_ENABLED}"
       then
         as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5
       fi
@@ -8055,10 +8064,10 @@ case $CC in
         if test "${LLVM_PROF_FOUND}" = "not-found"
         then
           LLVM_PROF_ERR=yes
-          if test "${REQUIRE_PGO}" = "yes"
-	  then
-	    as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5
-	  fi
+          if test -n "${PGO_ENABLED}"
+          then
+            as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5
+          fi
         fi
         ;;
       *)
@@ -8101,10 +8110,19 @@ fi
 
 
 
+
+
 if test "$Py_BOLT" = 'true' ; then
-  PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
-  DEF_MAKE_ALL_RULE="bolt-opt"
-  DEF_MAKE_RULE="build_all"
+  MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp"
+
+  # Hook up make dependencies differently depending on whether PGO is
+  # enabled. See inline comment in Makefile.pre.in for how this works.
+  if test -n "${PGO_ENABLED}"
+  then
+    MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp"
+  else
+    MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}"
+  fi
 
 
   if test -n "$ac_tool_prefix"; then
diff --git a/configure.ac b/configure.ac
index 115998e0753b26..59b1734a746b5a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1722,8 +1722,18 @@ else
 fi
 
 # Enable optimization flags
-AC_SUBST(DEF_MAKE_ALL_RULE)
-AC_SUBST(DEF_MAKE_RULE)
+# Which target `all` (the default make target) depends on.
+AC_SUBST(MAKE_TARGET_ALL)
+# Which target to evaluate for non-optimized builds.
+AC_SUBST(MAKE_TARGET_BUILD_PLAIN)
+AC_SUBST(MAKE_TARGET_BUILD_OPTIMIZED)
+
+# Do a non-optimized generic build by default. Build configurations below
+# can override as appropriate.
+MAKE_TARGET_ALL="build-plain"
+MAKE_TARGET_BUILD_PLAIN="build_all"
+MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled"
+
 Py_OPT='false'
 AC_MSG_CHECKING(for --enable-optimizations)
 AC_ARG_ENABLE(optimizations, AS_HELP_STRING(
@@ -1741,13 +1751,15 @@ fi],
 [AC_MSG_RESULT(no)])
 
 if test "$Py_OPT" = 'true' ; then
+  # PGO is implied by optimizations mode.
+  PGO_ENABLED=1
+  MAKE_TARGET_ALL="build-optimized"
+  MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp"
+
   # Intentionally not forcing Py_LTO='true' here.  Too many toolchains do not
   # compile working code using it and both test_distutils and test_gdb are
   # broken when you do manage to get a toolchain that works with it.  People
   # who want LTO need to use --with-lto themselves.
-  DEF_MAKE_ALL_RULE="profile-opt"
-  REQUIRE_PGO="yes"
-  DEF_MAKE_RULE="build_all"
   case $CC in
     *gcc*)
       AX_CHECK_COMPILE_FLAG([-fno-semantic-interposition],[
@@ -1759,13 +1771,7 @@ if test "$Py_OPT" = 'true' ; then
 elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then
   dnl Emscripten does not support shared extensions yet. Build
   dnl "python.[js,wasm]", "pybuilddir.txt", and "platform" files.
-  DEF_MAKE_ALL_RULE="build_wasm"
-  REQUIRE_PGO="no"
-  DEF_MAKE_RULE="all"
-else
-  DEF_MAKE_ALL_RULE="build_all"
-  REQUIRE_PGO="no"
-  DEF_MAKE_RULE="all"
+  MAKE_TARGET_BUILD_PLAIN="build_wasm"
 fi
 
 AC_ARG_VAR(PROFILE_TASK, Python args for PGO generation task)
@@ -1967,7 +1973,8 @@ case $CC in
     if test $LLVM_PROF_FOUND = not-found
     then
       LLVM_PROF_ERR=yes
-      if test "${REQUIRE_PGO}" = "yes"
+
+      if test -n "${PGO_ENABLED}"
       then
         AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.])
       fi
@@ -1983,10 +1990,10 @@ case $CC in
         if test "${LLVM_PROF_FOUND}" = "not-found"
         then
           LLVM_PROF_ERR=yes
-          if test "${REQUIRE_PGO}" = "yes"
-	  then
-	    AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.])
-	  fi
+          if test -n "${PGO_ENABLED}"
+          then
+            AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.])
+          fi
         fi
         ;;
       *)
@@ -2022,11 +2029,20 @@ else
 fi],
 [AC_MSG_RESULT(no)])
 
-AC_SUBST(PREBOLT_RULE)
+AC_SUBST(MAKE_BOLT_NATIVE_DEPENDENCY)
+AC_SUBST(MAKE_BOLT_MAKE_DEPENDENCY)
+
 if test "$Py_BOLT" = 'true' ; then
-  PREBOLT_RULE="${DEF_MAKE_ALL_RULE}"
-  DEF_MAKE_ALL_RULE="bolt-opt"
-  DEF_MAKE_RULE="build_all"
+  MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp"
+
+  # Hook up make dependencies differently depending on whether PGO is
+  # enabled. See inline comment in Makefile.pre.in for how this works.
+  if test -n "${PGO_ENABLED}"
+  then
+    MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp"
+  else
+    MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}"
+  fi
 
   AC_SUBST(READELF)
   AC_CHECK_TOOLS(READELF, [readelf], "notfound")

From c8759fd135f0bd9eb7d294f917940d8c15c1826f Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 14:20:07 -0800
Subject: [PATCH 3/6] gh-xxxx: move BOLT arguments to configure

This allows easily customizing the arguments to `llvm-bolt` without
having to edit the Makefile.

Arguments can be passed to configure and are reflected in configure
output, which can be useful for log analysis.

When defined in the Makefile we use `?=` syntax so the flags can be
overridden via `make VAR=VALUE` syntax or via environment variables.
Super useful for iterating on different BOLT flags.
---
 Makefile.pre.in |  7 +++++--
 configure       | 28 ++++++++++++++++++++++++++++
 configure.ac    | 16 ++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 9d2aaaae7fcdf6..5ab1a5d8ca66e4 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -724,6 +724,9 @@ profile-pgo-apply-stamp: profile-pgo-analyze-stamp
 # List of binaries that BOLT runs on.
 BOLT_BINARIES = $(BUILDPYTHON)
 
+BOLT_INSTRUMENT_FLAGS ?= @BOLT_INSTRUMENT_FLAGS@
+BOLT_APPLY_FLAGS ?= @BOLT_APPLY_FLAGS@
+
 # Remove traces of bolt.
 .PHONY: clean-bolt
 clean-bolt:
@@ -757,7 +760,7 @@ profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp
 	done
 	# Ensure prior BOLT state is purged.
 	$(MAKE) clean-bolt
-	@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst $(BOLT_INSTRUMENT_FLAGS)
 	touch $@
 
 profile-bolt-run-stamp: profile-bolt-instrument-stamp
@@ -769,7 +772,7 @@ profile-bolt-analyze-stamp: profile-bolt-run-stamp
 	touch $@
 
 profile-bolt-apply-stamp: profile-bolt-analyze-stamp
-	@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot
+	@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata $(BOLT_APPLY_FLAGS)
 	mv $(BUILDPYTHON) $(BUILDPYTHON).prebolt
 	mv $(BUILDPYTHON).bolt $(BUILDPYTHON)
 	touch $@
diff --git a/configure b/configure
index d15dbe64bff0cb..7a5a2af97096f7 100755
--- a/configure
+++ b/configure
@@ -883,6 +883,8 @@ CFLAGS_NODIST
 BASECFLAGS
 CFLAGS_ALIASING
 OPT
+BOLT_APPLY_FLAGS
+BOLT_INSTRUMENT_FLAGS
 MERGE_FDATA
 LLVM_BOLT
 ac_ct_READELF
@@ -1107,6 +1109,8 @@ CPPFLAGS
 CPP
 HOSTRUNNER
 PROFILE_TASK
+BOLT_INSTRUMENT_FLAGS
+BOLT_APPLY_FLAGS
 LIBUUID_CFLAGS
 LIBUUID_LIBS
 LIBFFI_CFLAGS
@@ -1918,6 +1922,10 @@ Some influential environment variables:
   HOSTRUNNER  Program to run CPython for the host platform
   PROFILE_TASK
               Python args for PGO generation task
+  BOLT_INSTRUMENT_FLAGS
+              Arguments to llvm-bolt when instrumenting binaries
+  BOLT_APPLY_FLAGS
+              Arguments to llvm-bolt when creating a BOLT optimized binary
   LIBUUID_CFLAGS
               C compiler flags for LIBUUID, overriding pkg-config
   LIBUUID_LIBS
@@ -8492,6 +8500,26 @@ $as_echo "\"Found merge-fdata\"" >&6; }
   fi
 fi
 
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_INSTRUMENT_FLAGS" >&5
+$as_echo_n "checking BOLT_INSTRUMENT_FLAGS... " >&6; }
+if test -z "${BOLT_INSTRUMENT_FLAGS}"
+then
+    BOLT_INSTRUMENT_FLAGS=
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_INSTRUMENT_FLAGS" >&5
+$as_echo "$BOLT_INSTRUMENT_FLAGS" >&6; }
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_APPLY_FLAGS" >&5
+$as_echo_n "checking BOLT_APPLY_FLAGS... " >&6; }
+if test -z "${BOLT_APPLY_FLAGS}"
+then
+    BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_APPLY_FLAGS" >&5
+$as_echo "$BOLT_APPLY_FLAGS" >&6; }
+
 # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be
 # merged with this chunk of code?
 
diff --git a/configure.ac b/configure.ac
index 59b1734a746b5a..afcd40532e0472 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2083,6 +2083,22 @@ if test "$Py_BOLT" = 'true' ; then
   fi
 fi
 
+AC_ARG_VAR(BOLT_INSTRUMENT_FLAGS, Arguments to llvm-bolt when instrumenting binaries)
+AC_MSG_CHECKING(BOLT_INSTRUMENT_FLAGS)
+if test -z "${BOLT_INSTRUMENT_FLAGS}"
+then
+    BOLT_INSTRUMENT_FLAGS=
+fi
+AC_MSG_RESULT($BOLT_INSTRUMENT_FLAGS)
+
+AC_ARG_VAR(BOLT_APPLY_FLAGS, Arguments to llvm-bolt when creating a BOLT optimized binary)
+AC_MSG_CHECKING(BOLT_APPLY_FLAGS)
+if test -z "${BOLT_APPLY_FLAGS}"
+then
+    BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot"
+fi
+AC_MSG_RESULT($BOLT_APPLY_FLAGS)
+
 # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be
 # merged with this chunk of code?
 

From 02418fde929452e3fdddc76bf6d5aa620c0ad04b Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 14:47:08 -0800
Subject: [PATCH 4/6] gh-xxxx: run BOLT instrumented binary with `$(RUNSHARED)`

This ensures `LD_LIBRARY_PATH` is set. Without this, I was able to
tickle a libpythonX.Y.so not found error.
---
 Makefile.pre.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 5ab1a5d8ca66e4..cdc65df71101e6 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -764,7 +764,7 @@ profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp
 	touch $@
 
 profile-bolt-run-stamp: profile-bolt-instrument-stamp
-	./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
+	$(RUNSHARED) ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
 	touch $@
 
 profile-bolt-analyze-stamp: profile-bolt-run-stamp

From a212332e85c4e6454460301dd3d84108dfd9f7d0 Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 15:14:28 -0800
Subject: [PATCH 5/6] gh-xxxx: enable BOLT optimization of libpython

Before, we only supported running BOLT on the main `python` binary.
If a shared library was in play, it wouldn't be optimized. That was
leaving a ton of optimization opportunities on the floor.

This commit adds support for running BOLT on libpython.

Functionality is disabled by default because BOLT asserts on LLVM 15,
which is the latest LLVM. I've built LLVM tip and it is able to
process libpython just fine. So it is known to work.
---
 Makefile.pre.in | 30 ++++++++++++++++++------------
 configure       | 27 +++++++++++++++++++++++++++
 configure.ac    | 17 +++++++++++++++++
 3 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index cdc65df71101e6..3d2386a62e0092 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -722,7 +722,7 @@ profile-pgo-apply-stamp: profile-pgo-analyze-stamp
 # passes.
 
 # List of binaries that BOLT runs on.
-BOLT_BINARIES = $(BUILDPYTHON)
+BOLT_BINARIES = @BOLT_BINARIES@
 
 BOLT_INSTRUMENT_FLAGS ?= @BOLT_INSTRUMENT_FLAGS@
 BOLT_APPLY_FLAGS ?= @BOLT_APPLY_FLAGS@
@@ -734,8 +734,6 @@ clean-bolt:
 	find . -name '*.bolt_inst' -exec rm -f {} ';'
 	# The data files they produce.
 	find . -name '*.fdata' -exec rm -f {} ';'
-	# Copied of binaries before BOLT application.
-	find . -name '*.prebolt' -exec rm -f {} ';'
 
 # BOLTs dependencies are a bit wonky.
 #
@@ -753,28 +751,36 @@ profile-bolt-prebuild-stamp: @MAKE_BOLT_NATIVE_DEPENDENCY@
 
 profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp
 	for bin in $(BOLT_BINARIES); do \
-	    if [ -e "$${bin}.prebolt" ]; then \
-	        echo "Restoring pre-BOLT binary $${bin}.prebolt"; \
+	    prebolt="$${bin}.prebolt"; \
+	    if [ -e "$${prebolt}" ]; then \
+	        echo "Restoring pre-BOLT binary $${prebolt}"; \
 	        mv "$${bin}.prebolt" "$${bin}"; \
-	    fi \
+	    fi; \
+	    cp "$${bin}" "$${prebolt}" || exit 1; \
 	done
 	# Ensure prior BOLT state is purged.
 	$(MAKE) clean-bolt
-	@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst $(BOLT_INSTRUMENT_FLAGS)
+	for bin in $(BOLT_BINARIES); do \
+	    @LLVM_BOLT@ "$${bin}" -instrument -instrumentation-file-append-pid -instrumentation-file="$(CURDIR)/$${bin}.bolt" -o "$${bin}.bolt_inst" $(BOLT_INSTRUMENT_FLAGS) || exit 1; \
+	    mv "$${bin}.bolt_inst" "$${bin}" || exit 1; \
+	done
 	touch $@
 
 profile-bolt-run-stamp: profile-bolt-instrument-stamp
-	$(RUNSHARED) ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true
+	$(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
 	touch $@
 
 profile-bolt-analyze-stamp: profile-bolt-run-stamp
-	@MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata
+	for bin in $(BOLT_BINARIES); do \
+	    @MERGE_FDATA@ "$${bin}".*.fdata > "$${bin}.fdata" || exit 1; \
+	done
 	touch $@
 
 profile-bolt-apply-stamp: profile-bolt-analyze-stamp
-	@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata $(BOLT_APPLY_FLAGS)
-	mv $(BUILDPYTHON) $(BUILDPYTHON).prebolt
-	mv $(BUILDPYTHON).bolt $(BUILDPYTHON)
+	for bin in $(BOLT_BINARIES); do \
+	    @LLVM_BOLT@ "$${bin}.prebolt" -o "$${bin}.bolt" -data="$${bin}.fdata" $(BOLT_APPLY_FLAGS) || exit 1; \
+	    mv "$${bin}.bolt" "$${bin}" || exit 1; \
+	done
 	touch $@
 
 # End of profile-based optimization rules.
diff --git a/configure b/configure
index 7a5a2af97096f7..7e961b092134ae 100755
--- a/configure
+++ b/configure
@@ -885,6 +885,7 @@ CFLAGS_ALIASING
 OPT
 BOLT_APPLY_FLAGS
 BOLT_INSTRUMENT_FLAGS
+BOLT_BINARIES
 MERGE_FDATA
 LLVM_BOLT
 ac_ct_READELF
@@ -1060,6 +1061,7 @@ with_assertions
 enable_optimizations
 with_lto
 enable_bolt
+with_bolt_libpython
 with_strict_overflow
 with_dsymutil
 with_address_sanitizer
@@ -1834,6 +1836,8 @@ Optional Packages:
   --with-lto=[full|thin|no|yes]
                           enable Link-Time-Optimization in any build (default
                           is no)
+  --with-bolt-libpython   enable BOLT optimization of libpython (WARNING:
+                          known to crash BOLT)
   --with-strict-overflow  if 'yes', add -fstrict-overflow to CFLAGS, else add
                           -fno-strict-overflow (default is no)
   --with-dsymutil         link debug information into final executable with
@@ -8500,6 +8504,29 @@ $as_echo "\"Found merge-fdata\"" >&6; }
   fi
 fi
 
+# Enable BOLT optimizations of libpython. Optional for now due to known
+# crashes on LLVM 15. Seems to be fixed in LLVM 16.
+
+BOLT_BINARIES='$(BUILDPYTHON)'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-bolt-libpython" >&5
+$as_echo_n "checking for --with-bolt-libpython... " >&6; }
+
+# Check whether --with-bolt-libpython was given.
+if test "${with_bolt_libpython+set}" = set; then :
+  withval=$with_bolt_libpython; test "$withval" = "no" || with_bolt_libpython="yes"
+else
+  with_bolt_libpython="no"
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_bolt_libpython" >&5
+$as_echo "$with_bolt_libpython" >&6; }
+
+if test "${enable_shared}" = "yes" -a "${with_bolt_libpython}" = "yes"
+then
+  BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
+fi
+
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_INSTRUMENT_FLAGS" >&5
 $as_echo_n "checking BOLT_INSTRUMENT_FLAGS... " >&6; }
diff --git a/configure.ac b/configure.ac
index afcd40532e0472..11e3814f451dce 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2083,6 +2083,23 @@ if test "$Py_BOLT" = 'true' ; then
   fi
 fi
 
+# Enable BOLT optimizations of libpython. Optional for now due to known
+# crashes on LLVM 15. Seems to be fixed in LLVM 16.
+AC_SUBST(BOLT_BINARIES)
+BOLT_BINARIES='$(BUILDPYTHON)'
+
+AC_MSG_CHECKING(for --with-bolt-libpython)
+AC_ARG_WITH([bolt-libpython],
+            AS_HELP_STRING([--with-bolt-libpython], [enable BOLT optimization of libpython (WARNING: known to crash BOLT)]),
+            [test "$withval" = "no" || with_bolt_libpython="yes"],
+            [with_bolt_libpython="no"])
+AC_MSG_RESULT($with_bolt_libpython)
+
+if test "${enable_shared}" = "yes" -a "${with_bolt_libpython}" = "yes"
+then
+  BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
+fi
+
 AC_ARG_VAR(BOLT_INSTRUMENT_FLAGS, Arguments to llvm-bolt when instrumenting binaries)
 AC_MSG_CHECKING(BOLT_INSTRUMENT_FLAGS)
 if test -z "${BOLT_INSTRUMENT_FLAGS}"

From 9e50ee0e681881d9505033de82f950b53d4a5d59 Mon Sep 17 00:00:00 2001
From: Gregory Szorc <gregory.szorc@gmail.com>
Date: Mon, 16 Jan 2023 19:46:06 -0800
Subject: [PATCH 6/6] gh-xxxx: expose _PYTHON_HOST_PLATFORM as a settable
 variable

The sysconfig data generated during builds encodes a PEP 425
platform tag. During native (build=host) builds, the bootstrapped
interpreter runs Python code in sysconfig to derive an appropriate value.

For cross compiles, we fall back to logic in configure (that code
lives around the changed lines) to derive an appropriate platform
tag, which is exported as an environment variable during builds.
And there is a "backdoor" in `sysconfig.py` that causes
`sysconfig.get_platform()` to return that value.

The logic in configure for deriving an appropriate platform tag
is a far cry from what's in `sysconfig.py`. Ideally that logic
would be fully (re)implemented in configure. But that's a
non-trivial amount of work.

Recognizing that configure makes inadequate platform tag decisions
during cross-compiles, this commit switches `_PYTHON_HOST_PLATFORM`
from a regular output variable to a "precious variable" (in autoconf
speak). This has the side-effect of allowing invokers to define the
variable, effectively allowing them to explicitly set the platform
tag during builds to a correct value when configure otherwise wouldn't
set one.
---
 configure    | 69 ++++++++++++++++++++++++++++++++--------------------
 configure.ac | 66 +++++++++++++++++++++++++++++--------------------
 2 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/configure b/configure
index 7e961b092134ae..b2cd8c24a4f0c4 100755
--- a/configure
+++ b/configure
@@ -1103,6 +1103,7 @@ PKG_CONFIG
 PKG_CONFIG_PATH
 PKG_CONFIG_LIBDIR
 MACHDEP
+_PYTHON_HOST_PLATFORM
 CC
 CFLAGS
 LDFLAGS
@@ -1915,6 +1916,14 @@ Some influential environment variables:
   PKG_CONFIG_LIBDIR
               path overriding pkg-config's built-in search path
   MACHDEP     name for machine-dependent library files
+  _PYTHON_HOST_PLATFORM
+              Forces a platform tag value for use in sysconfig data. This will
+              be calculated automatically in non-cross builds by running
+              sysconfig code in the bootstrapped interpreter. In cross builds,
+              an attempt will be made to derive an appropriate value in
+              configure. But some targets may derive incorrect values. This
+              variable can be set to force a value. Example values:
+              linux-x86_64, macosx-10.9-universal2, win-amd64
   CC          C compiler command
   CFLAGS      C compiler flags
   LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
@@ -3928,34 +3937,42 @@ fi
 $as_echo "\"$MACHDEP\"" >&6; }
 
 
-if test "$cross_compiling" = yes; then
-	case "$host" in
-	*-*-linux*)
-		case "$host_cpu" in
-		arm*)
-			_host_cpu=arm
-			;;
-		*)
-			_host_cpu=$host_cpu
-		esac
-		;;
-	*-*-cygwin*)
-		_host_cpu=
-		;;
-	*-*-vxworks*)
-		_host_cpu=$host_cpu
-		;;
-	wasm32-*-* | wasm64-*-*)
-		_host_cpu=$host_cpu
-		;;
-	*)
-		# for now, limit cross builds to known configurations
-		MACHDEP="unknown"
-		as_fn_error $? "cross build not supported for $host" "$LINENO" 5
-	esac
-	_PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking _PYTHON_HOST_PLATFORM" >&5
+$as_echo_n "checking _PYTHON_HOST_PLATFORM... " >&6; }
+
+if test -z "${_PYTHON_HOST_PLATFORM}"; then
+    if test "$cross_compiling" = yes; then
+        case "$host" in
+        *-*-linux*)
+            case "$host_cpu" in
+            arm*)
+                _host_cpu=arm
+                ;;
+            *)
+                _host_cpu=$host_cpu
+            esac
+            ;;
+        *-*-cygwin*)
+            _host_cpu=
+            ;;
+        *-*-vxworks*)
+            _host_cpu=$host_cpu
+            ;;
+        wasm32-*-* | wasm64-*-*)
+            _host_cpu=$host_cpu
+            ;;
+        *)
+            # for now, limit cross builds to known configurations
+            MACHDEP="unknown"
+            as_fn_error $? "cross build not supported for $host" "$LINENO" 5
+        esac
+        _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}"
+    fi
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_PYTHON_HOST_PLATFORM" >&5
+$as_echo "$_PYTHON_HOST_PLATFORM" >&6; }
+
 # Some systems cannot stand _XOPEN_SOURCE being defined at all; they
 # disable features if it is defined, without any means to access these
 # features as extensions. For these systems, we skip the definition of
diff --git a/configure.ac b/configure.ac
index 11e3814f451dce..e9f927c6de4ea4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -584,35 +584,47 @@ then
 fi
 AC_MSG_RESULT("$MACHDEP")
 
-AC_SUBST(_PYTHON_HOST_PLATFORM)
-if test "$cross_compiling" = yes; then
-	case "$host" in
-	*-*-linux*)
-		case "$host_cpu" in
-		arm*)
-			_host_cpu=arm
-			;;
-		*)
-			_host_cpu=$host_cpu
-		esac
-		;;
-	*-*-cygwin*)
-		_host_cpu=
-		;;
-	*-*-vxworks*)
-		_host_cpu=$host_cpu
-		;;
-	wasm32-*-* | wasm64-*-*)
-		_host_cpu=$host_cpu
-		;;
-	*)
-		# for now, limit cross builds to known configurations
-		MACHDEP="unknown"
-		AC_MSG_ERROR([cross build not supported for $host])
-	esac
-	_PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}"
+AC_ARG_VAR(_PYTHON_HOST_PLATFORM, [
+  Forces a platform tag value for use in sysconfig data. This will be calculated
+  automatically in non-cross builds by running sysconfig code in the
+  bootstrapped interpreter. In cross builds, an attempt will be made to
+  derive an appropriate value in configure. But some targets may derive
+  incorrect values. This variable can be set to force a value. Example
+  values: linux-x86_64, macosx-10.9-universal2, win-amd64])
+AC_MSG_CHECKING(_PYTHON_HOST_PLATFORM)
+
+if test -z "${_PYTHON_HOST_PLATFORM}"; then
+    if test "$cross_compiling" = yes; then
+        case "$host" in
+        *-*-linux*)
+            case "$host_cpu" in
+            arm*)
+                _host_cpu=arm
+                ;;
+            *)
+                _host_cpu=$host_cpu
+            esac
+            ;;
+        *-*-cygwin*)
+            _host_cpu=
+            ;;
+        *-*-vxworks*)
+            _host_cpu=$host_cpu
+            ;;
+        wasm32-*-* | wasm64-*-*)
+            _host_cpu=$host_cpu
+            ;;
+        *)
+            # for now, limit cross builds to known configurations
+            MACHDEP="unknown"
+            AC_MSG_ERROR([cross build not supported for $host])
+        esac
+        _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}"
+    fi
 fi
 
+AC_MSG_RESULT([$_PYTHON_HOST_PLATFORM])
+
 # Some systems cannot stand _XOPEN_SOURCE being defined at all; they
 # disable features if it is defined, without any means to access these
 # features as extensions. For these systems, we skip the definition of