Add benchmark tooling

mrueg · ivanvc · commit b53f11a55017 · 2024-05-31T16:14:40.000-07:00
This adds a way to compare benchmarks that we use in kube-state-metrics
already, hopefully allowing for better comparisons when applying
changes.

Co-authored-by: Manuel Rüger <manuel@rueg.eu>
Signed-off-by: Ivan Valdes <ivan@vald.es>
diff --git a/.github/workflows/benchmark-pr.yaml b/.github/workflows/benchmark-pr.yaml
@@ -0,0 +1,51 @@
+---
+# Runs the Go benchmarks on every pull request and publishes a benchstat
+# comparison in the job summary.
+name: Benchmarks on AMD64
+permissions: read-all
+on: [pull_request]
+jobs:
+  benchmark-pull-request:
+    runs-on: ubuntu-latest-8-cores
+    steps:
+    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+      with:
+        # Fetch the full history instead of a shallow clone; the benchmark
+        # comparison checks out a second ref later in this job.
+        fetch-depth: 0
+    # Expose the Go version stored in .go-version as the "goversion" step output.
+    - id: goversion
+      run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
+    - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
+      with:
+        go-version: ${{ steps.goversion.outputs.goversion }}
+    - name: Run Benchmarks
+      # Benchmark the checked-out pull request code against the pull request's
+      # base commit. The benchstat report is written to output.txt, which is
+      # the file the summary and validation steps below read.
+      run: |
+        BENCHSTAT_OUTPUT_FILE=output.txt make test-benchmark-compare REF=${{ github.event.pull_request.base.sha }}
+    # Publish the benchstat report (output.txt) inside a fenced code block in
+    # the job summary, followed by a short explanation of how to read it.
+    - run: |
+        echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+        cat output.txt >> "$GITHUB_STEP_SUMMARY"
+        echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
+        cat <<EOL >> "$GITHUB_STEP_SUMMARY"
+        <hr />
+
+        This section contains three tables generated by benchstat:
+
+        1. Seconds per operation.
+        2. Bytes per operation.
+        3. Allocations per operation.
+
+        The tables show the median and 75% confidence interval (CI) summaries for each benchmark comparing the HEAD and the BASE of the Pull Request, and an A/B comparison under "vs base". The last column shows the statistical p-value with three runs (n=3).
+
+        The last row has the Geometric Mean (geomean) for the given rows in the table.
+
+        Refer to [benchstat's documentation](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat) for more help.
+        EOL
+    # Fail the job when any geomean row of the benchstat report differs from
+    # the base by more than MAX_ACCEPTABLE_DIFFERENCE, in either direction.
+    - name: Validate results under acceptable limit
+      run: |
+        export MAX_ACCEPTABLE_DIFFERENCE=5
+        while IFS= read -r line; do
+          # Take the fourth whitespace-separated field of the geomean row, which
+          # is the comparison with the base. Only values starting with "+" or
+          # "-" are checked; anything else is skipped.
+          value="$(echo "$line" | awk '{print $4}')"
+          if [[ "$value" = +* ]] || [[ "$value" = -* ]]; then
+            if (( $(echo "${value//[^0-9.]/}"'>'"$MAX_ACCEPTABLE_DIFFERENCE" | bc -l) )); then
+              echo "::error::$value is above the maximum acceptable difference ($MAX_ACCEPTABLE_DIFFERENCE)"
+              exit 1
+            fi
+          fi
+        done < <(grep geomean output.txt)
diff --git a/Makefile b/Makefile
@@ -94,3 +94,14 @@ test-failpoint:
 test-robustness: gofail-enable build
 	sudo env PATH=$$PATH go test -v ${TESTFLAGS} ./tests/dmflakey -test.root
 	sudo env PATH=$(PWD)/bin:$$PATH go test -v ${TESTFLAGS} ${ROBUSTNESS_TESTFLAGS} ./tests/robustness -test.root
+
+.PHONY: test-benchmark-compare
+# Runs benchmark tests on the current git ref and the given REF, and compares
+# the two.
+#
+# Usage: make test-benchmark-compare REF=<git ref to compare against>
+#
+# REF is mandatory: fail early with a clear message instead of letting the
+# comparison script start without a ref to compare against.
+test-benchmark-compare: install-benchstat
+	@test -n "$(REF)" || { echo "REF is required, e.g. make test-benchmark-compare REF=main" >&2; exit 1; }
+	@git fetch
+	./scripts/compare_benchmarks.sh "$(REF)"
+
+.PHONY: install-benchstat
+# Installs benchstat (golang.org/x/perf/cmd/benchstat) at @latest using
+# `go install`; the version is not pinned.
+install-benchstat:
+	go install golang.org/x/perf/cmd/benchstat@latest
diff --git a/scripts/compare_benchmarks.sh b/scripts/compare_benchmarks.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# https://github.com/kubernetes/kube-state-metrics/blob/main/tests/compare_benchmarks.sh (originally written by mxinden)
+#
+# Usage: compare_benchmarks.sh <ref>
+#
+# Runs the Go benchmarks on the currently checked out ref and on <ref>, then
+# compares both runs with benchstat.
+#
+# Environment variables:
+#   TIMEOUT                     go test timeout (default: 30m)
+#   BENCH_COUNT                 value passed to go test -count (default: 3)
+#   BENCHSTAT_CONFIDENCE_LEVEL  value passed to benchstat -confidence (default: 0.75)
+#   BENCHSTAT_FORMAT            "text" or "csv" (default: text)
+#   BENCHSTAT_OUTPUT_FILE       file receiving the benchstat report; optional for
+#                               text (stdout is used when empty), required for csv
+
+# exit immediately when a command fails
+set -e
+# only exit with zero if all commands of the pipeline exit successfully
+set -o pipefail
+# error on unset variables
+set -u
+
+# Stop here when the ref to compare against is missing; printing the message
+# without exiting would only defer the failure to the "$1" expansion below.
+if [[ "$#" -ne 1 ]]; then
+  echo "One argument required, $# provided." >&2
+  exit 1
+fi
+
+REF_CURRENT="$(git rev-parse --abbrev-ref HEAD)"
+BASE_TO_COMPARE=$1
+
+# Refs may contain slashes (e.g. "feature/foo"). Replace them with dashes so
+# the ref can be used as a file name suffix without pointing into a directory
+# that does not exist.
+RESULT_CURRENT="$(mktemp)-${REF_CURRENT//\//-}"
+RESULT_TO_COMPARE="$(mktemp)-${BASE_TO_COMPARE//\//-}"
+
+TIMEOUT=${TIMEOUT:-30m}
+BENCH_COUNT=${BENCH_COUNT:-3}
+BENCHSTAT_CONFIDENCE_LEVEL=${BENCHSTAT_CONFIDENCE_LEVEL:-0.75}
+BENCHSTAT_FORMAT=${BENCHSTAT_FORMAT:-"text"}
+# Optional; default to empty so that reading it does not abort under `set -u`.
+BENCHSTAT_OUTPUT_FILE=${BENCHSTAT_OUTPUT_FILE:-}
+
+if [[ "${BENCHSTAT_FORMAT}" == "csv" ]] && [[ -z "${BENCHSTAT_OUTPUT_FILE}" ]]; then
+  echo "BENCHSTAT_FORMAT is set to csv, but BENCHSTAT_OUTPUT_FILE is not set." >&2
+  exit 1
+fi
+
+# Benchmarks the currently checked out ref, then BASE_TO_COMPARE, switches back
+# to the original ref and reports the benchstat comparison of the two runs.
+#
+# The report goes to BENCHSTAT_OUTPUT_FILE when it is set and non-empty,
+# otherwise (text format only) to stdout.
+function main() {
+  # BENCHSTAT_OUTPUT_FILE is optional. Read it with an empty default so the
+  # script does not abort with "unbound variable" under `set -u` after both
+  # benchmark runs have already completed.
+  local output_file="${BENCHSTAT_OUTPUT_FILE:-}"
+
+  echo ""
+  echo "### Testing ${REF_CURRENT}"
+
+  go test -timeout="${TIMEOUT}" -count="${BENCH_COUNT}" -benchmem -run=NONE -bench=. ./... | tee "${RESULT_CURRENT}"
+
+  # Filter benchmark lines, so benchstat can parse the output.
+  grep ^Benchmark "${RESULT_CURRENT}" > "${RESULT_CURRENT}".tmp && mv "${RESULT_CURRENT}".tmp "${RESULT_CURRENT}"
+
+  echo ""
+  echo "### Done testing ${REF_CURRENT}"
+
+  echo ""
+  echo "### Testing ${BASE_TO_COMPARE}"
+
+  git checkout "${BASE_TO_COMPARE}"
+
+  go test -timeout="${TIMEOUT}" -count="${BENCH_COUNT}" -benchmem -run=NONE -bench=. ./... | tee "${RESULT_TO_COMPARE}"
+
+  # Filter benchmark lines, so benchstat can parse the output.
+  grep ^Benchmark "${RESULT_TO_COMPARE}" > "${RESULT_TO_COMPARE}".tmp && mv "${RESULT_TO_COMPARE}".tmp "${RESULT_TO_COMPARE}"
+
+  echo ""
+  echo "### Done testing ${BASE_TO_COMPARE}"
+
+  # Switch back to the ref that was checked out before the comparison.
+  git checkout -
+
+  echo ""
+  echo "### Result"
+  echo "base=${BASE_TO_COMPARE} head=${REF_CURRENT}"
+
+  if [[ "${BENCHSTAT_FORMAT}" == "csv" ]]; then
+    # stderr is discarded so only the CSV data reaches the output file.
+    benchstat -format=csv -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}" 2>/dev/null 1>"${output_file}"
+  elif [[ -z "${output_file}" ]]; then
+    benchstat -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}"
+  else
+    benchstat -confidence="${BENCHSTAT_CONFIDENCE_LEVEL}" BASE="${RESULT_TO_COMPARE}" HEAD="${RESULT_CURRENT}" 1>"${output_file}"
+  fi
+}
+
+main