diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 5c1e8e425111b..6b748ec9c7ebb 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -49,7 +49,13 @@ jobs: mkdir clang mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ + cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll + # Update benchmarking dashboard configuration + cat << 'EOF' > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; + defaultCompareNames = ["Baseline_PVC_L0"]; + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 2f3c02bf334ed..3a93c2aae254c 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -114,6 +114,15 @@ on: default: '' required: False + benchmark_upload_results: + type: string + default: 'false' + required: False + benchmark_build_hash: + type: string + default: '' + required: False + workflow_dispatch: inputs: runner: @@ -126,6 +135,7 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' - '["cuda"]' + - '["Linux", "bmg"]' - '["PVC_PERF"]' image: type: choice @@ -154,6 +164,7 @@ on: - e2e - cts - compute-benchmarks + - benchmark_v2 env: description: | @@ -337,3 +348,14 @@ jobs: env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + + - name: Run benchmarks + if: inputs.tests_selector == 'benchmark_v2' + uses: ./devops/actions/run-tests/benchmark_v2 + with: + target_devices: ${{ inputs.target_devices }} + upload_results: ${{ inputs.benchmark_upload_results }} + build_hash: ${{ inputs.benchmark_build_hash }} + env: + RUNNER_TAG: ${{ inputs.runner }} + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml new file mode 100644 index 0000000000000..8e860bce6a384 --- /dev/null +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -0,0 +1,129 @@ +name: Run Benchmarks + +on: + schedule: + - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml + workflow_call: + inputs: + commit_hash: + type: string + required: false + default: '' + upload_results: + type: string # true/false: workflow_dispatch does not support booleans + required: true + runner: + type: string + required: true + backend: + type: string + required: true + reset_intel_gpu: + type: string # true/false: workflow_dispatch does not support booleans + required: true + default: true + + workflow_dispatch: + inputs: + commit_hash: + description: Commit hash to build intel/llvm from + type: string + required: false + default: '' + upload_results: + description: 'Save and upload results' + type: choice + options: + - false + - true + default: true + runner: + type: choice + options: + - '["PVC_PERF"]' + backend: + description: Backend to use + type: choice + options: + - 'level_zero:gpu' + # TODO L0 V2 support + reset_intel_gpu: + description: Reset Intel GPUs + type: choice + options: + - false + - true + default: true + +permissions: read-all + +jobs: + build_sycl: + name: Build SYCL from PR + if: inputs.commit_hash != '' + uses: ./.github/workflows/sycl-linux-build.yml + with: + 
build_ref: ${{ inputs.commit_hash }} + build_cache_root: "/__w/" + build_artifact_suffix: "default" + build_cache_suffix: "default" + # Docker image has last nightly pre-installed and added to the PATH + build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" + cc: clang + cxx: clang++ + changes: '[]' + + run_benchmarks_build: + name: Run Benchmarks (on PR Build) + needs: [ build_sycl ] + if: inputs.commit_hash != '' + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} + ref: ${{ inputs.commit_hash }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_build_hash: ${{ inputs.commit_hash }} + repo_ref: ${{ matrix.ref }} + devops_ref: ${{ github.ref }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} + + run_benchmarks_nightly: + name: Run Benchmarks (on Nightly Build) + if: inputs.commit_hash == '' + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} + repo_ref: ${{ github.ref }} diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 66ffcecd70314..d7c32edfdfc2a 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -1,12 +1,220 @@ name: Benchmarks Reusable -# This workflow is a WIP: This workflow file acts as a placeholder. +on: + workflow_call: + inputs: + str_name: + required: true + type: string + pr_no: + required: true + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. 
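+        # A value of '0' means the benchmarks run against the sycl main branch rather than a PR.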
+ type: string + bench_script_params: + required: false + type: string + default: '' + sycl_config_params: + required: false + type: string + default: '' + upload_report: + required: false + type: boolean + default: false + compute_runtime_commit: + required: false + type: string + default: '' -on: [ workflow_call ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest + bench-run: + name: Build SYCL, Run Benchmarks + strategy: + matrix: + adapter: [ + {str_name: "${{ inputs.str_name }}", + sycl_config: "${{ inputs.sycl_config_params }}" + } + ] + build_type: [Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: "PVC_PERF" + steps: - - run: echo 'This workflow is a WIP.' - + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout SYCL + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + path: sycl-repo + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. + - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + working-directory: ${{github.workspace}}/sycl-repo + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${{ inputs.pr_no }}/*:refs/remotes/origin/pr/${{ inputs.pr_no }}/* + git checkout origin/pr/${{ inputs.pr_no }}/merge + git rev-parse origin/pr/${{ inputs.pr_no }}/merge + + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location + - name: Establish bench scripts location + run: | + if [ -d "${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" ]; then + echo "Bench scripts are in devops/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" >> $GITHUB_ENV + elif [ -d "${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" ]; then + echo "Bench scripts are in unified-runtime/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" >> $GITHUB_ENV + else + echo "Bench scripts are absent...?" + exit 1 + fi + + - name: Create virtual environment + run: python -m venv .venv + + - name: Activate virtual environment and install pip packages + run: | + source .venv/bin/activate + pip install -r ${BENCH_SCRIPTS_DIR}/requirements.txt + + - name: Configure SYCL + run: > + python3 sycl-repo/buildbot/configure.py + -t ${{matrix.build_type}} + -o ${{github.workspace}}/sycl_build + --cmake-gen "Ninja" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache + --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache + ${{matrix.adapter.sycl_config}} + + - name: Build SYCL + run: cmake --build ${{github.workspace}}/sycl_build -j $(nproc) + + # We need a complete installed UR for compute-benchmarks. 
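+      # The resulting ur_install tree is passed to the benchmark script below via --ur.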
+ - name: Configure UR + run: > + cmake -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -S${{github.workspace}}/sycl-repo/unified-runtime + -B${{github.workspace}}/ur_build + -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/ur_install + -DUR_BUILD_TESTS=OFF + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_L0_V2=ON + -DUMF_DISABLE_HWLOC=ON + + - name: Build UR + run: cmake --build ${{github.workspace}}/ur_build -j $(nproc) + + - name: Install UR + run: cmake --install ${{github.workspace}}/ur_build + + - name: Compute core range + run: | + # Compute the core range for the first NUMA node; second node is for UMF jobs. + # Skip the first 4 cores - the kernel is likely to schedule more work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "Selected core: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + + - name: Run benchmarks + working-directory: ${{ github.workspace }} + id: benchmarks + run: > + source .venv/bin/activate && + taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py + ~/llvm_bench_workdir + --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install + --adapter ${{ matrix.adapter.str_name }} + --compare baseline + --compute-runtime ${{ inputs.compute_runtime_commit }} + --build-igc + ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + ${{ inputs.bench_script_params }} + + - name: Print benchmark results + run: | + cat ${{ github.workspace }}/benchmark_results.md || true + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('benchmark_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. 
Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Rename benchmark results file + if: ${{ always() && inputs.upload_report }} + run: mv benchmark_results.html benchmark_results_${{ inputs.pr_no }}.html + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: benchmark_results_${{ inputs.pr_no }}.html + key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }} + + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location + - name: Get information about platform + if: ${{ always() }} + run: | + ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh || true + ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh || true diff --git a/.github/workflows/ur-benchmarks.yml b/.github/workflows/ur-benchmarks.yml index 23fbb1ad903b4..cde4bfa828d71 100644 --- a/.github/workflows/ur-benchmarks.yml +++ b/.github/workflows/ur-benchmarks.yml @@ -1,12 +1,53 @@ name: Benchmarks -# This workflow is a WIP: this workflow file acts as a placeholder. +on: + workflow_dispatch: + inputs: + str_name: + description: Adapter + type: choice + required: true + default: 'level_zero' + options: + - level_zero + - level_zero_v2 + pr_no: + description: PR number (0 is sycl main branch) + type: number + required: true + bench_script_params: + description: Benchmark script arguments + type: string + required: false + default: '' + sycl_config_params: + description: Extra params for SYCL configuration + type: string + required: false + default: '' + compute_runtime_commit: + description: 'Compute Runtime commit' + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false -on: [ workflow_dispatch ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest - steps: - - run: echo 'This workflow is a WIP.' 
- + manual: + name: Compute Benchmarks + uses: ./.github/workflows/ur-benchmarks-reusable.yml + with: + str_name: ${{ inputs.str_name }} + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + sycl_config_params: ${{ inputs.sycl_config_params }} + compute_runtime_commit: ${{ inputs.compute_runtime_commit }} + upload_report: ${{ inputs.upload_report }} diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml index a0f94ab10f538..eebac4e424a4b 100644 --- a/.github/workflows/ur-build-hw.yml +++ b/.github/workflows/ur-build-hw.yml @@ -156,4 +156,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh + run: ${{github.workspace}}/devops/scripts/get_system_info.sh diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index e357e2bddec30..03b7d4ad776fd 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -95,7 +95,6 @@ runs: if: always() shell: bash run: | - # TODO -- waiting on security clearance # Load configuration values $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml new file mode 100644 index 0000000000000..bab571ec16ff2 --- /dev/null +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -0,0 +1,135 @@ +name: 'Run benchmarks' + +# This action assumes the following prerequisites: +# +# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. + +inputs: + target_devices: + type: string + required: True + upload_results: + type: string + required: True + build_hash: + type: string + required: False + default: '' + +runs: + using: "composite" + steps: + - name: Check specified runner type / target backend + shell: bash + env: + TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} + run: | + case "$RUNNER_TAG" in + '["PVC_PERF"]' ) ;; + *) + echo "#" + echo "# WARNING: Only specific tuned runners are fully supported." + echo "# This workflow is not guaranteed to work with other runners." + echo "#" ;; + esac + + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + + # input.target_devices is not directly used, as this allows code injection + case "$TARGET_DEVICE" in + level_zero:*) ;; + *) + echo "#" + echo "# WARNING: Only level_zero backend is fully supported." + echo "# This workflow is not guaranteed to work with other backends." 
+ echo "#" ;; + esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + - name: Compute CPU core range to run benchmarks on + shell: bash + run: | + # Compute the core range for the first NUMA node; second node is used by + # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results + - name: Run compute-benchmarks + env: + BUILD_HASH: ${{ inputs.build_hash }} + shell: bash + run: | + # TODO generate summary + display helpful message here + export CMPLR_ROOT=./toolchain + echo "-----" + sycl-ls + echo "-----" + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + + # TODO accomodate for different GPUs and backends + SAVE_NAME="Baseline_PVC_L0" + if [ -n "$BUILD_HASH" ]; then + SAVE_NAME="Commit_PVC_$BUILD_HASH" + fi + + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save "$SAVE_NAME" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal + echo "-----" + - name: Push compute-benchmarks results + if: inputs.upload_results == 'true' && always() + shell: bash + run: | + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." + else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + fi + diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md similarity index 91% rename from unified-runtime/scripts/benchmarks/README.md rename to devops/scripts/benchmarks/README.md index 004fe14eca35b..fcadded3cad51 100644 --- a/unified-runtime/scripts/benchmarks/README.md +++ b/devops/scripts/benchmarks/README.md @@ -6,6 +6,8 @@ Scripts for running performance tests on SYCL and Unified Runtime. - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench) - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/) +- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp) +- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench) ## Running @@ -27,8 +29,6 @@ You can also include additional benchmark parameters, such as environment variab Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request. -By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data. 
- You must be a member of the `oneapi-src` organization to access these features. ## Comparing results @@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option You can compare benchmark results using `--compare` option. The comparison will be presented in a markdown output file (see below). If you want to calculate the relative performance of the new results against the previously saved data, use `--compare ` (i.e. `--compare baseline`). In case of comparing only stored data without generating new results, use `--dry-run --compare --compare --relative-perf `, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script for running benchmarks. Listing more than two `--compare` options results in displaying only execution time, without statistical analysis. -Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results -are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html). +Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results +are stored [here](https://oneapi-src.github.io/unified-runtime/performance/). ## Output formats You can display the results in the form of a HTML file by using `--ouptut-html` and a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`. diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py similarity index 52% rename from unified-runtime/scripts/benchmarks/benches/base.py rename to devops/scripts/benchmarks/benches/base.py index d1bb5fb53b83a..4c2973d250e3d 100644 --- a/unified-runtime/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -1,16 +1,37 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +from dataclasses import dataclass import os import shutil from pathlib import Path -from .result import Result +from utils.result import BenchmarkMetadata, BenchmarkTag, Result from options import options from utils.utils import download, run -import urllib.request -import tarfile + +benchmark_tags = [ + BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"), + BenchmarkTag("UR", "Benchmark uses Unified Runtime API"), + BenchmarkTag("L0", "Benchmark uses Level Zero API directly"), + BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"), + BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"), + BenchmarkTag("application", "Real application-based performance test"), + BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"), + BenchmarkTag("submit", "Tests kernel submission performance"), + BenchmarkTag("math", "Tests math computation performance"), + BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"), + BenchmarkTag("allocation", "Tests memory allocation performance"), + BenchmarkTag("graph", "Tests graph-based execution performance"), + BenchmarkTag("latency", "Measures operation latency"), + BenchmarkTag("throughput", "Measures operation throughput"), + BenchmarkTag("inference", "Tests ML/AI inference performance"), + BenchmarkTag("image", "Image processing benchmark"), + BenchmarkTag("simulation", "Physics or scientific simulation benchmark"), +] + +benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} class Benchmark: @@ -55,19 +76,25 @@ def create_data_path(self, name, skip_data_dir=False): data_path = os.path.join(self.directory, name) else: data_path = os.path.join(self.directory, "data", name) - if options.rebuild and Path(data_path).exists(): + if options.redownload and Path(data_path).exists(): shutil.rmtree(data_path) Path(data_path).mkdir(parents=True, exist_ok=True) return data_path - def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False): + def download( + self, + name, + url, + file, + untar=False, + unzip=False, + skip_data_dir=False, + checksum="", + ): self.data_path = self.create_data_path(name, skip_data_dir) - return download(self.data_path, url, file, untar, unzip) - - def name(self): - raise NotImplementedError() + return download(self.data_path, url, file, untar, unzip, checksum) def lower_is_better(self): return True @@ -87,6 +114,30 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() + def name(self): + raise NotImplementedError() + + def description(self): + return "" + + def notes(self) -> str: + return None + + def unstable(self) -> str: + return None + + def get_tags(self) -> list[str]: + return [] + + def get_metadata(self) -> BenchmarkMetadata: + return BenchmarkMetadata( + type="benchmark", + description=self.description(), + notes=self.notes(), + unstable=self.unstable(), + tags=self.get_tags(), + ) + class Suite: def benchmarks(self) -> list[Benchmark]: @@ -97,3 +148,6 @@ def name(self) -> str: def setup(self): return + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return {} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py new file mode 100644 index 0000000000000..8dc7315af9e76 --- /dev/null +++ b/devops/scripts/benchmarks/benches/compute.py @@ -0,0 +1,704 @@ +# Copyright (C) 2024-2025 Intel Corporation +# Part of the Unified-Runtime Project, under the 
Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import csv +import io +from utils.utils import run, git_clone, create_build_path +from .base import Benchmark, Suite +from utils.result import BenchmarkMetadata, Result +from options import options +from enum import Enum + + +class RUNTIMES(Enum): + SYCL = "sycl" + LEVEL_ZERO = "l0" + UR = "ur" + + +def runtime_to_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "Level Zero", + RUNTIMES.UR: "Unified Runtime", + }[runtime] + + +def runtime_to_tag_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "L0", + RUNTIMES.UR: "UR", + }[runtime] + + +class ComputeBench(Suite): + def __init__(self, directory): + self.directory = directory + + def name(self) -> str: + return "Compute Benchmarks" + + def git_url(self) -> str: + return "https://github.com/intel/compute-benchmarks.git" + + def git_hash(self) -> str: + return "b5cc46acf61766ab00da04e85bd4da4f7591eb21" + + def setup(self): + if options.sycl is None: + return + + repo_path = git_clone( + self.directory, + "compute-benchmarks-repo", + self.git_url(), + self.git_hash(), + ) + build_path = create_build_path(self.directory, "compute-benchmarks-build") + + configure_command = [ + "cmake", + f"-B {build_path}", + f"-S {repo_path}", + f"-DCMAKE_BUILD_TYPE=Release", + f"-DBUILD_SYCL=ON", + f"-DSYCL_COMPILER_ROOT={options.sycl}", + f"-DALLOW_WARNINGS=ON", + ] + + if options.ur is not None: + configure_command += [ + f"-DBUILD_UR=ON", + f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", + ] + + run(configure_command, add_sycl=True) + + run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True) + + self.built = True + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + # TODO: group metadata should be automatically generated based on the benchmarks... 
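+        # A single shared metadata object covers all four SubmitKernel group variants returned below.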
+ submit_kernel_metadata = BenchmarkMetadata( + type="group", + description="Measures CPU time overhead of submitting kernels through different APIs.", + notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" + "Work is ongoing to reduce the overhead of the SYCL API\n", + tags=["submit", "micro", "SYCL", "UR", "L0"], + ) + + return { + "SubmitKernel In Order": submit_kernel_metadata, + "SubmitKernel Out Of Order": submit_kernel_metadata, + "SubmitKernel In Order With Completion": submit_kernel_metadata, + "SubmitKernel Out Of Order With Completion": submit_kernel_metadata, + "SinKernelGraph": BenchmarkMetadata( + type="group", + unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", + tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"], + ), + "SubmitGraph": BenchmarkMetadata( + type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] + ), + } + + def enabled_runtimes(self, supported_runtimes=None): + # all runtimes in the RUNTIMES enum + runtimes = supported_runtimes or list(RUNTIMES) + + # Filter out UR if not available + if options.ur is None: + runtimes = [r for r in runtimes if r != RUNTIMES.UR] + + return runtimes + + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + if options.ur_adapter == "cuda": + return [] + + benches = [] + + # Add SubmitKernel benchmarks using loops + for runtime in self.enabled_runtimes(): + for in_order_queue in [0, 1]: + for measure_completion in [0, 1]: + benches.append( + SubmitKernel(self, runtime, in_order_queue, measure_completion) + ) + + # Add SinKernelGraph benchmarks + for runtime in self.enabled_runtimes(): + for with_graphs in [0, 1]: + for num_kernels in [5, 100]: + benches.append( + GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) + ) + + # Add ULLS benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]): + benches.append(UllsEmptyKernel(self, runtime, 1000, 256)) + benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1)) + + # Add GraphApiSubmitGraph benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL]): + for in_order_queue in [0, 1]: + for num_kernels in [4, 10, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + runtime, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + + # Add other benchmarks + benches += [ + QueueInOrderMemcpy(self, 0, "Device", "Device", 1024), + QueueInOrderMemcpy(self, 0, "Host", "Device", 1024), + QueueMemcpy(self, "Device", "Device", 1024), + StreamMemory(self, "Triad", 10 * 1024, "Device"), + ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), + ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), + VectorSum(self), + ] + + # Add UR-specific benchmarks + if options.ur is not None: + benches += [ + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), + ] + + return benches + + +def parse_unit_type(compute_unit): + if "[count]" in compute_unit: + return "instr" + elif "[us]" in compute_unit: + return "μs" + return compute_unit.replace("[", "").replace("]", "") + + +class ComputeBenchmark(Benchmark): + def 
__init__(self, bench, name, test): + super().__init__(bench.directory, bench) + self.bench = bench + self.bench_name = name + self.test = test + + def bin_args(self) -> list[str]: + return [] + + def extra_env_vars(self) -> dict: + return {} + + def setup(self): + self.benchmark_bin = os.path.join( + self.bench.directory, "compute-benchmarks-build", "bin", self.bench_name + ) + + def explicit_group(self): + return "" + + def description(self) -> str: + return "" + + def run(self, env_vars) -> list[Result]: + command = [ + f"{self.benchmark_bin}", + f"--test={self.test}", + "--csv", + "--noHeaders", + ] + + command += self.bin_args() + env_vars.update(self.extra_env_vars()) + + result = self.run_bench(command, env_vars) + parsed_results = self.parse_output(result) + ret = [] + for label, median, stddev, unit in parsed_results: + extra_label = " CPU count" if parse_unit_type(unit) == "instr" else "" + explicit_group = ( + self.explicit_group() + extra_label + if self.explicit_group() != "" + else "" + ) + ret.append( + Result( + label=self.name() + extra_label, + explicit_group=explicit_group, + value=median, + stddev=stddev, + command=command, + env=env_vars, + stdout=result, + unit=parse_unit_type(unit), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), + ) + ) + return ret + + def parse_output(self, output): + csv_file = io.StringIO(output) + reader = csv.reader(csv_file) + next(reader, None) + results = [] + while True: + data_row = next(reader, None) + if data_row is None: + break + try: + label = data_row[0] + mean = float(data_row[1]) + median = float(data_row[2]) + # compute benchmarks report stddev as % + stddev = mean * (float(data_row[3].strip("%")) / 100.0) + unit = data_row[7] + results.append((label, median, stddev, unit)) + except (ValueError, IndexError) as e: + raise ValueError(f"Error parsing output: {e}") + if len(results) == 0: + raise ValueError("Benchmark output does not contain data.") + return results + + def teardown(self): + return + + +class SubmitKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): + self.ioq = ioq + self.runtime = runtime + self.measure_completion = measure_completion + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" + ) + + def get_tags(self): + return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"] + + def name(self): + order = "in order" if self.ioq else "out of order" + completion_str = " with measure completion" if self.measure_completion else "" + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}" + + def explicit_group(self): + order = "In Order" if self.ioq else "Out Of Order" + completion_str = " With Completion" if self.measure_completion else "" + return f"SubmitKernel {order}{completion_str}" + + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + runtime_name = runtime_to_name(self.runtime) + + completion_desc = "" + if self.runtime == RUNTIMES.UR: + completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time" + + l0_specific = "" + if self.runtime == RUNTIMES.LEVEL_ZERO: + l0_specific = " Uses immediate command lists" + + return ( + f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " + f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. 
{l0_specific}" + ) + + def bin_args(self) -> list[str]: + return [ + f"--Ioq={self.ioq}", + "--DiscardEvents=0", + f"--MeasureCompletion={self.measure_completion}", + "--iterations=100000", + "--Profiling=0", + "--NumKernels=10", + "--KernelExecTime=1", + ] + + +class ExecImmediateCopyQueue(ComputeBenchmark): + def __init__(self, bench, ioq, isCopyOnly, source, destination, size): + self.ioq = ioq + self.isCopyOnly = isCopyOnly + self.source = source + self.destination = destination + self.size = size + super().__init__(bench, "api_overhead_benchmark_sycl", "ExecImmediateCopyQueue") + + def name(self): + order = "in order" if self.ioq else "out of order" + return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}" + + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL {order} queue overhead for {operation} from {self.source} to " + f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads." + ) + + def get_tags(self): + return ["memory", "submit", "latency", "SYCL", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=100000", + f"--ioq={self.ioq}", + f"--IsCopyOnly={self.isCopyOnly}", + "--MeasureCompletionTime=0", + f"--src={self.destination}", + f"--dst={self.destination}", + f"--size={self.size}", + ] + + +class QueueInOrderMemcpy(ComputeBenchmark): + def __init__(self, bench, isCopyOnly, source, destination, size): + self.isCopyOnly = isCopyOnly + self.source = source + self.destination = destination + self.size = size + super().__init__(bench, "memory_benchmark_sycl", "QueueInOrderMemcpy") + + def name(self): + return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}" + + def description(self) -> str: + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL in-order queue memory copy performance for {operation} from " + f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration." + ) + + def get_tags(self): + return ["memory", "latency", "SYCL", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--IsCopyOnly={self.isCopyOnly}", + f"--sourcePlacement={self.source}", + f"--destinationPlacement={self.destination}", + f"--size={self.size}", + "--count=100", + ] + + +class QueueMemcpy(ComputeBenchmark): + def __init__(self, bench, source, destination, size): + self.source = source + self.destination = destination + self.size = size + super().__init__(bench, "memory_benchmark_sycl", "QueueMemcpy") + + def name(self): + return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}" + + def description(self) -> str: + return ( + f"Measures general SYCL queue memory copy performance from {self.source} to " + f"{self.destination} with {self.size} bytes per operation." 
+ ) + + def get_tags(self): + return ["memory", "latency", "SYCL", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--sourcePlacement={self.source}", + f"--destinationPlacement={self.destination}", + f"--size={self.size}", + ] + + +class StreamMemory(ComputeBenchmark): + def __init__(self, bench, type, size, placement): + self.type = type + self.size = size + self.placement = placement + super().__init__(bench, "memory_benchmark_sycl", "StreamMemory") + + def name(self): + return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + + def description(self) -> str: + return ( + f"Measures {self.placement} memory bandwidth using {self.type} pattern with " + f"{self.size} bytes. Higher values (GB/s) indicate better performance." + ) + + # measurement is in GB/s + def lower_is_better(self): + return False + + def get_tags(self): + return ["memory", "throughput", "SYCL", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--type={self.type}", + f"--size={self.size}", + f"--memoryPlacement={self.placement}", + "--useEvents=0", + "--contents=Zeros", + "--multiplier=1", + "--vectorSize=1", + ] + + +class VectorSum(ComputeBenchmark): + def __init__(self, bench): + super().__init__(bench, "miscellaneous_benchmark_sycl", "VectorSum") + + def name(self): + return f"miscellaneous_benchmark_sycl VectorSum" + + def description(self) -> str: + return ( + "Measures performance of vector addition across 3D grid (512x256x256 elements) " + "using SYCL." + ) + + def get_tags(self): + return ["math", "throughput", "SYCL", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=1000", + "--numberOfElementsX=512", + "--numberOfElementsY=256", + "--numberOfElementsZ=256", + ] + + +class MemcpyExecute(ComputeBenchmark): + def __init__( + self, + bench, + numOpsPerThread, + numThreads, + allocSize, + iterations, + srcUSM, + dstUSM, + useEvent, + ): + self.numOpsPerThread = numOpsPerThread + self.numThreads = numThreads + self.allocSize = allocSize + self.iterations = iterations + self.srcUSM = srcUSM + self.dstUSM = dstUSM + self.useEvents = useEvent + super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute") + + def name(self): + return ( + f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" + + (" without events" if not self.useEvents else "") + ) + + def description(self) -> str: + src_type = "device" if self.srcUSM == 1 else "host" + dst_type = "device" if self.dstUSM == 1 else "host" + events = "with" if self.useEvents else "without" + return ( + f"Measures multithreaded memory copy performance with {self.numThreads} threads " + f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes " + f"from {src_type} to {dst_type} memory {events} events." 
+ ) + + def get_tags(self): + return ["memory", "latency", "UR", "micro"] + + def bin_args(self) -> list[str]: + return [ + "--Ioq=1", + f"--UseEvents={self.useEvents}", + "--MeasureCompletion=1", + "--UseQueuePerThread=1", + f"--AllocSize={self.allocSize}", + f"--NumThreads={self.numThreads}", + f"--NumOpsPerThread={self.numOpsPerThread}", + f"--iterations={self.iterations}", + f"--SrcUSM={self.srcUSM}", + f"--DstUSM={self.dstUSM}", + ] + + +class GraphApiSinKernelGraph(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): + self.withGraphs = withGraphs + self.numKernels = numKernels + self.runtime = runtime + super().__init__( + bench, f"graph_api_benchmark_{runtime.value}", "SinKernelGraph" + ) + + def explicit_group(self): + return f"SinKernelGraph {self.numKernels}" + + def description(self) -> str: + execution = "using graphs" if self.withGraphs else "without graphs" + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"sin kernels {execution}. Tests overhead and benefits of graph-based execution." + ) + + def name(self): + return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" + + def unstable(self) -> str: + return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." + + def get_tags(self): + return [ + "graph", + runtime_to_tag_name(self.runtime), + "proxy", + "submit", + "memory", + "latency", + ] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--numKernels={self.numKernels}", + f"--withGraphs={self.withGraphs}", + "--withCopyOffload=1", + "--immediateAppendCmdList=0", + ] + + +class GraphApiSubmitGraph(ComputeBenchmark): + def __init__( + self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime + ): + self.inOrderQueue = inOrderQueue + self.numKernels = numKernels + self.runtime = runtime + self.measureCompletionTime = measureCompletionTime + super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph") + + def explicit_group(self): + return f"SubmitGraph {self.numKernels}" + + def description(self) -> str: + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution." 
+ ) + + def name(self): + return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" + + def get_tags(self): + return [ + "graph", + runtime_to_tag_name(self.runtime), + "micro", + "submit", + "latency", + ] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--NumKernels={self.numKernels}", + f"--MeasureCompletionTime={self.measureCompletionTime}", + f"--InOrderQueue={self.inOrderQueue}", + "--Profiling=0", + "--KernelExecutionTime=1", + ] + + +class UllsEmptyKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, wgc, wgs): + self.wgc = wgc + self.wgs = wgs + self.runtime = runtime + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel") + + def explicit_group(self): + return f"EmptyKernel {self.wgc} {self.wgs}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" + + def get_tags(self): + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--wgs={self.wgs}", + f"--wgc={self.wgs}", + ] + + +class UllsKernelSwitch(ComputeBenchmark): + def __init__( + self, + bench, + runtime: RUNTIMES, + count, + kernelTime, + barrier, + hostVisible, + ioq, + ctrBasedEvents, + ): + self.count = count + self.kernelTime = kernelTime + self.barrier = barrier + self.hostVisible = hostVisible + self.ctrBasedEvents = ctrBasedEvents + self.runtime = runtime + self.ioq = ioq + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch") + + def explicit_group(self): + return f"KernelSwitch {self.count} {self.kernelTime}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" + + def get_tags(self): + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] + + def bin_args(self) -> list[str]: + return [ + "--iterations=1000", + f"--count={self.count}", + f"--kernelTime={self.kernelTime}", + f"--barrier={self.barrier}", + f"--hostVisible={self.hostVisible}", + f"--ioq={self.ioq}", + f"--ctrBasedEvents={self.ctrBasedEvents}", + ] diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py similarity index 77% rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py rename to devops/scripts/benchmarks/benches/llamacpp.py index 6524c95a9f56f..86d41ed525292 100644 --- a/unified-runtime/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,10 +8,10 @@ from pathlib import Path from utils.utils import download, git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os @@ -25,6 +25,12 @@ def __init__(self, directory): def name(self) -> str: return "llama.cpp bench" + def git_url(self) -> str: + return "https://github.com/ggerganov/llama.cpp" + + def git_hash(self) -> str: + return "1ee9eea094fe5846c7d8d770aa7caa749d246b23" + def setup(self): if options.sycl is None: return @@ -32,8 +38,8 @@ def setup(self): repo_path = git_clone( self.directory, "llamacpp-repo", - "https://github.com/ggerganov/llama.cpp", - "1ee9eea094fe5846c7d8d770aa7caa749d246b23", + self.git_url(), + self.git_hash(), ) self.models_dir = os.path.join(self.directory, "models") @@ -43,6 +49,7 @@ def setup(self): self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf", + checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4", ) self.oneapi = get_oneapi() @@ -62,11 +69,11 @@ def setup(self): f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}", ] - print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run( - f"cmake --build {self.build_path} -j", + f"cmake --build {self.build_path} -j {options.build_jobs}", add_sycl=True, ld_library=self.oneapi.ld_libraries(), ) @@ -92,6 +99,17 @@ def setup(self): def name(self): return f"llama.cpp" + def description(self) -> str: + return ( + "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. " + "Runs both prompt processing (initial context processing) and text generation benchmarks with " + "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct " + "quantized model and leverages SYCL with oneDNN for acceleration." + ) + + def get_tags(self): + return ["SYCL", "application", "inference", "throughput"] + def lower_is_better(self): return False @@ -130,6 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return results diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py similarity index 86% rename from unified-runtime/scripts/benchmarks/benches/syclbench.py rename to devops/scripts/benchmarks/benches/syclbench.py index f7cf571a7ecd7..9854c92d338fc 100644 --- a/unified-runtime/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options @@ -23,6 +23,12 @@ def __init__(self, directory): def name(self) -> str: return "SYCL-Bench" + def git_url(self) -> str: + return "https://github.com/unisa-hpc/sycl-bench.git" + + def git_hash(self) -> str: + return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b" + def setup(self): if options.sycl is None: return @@ -31,8 +37,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/mateuszpn/sycl-bench.git", - "1e6ab2cfd004a72c5336c26945965017e06eab71", + self.git_url(), + self.git_hash(), ) configure_command = [ @@ -51,7 +57,7 @@ def setup(self): ] run(configure_command, add_sycl=True) - run(f"cmake --build {build_path} -j", add_sycl=True) + run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True) self.built = True @@ -65,14 +71,14 @@ def benchmarks(self) -> list[Benchmark]: DagTaskS(self), HostDevBandwidth(self), LocalMem(self), - Pattern_L2(self), - Reduction(self), + # Pattern_L2(self), # validation failure + # Reduction(self), # validation failure ScalarProd(self), SegmentReduction(self), - UsmAccLatency(self), + # UsmAccLatency(self), # validation failure UsmAllocLatency(self), - UsmInstrMix(self), - UsmPinnedOverhead(self), + # UsmInstrMix(self), # validation failure + # UsmPinnedOverhead(self), # validation failure VecAdd(self), # *** sycl-bench single benchmarks # TwoDConvolution(self), # run time < 1ms @@ -82,20 +88,20 @@ def benchmarks(self) -> list[Benchmark]: Atax(self), # Atomic_reduction(self), # run time < 1ms Bicg(self), - Correlation(self), - Covariance(self), - Gemm(self), - Gesumv(self), - Gramschmidt(self), + # Correlation(self), # validation failure + # Covariance(self), # validation failure + # Gemm(self), # validation failure + # Gesumv(self), # validation failure + # Gramschmidt(self), # validation failure KMeans(self), LinRegCoeff(self), # LinRegError(self), # run time < 1ms - MatmulChain(self), + # MatmulChain(self), # validation failure MolDyn(self), - Mvt(self), + # Mvt(self), # validation failure Sf(self), - Syr2k(self), - Syrk(self), + # Syr2k(self), # validation failure + # Syrk(self), # validation failure ] @@ -105,7 +111,6 @@ def __init__(self, bench, name, test): self.bench = bench self.bench_name = name self.test = test - self.done = False def bin_args(self) -> list[str]: return [] @@ -113,16 +118,26 @@ def bin_args(self) -> list[str]: def extra_env_vars(self) -> dict: return {} + def get_tags(self): + base_tags = ["SYCL", "micro"] + if "Memory" in self.bench_name or "mem" in self.bench_name.lower(): + base_tags.append("memory") + if "Reduction" in self.bench_name: + base_tags.append("math") + if "Bandwidth" in self.bench_name: + base_tags.append("throughput") + if "Latency" in self.bench_name: + base_tags.append("latency") + return base_tags + def setup(self): self.benchmark_bin = os.path.join( self.directory, "sycl-bench-build", self.bench_name ) def run(self, env_vars) -> list[Result]: - if self.done: - return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") - print(f"{self.__class__.__name__}: Results in {self.outputfile}") + command = [ f"{self.benchmark_bin}", f"--warmup-run", @@ -143,25 +158,27 @@ def run(self, env_vars) -> list[Result]: if not row[0].startswith("#"): res_list.append( 
Result( - label=row[0], + label=f"{self.name()} {row[0]}", value=float(row[12]) * 1000, # convert to ms passed=(row[1] == "PASS"), command=command, env=env_vars, stdout=row, unit="ms", + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) - self.done = True - return res_list - def teardown(self): - print(f"Removing {self.outputfile}...") os.remove(self.outputfile) - return + + return res_list def name(self): - return self.test + return f"{self.bench.name()} {self.test}" + + def teardown(self): + return # multi benchmarks diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py new file mode 100644 index 0000000000000..ad1e8c9e57735 --- /dev/null +++ b/devops/scripts/benchmarks/benches/test.py @@ -0,0 +1,106 @@ +# Copyright (C) 2024-2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import random +from utils.utils import git_clone +from .base import Benchmark, Suite +from utils.result import BenchmarkMetadata, Result +from utils.utils import run, create_build_path +from options import options +import os + + +class TestSuite(Suite): + def __init__(self): + return + + def setup(self): + return + + def name(self) -> str: + return "Test Suite" + + def benchmarks(self) -> list[Benchmark]: + bench_configs = [ + ("Memory Bandwidth", 2000, 200, "Foo Group", None, None), + ("Latency", 100, 20, "Bar Group", "A Latency test note!", None), + ("Throughput", 1500, 150, "Foo Group", None, None), + ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"), + ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"), + ] + + result = [] + for base_name, base_value, base_diff, group, notes, unstable in bench_configs: + for variant in range(6): + value_multiplier = 1.0 + (variant * 0.2) + name = f"{base_name} {variant+1}" + value = base_value * value_multiplier + diff = base_diff * value_multiplier + + result.append( + TestBench(self, name, value, diff, group, notes, unstable) + ) + + return result + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "Foo Group": BenchmarkMetadata( + type="group", + description="This is a test benchmark for Foo Group.", + notes="This is a test note for Foo Group.\n" "Look, multiple lines!", + ), + "Bar Group": BenchmarkMetadata( + type="group", + description="This is a test benchmark for Bar Group.", + unstable="This is an unstable note for Bar Group.", + ), + } + + +class TestBench(Benchmark): + def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None): + super().__init__("", suite) + self.bname = name + self.value = value + self.diff = diff + self.group = group + self.notes_text = notes + self.unstable_text = unstable + + def name(self): + return self.bname + + def lower_is_better(self): + return True + + def setup(self): + return + + def description(self) -> str: + return f"This is a test benchmark for {self.bname}." 
+ + def notes(self) -> str: + return self.notes_text + + def unstable(self) -> str: + return self.unstable_text + + def run(self, env_vars) -> list[Result]: + random_value = self.value + random.uniform(-1 * (self.diff), self.diff) + return [ + Result( + label=self.name(), + explicit_group=self.group, + value=random_value, + command=["test", "--arg1", "foo"], + env={"A": "B"}, + stdout="no output", + unit="ms", + ) + ] + + def teardown(self): + return diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py similarity index 55% rename from unified-runtime/scripts/benchmarks/benches/umf.py rename to devops/scripts/benchmarks/benches/umf.py index c7b767f02bbe1..1b7726b4db819 100644 --- a/unified-runtime/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,13 +6,14 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os import csv import io +import re def isUMFAvailable(): @@ -22,8 +23,6 @@ def isUMFAvailable(): class UMFSuite(Suite): def __init__(self, directory): self.directory = directory - if not isUMFAvailable(): - print("UMF not provided. Related benchmarks will not run") def name(self) -> str: return "UMF" @@ -47,87 +46,36 @@ def benchmarks(self) -> list[Benchmark]: return benches -class ComputeUMFBenchmark(Benchmark): - def __init__(self, bench, name): +class GBench(Benchmark): + def __init__(self, bench): super().__init__(bench.directory, bench) self.bench = bench - self.bench_name = name + self.bench_name = "umf-benchmark" self.oneapi = get_oneapi() + self.umf_lib = options.umf + "lib" - self.col_name = None - self.col_iterations = None - self.col_real_time = None - self.col_cpu_time = None - self.col_time_unit = None + self.fragmentation_prefix = "FRAGMENTATION_" - self.col_statistics_time = None + self.num_cols_with_memory = 13 - def bin_args(self) -> list[str]: - return [] - - def extra_env_vars(self) -> dict: - return {} - - def setup(self): - if not isUMFAvailable(): - print("UMF prefix path not provided") - return - - self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) - - def run(self, env_vars) -> list[Result]: - command = [ - f"{self.benchmark_bin}", - ] - - command += self.bin_args() - env_vars.update(self.extra_env_vars()) - - result = self.run_bench( - command, env_vars, add_sycl=False, ld_library=[self.oneapi.tbb_lib()] - ) - parsed = self.parse_output(result) - results = [] - for r in parsed: - (config, pool, mean) = r - label = f"{config} {pool}" - results.append( - Result( - label=label, - value=mean, - command=command, - env=env_vars, - stdout=result, - unit="ns", - explicit_group=config, - ) - ) - return results - - # Implementation with self.col_* indices could lead to the division by None - def get_mean(self, datarow): - raise NotImplementedError() - - def teardown(self): - return - - -class GBench(ComputeUMFBenchmark): - def __init__(self, bench): - super().__init__(bench, "umf-benchmark") - - self.col_name = 0 - self.col_iterations = 1 - 
self.col_real_time = 2 - self.col_cpu_time = 3 - self.col_time_unit = 4 + self.col_name = "name" + self.col_iterations = "iterations" + self.col_real_time = "real_time" + self.col_cpu_time = "cpu_time" + self.col_time_unit = "time_unit" + self.col_memory_overhead = "memory_overhead" self.idx_pool = 0 self.idx_config = 1 self.name_separator = "/" self.col_statistics_time = self.col_real_time + self.col_statistics_memory = self.col_memory_overhead + + self.is_preloaded = False + + self.lib_to_be_replaced = None def name(self): return self.bench_name @@ -138,17 +86,23 @@ def name(self): def bin_args(self): return ["--benchmark_format=csv"] - # the default unit - # might be changed globally with --benchmark_time_unit={ns|us|ms|s} - # the change affects only benchmark where time unit has not been set - # explicitly - def unit(self): - return "ns" - # these benchmarks are not stable, so set this at a large value def stddev_threshold(self) -> float: return 0.2 # 20% + def extra_env_vars(self) -> dict: + return {} + + def setup(self): + if not isUMFAvailable(): + print("UMF prefix path not provided") + return + + self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) + + def is_memory_statistics_included(self, data_row): + return len(data_row) == self.num_cols_with_memory + def get_pool_and_config(self, full_name): list_split = full_name.split(self.name_separator, 1) if len(list_split) != 2: @@ -156,71 +110,115 @@ def get_pool_and_config(self, full_name): return list_split[self.idx_pool], list_split[self.idx_config] - def get_mean(self, datarow): + def get_mean_time(self, datarow): return float(datarow[self.col_statistics_time]) - def parse_output(self, output): - csv_file = io.StringIO(output) - reader = csv.reader(csv_file) + def get_memory_overhead(self, datarow): + return float(datarow[self.col_statistics_memory]) - data_row = next(reader, None) - if data_row is None: - raise ValueError("Benchmark output does not contain data.") + def get_unit_time_or_overhead(self, config): + if re.search(f"^{self.fragmentation_prefix}", config): + return "%" - results = [] - for row in reader: - try: - full_name = row[self.col_name] - pool, config = self.get_pool_and_config(full_name) - mean = self.get_mean(row) - results.append((config, pool, mean)) - except KeyError as e: - raise ValueError(f"Error parsing output: {e}") + # the default time unit + # might be changed globally with --benchmark_time_unit={ns|us|ms|s} + # the change affects only benchmark where time unit has not been set + # explicitly + return "ns" - return results + def get_names_of_benchmarks_to_be_run(self, command, env_vars): + list_all_command = command + ["--benchmark_list_tests"] + if self.is_preloaded: + list_all_command += ["--benchmark_filter=" + self.lib_to_be_replaced] -class GBenchPreloaded(GBench): - def __init__(self, bench, lib_to_be_replaced, replacing_lib): - super().__init__(bench) + all_names = self.run_bench( + list_all_command, env_vars, add_sycl=False, ld_library=[self.umf_lib] + ).splitlines() - self.lib_to_be_replaced = lib_to_be_replaced - self.replacing_lib = replacing_lib + return all_names - def bin_args(self): - full_args = super().bin_args() - full_args.append(f"--benchmark_filter={self.lib_to_be_replaced}") + def run(self, env_vars) -> list[Result]: + command = [f"{self.benchmark_bin}"] - return full_args + all_names = self.get_names_of_benchmarks_to_be_run(command, env_vars) - def get_preloaded_name(self, pool_name) -> str: - new_pool_name = pool_name.replace(self.lib_to_be_replaced, 
self.replacing_lib) + command += self.bin_args() + env_vars.update(self.extra_env_vars()) - return new_pool_name + results = [] + + for name in all_names: + specific_benchmark = command + ["--benchmark_filter=^" + name + "$"] + + result = self.run_bench( + specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib] + ) + + parsed = self.parse_output(result) + for r in parsed: + (explicit_group, pool, value) = r + label = f"{explicit_group} {pool}" + results.append( + Result( + label=label, + value=value, + command=command, + env=env_vars, + stdout=result, + unit=self.get_unit_time_or_overhead(explicit_group), + explicit_group=explicit_group, + ) + ) + + return results def parse_output(self, output): csv_file = io.StringIO(output) - reader = csv.reader(csv_file) - - data_row = next(reader, None) - if data_row is None: - raise ValueError("Benchmark output does not contain data.") + reader = csv.DictReader(csv_file) results = [] + for row in reader: try: full_name = row[self.col_name] pool, config = self.get_pool_and_config(full_name) - mean = self.get_mean(row) - updated_pool = self.get_preloaded_name(pool) - updated_config = self.get_preloaded_name(config) + statistics_time = self.get_mean_time(row) + + if self.is_preloaded: + pool = self.get_preloaded_pool_name(pool) + + results.append((config, pool, statistics_time)) + + if self.is_memory_statistics_included(row): + statistics_overhead = self.get_memory_overhead(row) + config = self.fragmentation_prefix + config + + results.append((config, pool, statistics_overhead)) - results.append((updated_config, updated_pool, mean)) except KeyError as e: raise ValueError(f"Error parsing output: {e}") return results + def teardown(self): + return + + +class GBenchPreloaded(GBench): + def __init__(self, bench, lib_to_be_replaced, replacing_lib): + super().__init__(bench) + + self.is_preloaded = True + + self.lib_to_be_replaced = lib_to_be_replaced + self.replacing_lib = replacing_lib + + def get_preloaded_pool_name(self, pool_name) -> str: + new_pool_name = pool_name.replace(self.lib_to_be_replaced, self.replacing_lib) + + return new_pool_name + class GBenchGlibc(GBenchPreloaded): def __init__(self, bench, replacing_lib): diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py similarity index 74% rename from unified-runtime/scripts/benchmarks/benches/velocity.py rename to devops/scripts/benchmarks/benches/velocity.py index b7d06cbe4a3a2..493298dea8b10 100644 --- a/unified-runtime/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,10 +7,10 @@ import shutil from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import shutil import os @@ -26,6 +26,12 @@ def __init__(self, directory): def name(self) -> str: return "Velocity Bench" + def git_url(self) -> str: + return "https://github.com/oneapi-src/Velocity-Bench/" + + def git_hash(self) -> str: + return "b22215c16f789100449c34bf4eaa3fb178983d69" + def setup(self): if options.sycl is None: return @@ -33,8 +39,8 @@ def setup(self): self.repo_path = git_clone( self.directory, "velocity-bench-repo", - "https://github.com/oneapi-src/Velocity-Bench/", - "b22215c16f789100449c34bf4eaa3fb178983d69", + self.git_url(), + self.git_hash(), ) def benchmarks(self) -> list[Benchmark]: @@ -101,7 +107,7 @@ def setup(self): run(configure_command, {"CC": "clang", "CXX": "clang++"}, add_sycl=True) run( - f"cmake --build {build_path} -j", + f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True, ld_library=self.ld_libraries(), ) @@ -115,6 +121,12 @@ def extra_env_vars(self) -> dict: def parse_output(self, stdout: str) -> float: raise NotImplementedError() + def description(self) -> str: + return "" + + def get_tags(self): + return ["SYCL", "application"] + def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -133,6 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, + git_url=self.vb.git_url(), + git_hash=self.vb.git_hash(), ) ] @@ -147,6 +161,12 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Hashtable" + def description(self) -> str: + return ( + "Measures hash table search performance using an efficient lock-free algorithm with linear probing. " + "Reports throughput in millions of keys processed per second. Higher values indicate better performance." + ) + def bin_args(self) -> list[str]: return ["--no-verify"] @@ -162,6 +182,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse keys per second from benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "throughput"] + class Bitcracker(VelocityBase): def __init__(self, vb: VelocityBench): @@ -170,6 +193,13 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Bitcracker" + def description(self) -> str: + return ( + "Password-cracking application for BitLocker-encrypted memory units. " + "Uses dictionary attack to find user or recovery passwords. " + "Measures total time required to process 60000 passwords." + ) + def bin_args(self) -> list[str]: self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass") @@ -193,6 +223,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." 
) + def get_tags(self): + return ["SYCL", "application", "throughput"] + class SobelFilter(VelocityBase): def __init__(self, vb: VelocityBench): @@ -204,11 +237,19 @@ def download_deps(self): "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True, + checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66", ) def name(self): return "Velocity-Bench Sobel Filter" + def description(self) -> str: + return ( + "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter " + "to reduce edge artifacts. Processes a large 32K x 32K image and measures " + "the time required to apply the filter." + ) + def bin_args(self) -> list[str]: return [ "-i", @@ -231,6 +272,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "image", "throughput"] + class QuickSilver(VelocityBase): def __init__(self, vb: VelocityBench): @@ -249,6 +293,13 @@ def run(self, env_vars) -> list[Result]: def name(self): return "Velocity-Bench QuickSilver" + def description(self) -> str: + return ( + "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. " + "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. " + "Reports a figure of merit in MMS/CTT where higher values indicate better performance." + ) + def lower_is_better(self): return False @@ -271,6 +322,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "simulation", "throughput"] + class Easywave(VelocityBase): def __init__(self, vb: VelocityBench): @@ -279,14 +333,22 @@ def __init__(self, vb: VelocityBench): def download_deps(self): self.download( "easywave", - "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", + "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True, + checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1", ) def name(self): return "Velocity-Bench Easywave" + def description(self) -> str: + return ( + "A tsunami wave simulator used for researching tsunami generation and wave propagation. " + "Measures the elapsed time in milliseconds to simulate a specified tsunami event " + "based on real-world data." + ) + def bin_args(self) -> list[str]: return [ "-grid", @@ -327,6 +389,9 @@ def parse_output(self, stdout: str) -> float: os.path.join(options.benchmark_cwd, "easywave.log") ) + def get_tags(self): + return ["SYCL", "application", "simulation"] + class CudaSift(VelocityBase): def __init__(self, vb: VelocityBench): @@ -341,6 +406,13 @@ def download_deps(self): def name(self): return "Velocity-Bench CudaSift" + def description(self) -> str: + return ( + "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm " + "for detecting, describing, and matching local features in images. " + "Measures average processing time in milliseconds." 
+ ) + def parse_output(self, stdout: str) -> float: match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout) if match: @@ -348,6 +420,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "image"] + class DLCifar(VelocityBase): def __init__(self, vb: VelocityBench): @@ -364,6 +439,7 @@ def download_deps(self): "cifar-10-binary.tar.gz", untar=True, skip_data_dir=True, + checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814", ) return @@ -382,6 +458,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-cifar" + def description(self) -> str: + return ( + "Deep learning image classification workload based on the CIFAR-10 dataset " + "of 60,000 32x32 color images in 10 classes. Uses neural networks to " + "classify input images and measures total calculation time." + ) + def parse_output(self, stdout: str) -> float: match = re.search( r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout @@ -391,6 +474,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "inference", "image"] + class DLMnist(VelocityBase): def __init__(self, vb: VelocityBench): @@ -407,6 +493,7 @@ def download_deps(self): "train-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee", ) self.download( "datasets", @@ -414,6 +501,7 @@ def download_deps(self): "train-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00", ) self.download( "datasets", @@ -421,6 +509,7 @@ def download_deps(self): "t10k-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c", ) self.download( "datasets", @@ -428,6 +517,7 @@ def download_deps(self): "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d", ) def extra_cmake_args(self): @@ -445,6 +535,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-mnist" + def description(self) -> str: + return ( + "Digit recognition based on the MNIST database, one of the oldest and most popular " + "databases of handwritten digits. Uses neural networks to identify digits " + "and measures total calculation time." + ) + def bin_args(self): return ["-conv_algo", "ONEDNN_AUTO"] @@ -465,6 +562,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "inference", "image"] + class SVM(VelocityBase): def __init__(self, vb: VelocityBench): @@ -488,6 +588,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench svm" + def description(self) -> str: + return ( + "Implementation of Support Vector Machine, a popular classical machine learning technique. " + "Uses supervised learning models with associated algorithms to analyze data " + "for classification and regression analysis. Measures total elapsed time." 
+ ) + def bin_args(self): return [ f"{self.code_path}/a9a", @@ -500,3 +607,6 @@ def parse_output(self, stdout: str) -> float: return float(match.group(1)) else: raise ValueError("Failed to parse benchmark output.") + + def get_tags(self): + return ["SYCL", "application", "inference"] diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py similarity index 68% rename from unified-runtime/scripts/benchmarks/history.py rename to devops/scripts/benchmarks/history.py index 7902aa4f04c35..0b80c54ad7393 100644 --- a/unified-runtime/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,14 +6,14 @@ import os import json from pathlib import Path -from benches.result import Result, BenchmarkRun +import socket +from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone from utils.utils import run class BenchmarkHistory: - benchmark_run_index_max = 0 runs = [] def __init__(self, dir): @@ -35,42 +35,55 @@ def load(self, n: int): # Get all JSON files in the results directory benchmark_files = list(results_dir.glob("*.json")) - # Extract index numbers and sort files by index number - def extract_index(file_path: Path) -> int: + # Extract timestamp and sort files by it + def extract_timestamp(file_path: Path) -> str: try: - return int(file_path.stem.split("_")[0]) - except (IndexError, ValueError): - return -1 + return file_path.stem.split("_")[-1] + except IndexError: + return "" - benchmark_files = [ - file for file in benchmark_files if extract_index(file) != -1 - ] - benchmark_files.sort(key=extract_index) + benchmark_files.sort(key=extract_timestamp, reverse=True) # Load the first n benchmark files benchmark_runs = [] - for file_path in benchmark_files[n::-1]: + for file_path in benchmark_files[:n]: benchmark_run = self.load_result(file_path) if benchmark_run: benchmark_runs.append(benchmark_run) - if benchmark_files: - self.benchmark_run_index_max = extract_index(benchmark_files[-1]) - self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: - result = run("git rev-parse --short HEAD") + script_dir = os.path.dirname(os.path.abspath(__file__)) + result = run("git rev-parse --short HEAD", cwd=script_dir) git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin", cwd=script_dir) + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + except: git_hash = "unknown" + github_repo = None return BenchmarkRun( name=name, git_hash=git_hash, + github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, + hostname=socket.gethostname(), ) def save(self, save_name, results: list[Result], to_file=True): @@ -84,12 +97,9 @@ def save(self, save_name, results: 
list[Result], to_file=True): results_dir = Path(os.path.join(self.dir, "results")) os.makedirs(results_dir, exist_ok=True) - self.benchmark_run_index_max += 1 - file_path = Path( - os.path.join( - results_dir, f"{self.benchmark_run_index_max}_{save_name}.json" - ) - ) + # Use formatted timestamp for the filename + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: json.dump(serialized, file, indent=4) print(f"Benchmark results saved to {file_path}") @@ -120,6 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? + hostname=first_run.hostname, ) return average_benchmark_run diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js new file mode 100644 index 0000000000000..3e67ae1dce8e5 --- /dev/null +++ b/devops/scripts/benchmarks/html/config.js @@ -0,0 +1,2 @@ +//remoteDataUrl = 'https://example.com/data.json'; +//defaultCompareNames = ['baseline']; diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js new file mode 100644 index 0000000000000..a5b96c72834ba --- /dev/null +++ b/devops/scripts/benchmarks/html/data.js @@ -0,0 +1,3 @@ +benchmarkRuns = []; + +defaultCompareNames = []; diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html new file mode 100644 index 0000000000000..c0b4cd3d06b4f --- /dev/null +++ b/devops/scripts/benchmarks/html/index.html @@ -0,0 +1,82 @@ + + + + + + + Benchmark Results + + + + + + + + +
+ <!-- index.html body: the markup did not survive extraction; only the visible text remains.
+      Recoverable structure: a "Benchmark Results" page heading, an "Options" panel containing
+      "Display Options", "Suites", and "Tags" filter sections, followed by chart sections titled
+      "Historical Results", "Historical Layer Comparisons", and "Comparisons". -->
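Since the markup of the added index.html was lost above, the following is a minimal skeleton of what the page plausibly looks like, reconstructed from the element IDs, classes, and global handlers referenced by scripts.js and styles.css later in this patch. It is an illustrative sketch only, not the file added by the PR; the CDN script URLs, default checkbox states, and exact layout are assumptions.

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Benchmark Results</title>
  <link rel="stylesheet" href="styles.css">
  <!-- Chart.js plus a date adapter are needed for the 'timeseries' x-axis used in scripts.js;
       these CDN URLs are assumptions, not taken from the original file. -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns"></script>
  <!-- config.js/data.js are expected to define remoteDataUrl, benchmarkRuns, defaultCompareNames, etc. -->
  <script src="config.js"></script>
  <script src="data.js"></script>
  <script src="scripts.js"></script>
</head>
<body>
  <div class="container">
    <h1>Benchmark Results</h1>
    <div id="loading-indicator" class="loading-indicator">Loading...</div>

    <!-- Run selector: options are populated by setupRunSelector() in scripts.js -->
    <div class="run-selector">
      <select id="run-select"></select>
      <button onclick="addSelectedRun()">Add</button>
      <div id="selected-runs" class="selected-runs"></div>
    </div>

    <!-- Benchmark name filter: read as a regex by filterCharts() -->
    <div class="filter-container">
      <input type="text" id="bench-filter" placeholder="Filter benchmarks (regex)">
    </div>

    <details class="options-container">
      <summary>Options</summary>
      <div class="options-content">
        <div class="filter-section">
          <h3>Display Options</h3>
          <div class="display-options">
            <label><input type="checkbox" id="show-notes" checked> Show notes</label>
            <label><input type="checkbox" id="show-unstable"> Show unstable benchmarks</label>
          </div>
        </div>
        <div class="filter-section">
          <h3>Suites</h3>
          <div id="suite-filters"></div>
        </div>
        <div class="filter-section">
          <h3>Tags
            <button class="tag-action-button" onclick="toggleAllTags(true)">Select all</button>
            <button class="tag-action-button" onclick="toggleAllTags(false)">Clear</button>
          </h3>
          <div id="tag-filters"></div>
        </div>
      </div>
    </details>

    <!-- Chart sections: drawCharts() appends one .chart-container per benchmark or group -->
    <div class="timeseries">
      <h2>Historical Results</h2>
      <div class="charts"></div>
    </div>
    <div class="layer-comparisons">
      <h2>Historical Layer Comparisons</h2>
      <div class="charts"></div>
    </div>
    <div class="bar-charts">
      <h2>Comparisons</h2>
      <div class="charts"></div>
    </div>
  </div>
</body>
</html>

Every id and class above (run-select, selected-runs, bench-filter, show-notes, show-unstable, suite-filters, tag-filters, loading-indicator, and the .timeseries/.layer-comparisons/.bar-charts sections with .charts children) is taken from the DOM lookups in scripts.js, and the onclick handlers correspond to the functions it exports on window; only the surrounding layout is guessed.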
+ + diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js new file mode 100644 index 0000000000000..74716bec6b82f --- /dev/null +++ b/devops/scripts/benchmarks/html/scripts.js @@ -0,0 +1,976 @@ +// Copyright (C) 2024-2025 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Core state +let activeRuns = new Set(defaultCompareNames); +let chartInstances = new Map(); +let suiteNames = new Set(); +let timeseriesData, barChartsData, allRunNames; +let activeTags = new Set(); +let layerComparisonsData; + +// DOM Elements +let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; + +const colorPalette = [ + 'rgb(255, 50, 80)', + 'rgb(255, 145, 15)', + 'rgb(255, 220, 0)', + 'rgb(20, 200, 50)', + 'rgb(0, 130, 255)', + 'rgb(180, 60, 255)', + 'rgb(255, 40, 200)', + 'rgb(0, 210, 180)', + 'rgb(255, 90, 0)', + 'rgb(110, 220, 0)', + 'rgb(240, 100, 170)', + 'rgb(30, 175, 255)', + 'rgb(180, 210, 0)', + 'rgb(130, 0, 220)', + 'rgb(255, 170, 0)', + 'rgb(0, 170, 110)', + 'rgb(220, 80, 60)', + 'rgb(80, 115, 230)', + 'rgb(210, 190, 0)', +]; + +// Run selector functions +function updateSelectedRuns(forceUpdate = true) { + selectedRunsDiv.innerHTML = ''; + activeRuns.forEach(name => { + selectedRunsDiv.appendChild(createRunElement(name)); + }); + if (forceUpdate) + updateCharts(); +} + +function createRunElement(name) { + const runElement = document.createElement('span'); + runElement.className = 'selected-run'; + runElement.innerHTML = `${name} `; + return runElement; +} + +function addSelectedRun() { + const selectedRun = runSelect.value; + if (selectedRun && !activeRuns.has(selectedRun)) { + activeRuns.add(selectedRun); + updateSelectedRuns(); + } +} + +function removeRun(name) { + activeRuns.delete(name); + updateSelectedRuns(); +} + +// Chart creation and update +function createChart(data, containerId, type) { + if (chartInstances.has(containerId)) { + chartInstances.get(containerId).destroy(); + } + + const ctx = document.getElementById(containerId).getContext('2d'); + const options = { + responsive: true, + plugins: { + title: { + display: true, + text: data.label + }, + subtitle: { + display: true, + text: data.lower_is_better ? "Lower is better" : "Higher is better" + }, + tooltip: { + callbacks: { + label: (context) => { + if (type === 'time') { + const point = context.raw; + return [ + `${point.seriesName}:`, + `Value: ${point.y.toFixed(2)} ${data.unit}`, + `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`, + `Git Hash: ${point.gitHash}`, + ]; + } else { + return [`${context.dataset.label}:`, + `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`, + ]; + } + } + } + } + }, + scales: { + y: { + title: { + display: true, + text: data.unit + }, + grace: '20%', + } + } + }; + + if (type === 'time') { + options.interaction = { + mode: 'nearest', + intersect: false + }; + options.onClick = (event, elements) => { + if (elements.length > 0) { + const point = elements[0].element.$context.raw; + if (point.gitHash && point.gitRepo) { + window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank'); + } + } + }; + options.scales.x = { + type: 'timeseries', + time: { + unit: 'day' + }, + ticks: { + maxRotation: 45, + minRotation: 45, + autoSkip: true, + maxTicksLimit: 10 + } + }; + } + + const chartConfig = { + type: type === 'time' ? 'line' : 'bar', + data: type === 'time' ? 
{ + datasets: createTimeseriesDatasets(data) + } : { + labels: data.labels, + datasets: data.datasets + }, + options: options + }; + + const chart = new Chart(ctx, chartConfig); + chartInstances.set(containerId, chart); + return chart; +} + +function createTimeseriesDatasets(data) { + return Object.entries(data.runs).map(([name, runData], index) => ({ + label: name, + data: runData.points.map(p => ({ + seriesName: name, + x: p.date, + y: p.value, + gitHash: p.git_hash, + gitRepo: p.github_repo, + stddev: p.stddev + })), + borderColor: colorPalette[index % colorPalette.length], + backgroundColor: colorPalette[index % colorPalette.length], + borderWidth: 1, + pointRadius: 3, + pointStyle: 'circle', + pointHoverRadius: 5 + })); +} + +function updateCharts() { + const filterRunData = (chart) => ({ + ...chart, + runs: Object.fromEntries( + Object.entries(chart.runs).filter(([_, data]) => + activeRuns.has(data.runName) + ) + ) + }); + + const filteredTimeseriesData = timeseriesData.map(filterRunData); + const filteredLayerComparisonsData = layerComparisonsData.map(filterRunData); + + const filteredBarChartsData = barChartsData.map(chart => ({ + ...chart, + labels: chart.labels.filter(label => activeRuns.has(label)), + datasets: chart.datasets.map(dataset => ({ + ...dataset, + data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i])) + })) + })); + + drawCharts(filteredTimeseriesData, filteredBarChartsData, filteredLayerComparisonsData); +} + +function drawCharts(filteredTimeseriesData, filteredBarChartsData, filteredLayerComparisonsData) { + // Clear existing charts + document.querySelectorAll('.charts').forEach(container => container.innerHTML = ''); + chartInstances.forEach(chart => chart.destroy()); + chartInstances.clear(); + + // Create timeseries charts + filteredTimeseriesData.forEach((data, index) => { + const containerId = `timeseries-${index}`; + const container = createChartContainer(data, containerId, 'benchmark'); + document.querySelector('.timeseries .charts').appendChild(container); + createChart(data, containerId, 'time'); + }); + + // Create layer comparison charts + filteredLayerComparisonsData.forEach((data, index) => { + const containerId = `layer-comparison-${index}`; + const container = createChartContainer(data, containerId, 'group'); + document.querySelector('.layer-comparisons .charts').appendChild(container); + createChart(data, containerId, 'time'); + }); + + // Create bar charts + filteredBarChartsData.forEach((data, index) => { + const containerId = `barchart-${index}`; + const container = createChartContainer(data, containerId, 'group'); + document.querySelector('.bar-charts .charts').appendChild(container); + createChart(data, containerId, 'bar'); + }); + + // Apply current filters + filterCharts(); +} + +function createChartContainer(data, canvasId, type) { + const container = document.createElement('div'); + container.className = 'chart-container'; + container.setAttribute('data-label', data.label); + container.setAttribute('data-suite', data.suite); + + // Check if this benchmark is marked as unstable + const metadata = metadataForLabel(data.label, type); + if (metadata && metadata.unstable) { + container.setAttribute('data-unstable', 'true'); + + // Add unstable warning + const unstableWarning = document.createElement('div'); + unstableWarning.className = 'benchmark-unstable'; + unstableWarning.textContent = metadata.unstable; + unstableWarning.style.display = isUnstableEnabled() ? 
'block' : 'none'; + container.appendChild(unstableWarning); + } + + // Add description if present in metadata (moved outside of details) + if (metadata && metadata.description) { + const descElement = document.createElement('div'); + descElement.className = 'benchmark-description'; + descElement.textContent = metadata.description; + container.appendChild(descElement); + } + + // Add notes if present + if (metadata && metadata.notes) { + const noteElement = document.createElement('div'); + noteElement.className = 'benchmark-note'; + noteElement.textContent = metadata.notes; + noteElement.style.display = isNotesEnabled() ? 'block' : 'none'; + container.appendChild(noteElement); + } + + // Add tags if present + if (metadata && metadata.tags) { + container.setAttribute('data-tags', metadata.tags.join(',')); + + // Add tags display + const tagsContainer = document.createElement('div'); + tagsContainer.className = 'benchmark-tags'; + + metadata.tags.forEach(tag => { + const tagElement = document.createElement('span'); + tagElement.className = 'tag'; + tagElement.textContent = tag; + tagElement.setAttribute('data-tag', tag); + + // Add tooltip with tag description + if (benchmarkTags[tag]) { + tagElement.setAttribute('title', benchmarkTags[tag].description); + } + + tagsContainer.appendChild(tagElement); + }); + + container.appendChild(tagsContainer); + } + + const canvas = document.createElement('canvas'); + canvas.id = canvasId; + container.appendChild(canvas); + + // Create details section for extra info + const details = document.createElement('details'); + const summary = document.createElement('summary'); + summary.textContent = "Details"; + + // Add subtle download button to the summary + const downloadButton = document.createElement('button'); + downloadButton.className = 'download-button'; + downloadButton.textContent = 'Download'; + downloadButton.onclick = (event) => { + event.stopPropagation(); // Prevent details toggle + downloadChart(canvasId, data.label); + }; + summary.appendChild(downloadButton); + details.appendChild(summary); + + // Create and append extra info + const extraInfo = document.createElement('div'); + extraInfo.className = 'extra-info'; + latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data, 'benchmark'); + details.appendChild(extraInfo); + + container.appendChild(details); + + return container; +} + +function metadataForLabel(label, type) { + for (const [key, metadata] of Object.entries(benchmarkMetadata)) { + if (metadata.type === type && label.startsWith(key)) { + return metadata; + } + } + + return null; +} + +// Pre-compute a lookup for the latest run per label +function createLatestRunsLookup(benchmarkRuns) { + const latestRunsMap = new Map(); + + benchmarkRuns.forEach(run => { + // Yes, we need to convert the date every time. I checked. 
+ const runDate = new Date(run.date); + run.results.forEach(result => { + const label = result.label; + if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) { + latestRunsMap.set(label, { + run, + result + }); + } + }); + }); + + return latestRunsMap; +} + +function extractLabels(data) { + // For layer comparison charts + if (data.benchmarkLabels) { + return data.benchmarkLabels; + } + + // For bar charts + if (data.datasets) { + return data.datasets.map(dataset => dataset.label); + } + + // For time series charts + return [data.label]; +} + +function generateExtraInfo(latestRunsLookup, data) { + const labels = extractLabels(data); + + return labels.map(label => { + const metadata = metadataForLabel(label, 'benchmark'); + const latestRun = latestRunsLookup.get(label); + + let html = '
'; + + if (metadata && latestRun) { + html += `${label}: ${formatCommand(latestRun.result)}
`; + + if (metadata.description) { + html += `Description: ${metadata.description}`; + } + + if (metadata.notes) { + html += `
Notes: ${metadata.notes}`; + } + + if (metadata.unstable) { + html += `
⚠️ Unstable: ${metadata.unstable}`; + } + } else { + html += `${label}: No data available`; + } + + html += '
'; + return html; + }).join(''); +} + +function formatCommand(run) { + const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' '); + let command = run.command ? [...run.command] : []; + + return `${envVars} ${command.join(' ')}`.trim(); +} + +function downloadChart(canvasId, label) { + const chart = chartInstances.get(canvasId); + if (chart) { + const link = document.createElement('a'); + link.href = chart.toBase64Image('image/png', 1) + link.download = `${label}.png`; + link.click(); + } +} + +// URL and filtering functions +function getQueryParam(param) { + const urlParams = new URLSearchParams(window.location.search); + return urlParams.get(param); +} + +function updateURL() { + const url = new URL(window.location); + const regex = document.getElementById('bench-filter').value; + const activeSuites = getActiveSuites(); + const activeRunsList = Array.from(activeRuns); + const activeTagsList = Array.from(activeTags); + + if (regex) { + url.searchParams.set('regex', regex); + } else { + url.searchParams.delete('regex'); + } + + if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) { + url.searchParams.set('suites', activeSuites.join(',')); + } else { + url.searchParams.delete('suites'); + } + + // Add tags to URL + if (activeTagsList.length > 0) { + url.searchParams.set('tags', activeTagsList.join(',')); + } else { + url.searchParams.delete('tags'); + } + + // Handle the runs parameter + if (activeRunsList.length > 0) { + // Check if the active runs are the same as default runs + const defaultRuns = new Set(defaultCompareNames || []); + const isDefaultRuns = activeRunsList.length === defaultRuns.size && + activeRunsList.every(run => defaultRuns.has(run)); + + if (isDefaultRuns) { + // If it's just the default runs, omit the parameter entirely + url.searchParams.delete('runs'); + } else { + url.searchParams.set('runs', activeRunsList.join(',')); + } + } else { + url.searchParams.delete('runs'); + } + + // Add toggle states to URL + if (isNotesEnabled()) { + url.searchParams.delete('notes'); + } else { + url.searchParams.set('notes', 'false'); + } + + if (!isUnstableEnabled()) { + url.searchParams.delete('unstable'); + } else { + url.searchParams.set('unstable', 'true'); + } + + history.replaceState(null, '', url); +} + +function filterCharts() { + const regexInput = document.getElementById('bench-filter').value; + const regex = new RegExp(regexInput, 'i'); + const activeSuites = getActiveSuites(); + + document.querySelectorAll('.chart-container').forEach(container => { + const label = container.getAttribute('data-label'); + const suite = container.getAttribute('data-suite'); + const isUnstable = container.getAttribute('data-unstable') === 'true'; + const tags = container.getAttribute('data-tags') ? + container.getAttribute('data-tags').split(',') : []; + + // Check if benchmark has all active tags (if any are selected) + const hasAllActiveTags = activeTags.size === 0 || + Array.from(activeTags).every(tag => tags.includes(tag)); + + // Hide unstable benchmarks if showUnstable is false + const shouldShow = regex.test(label) && + activeSuites.includes(suite) && + (isUnstableEnabled() || !isUnstable) && + hasAllActiveTags; + + container.style.display = shouldShow ? 
'' : 'none'; + }); + + updateURL(); +} + +function getActiveSuites() { + return Array.from(document.querySelectorAll('.suite-checkbox:checked')) + .map(checkbox => checkbox.getAttribute('data-suite')); +} + +// Data processing +function processTimeseriesData(benchmarkRuns) { + const resultsByLabel = {}; + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + if (!resultsByLabel[result.label]) { + resultsByLabel[result.label] = { + label: result.label, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + runs: {} + }; + } + + addRunDataPoint(resultsByLabel[result.label], run, result, run.name); + }); + }); + + return Object.values(resultsByLabel); +} + +function processBarChartsData(benchmarkRuns) { + const groupedResults = {}; + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + if (!result.explicit_group) return; + + if (!groupedResults[result.explicit_group]) { + // Look up group metadata + const groupMetadata = metadataForLabel(result.explicit_group); + + groupedResults[result.explicit_group] = { + label: result.explicit_group, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + labels: [], + datasets: [], + // Add metadata if available + description: groupMetadata?.description || null, + notes: groupMetadata?.notes || null, + unstable: groupMetadata?.unstable || null + }; + } + + const group = groupedResults[result.explicit_group]; + + if (!group.labels.includes(run.name)) { + group.labels.push(run.name); + } + + let dataset = group.datasets.find(d => d.label === result.label); + if (!dataset) { + const datasetIndex = group.datasets.length; + dataset = { + label: result.label, + data: new Array(group.labels.length).fill(null), + backgroundColor: colorPalette[datasetIndex % colorPalette.length], + borderColor: colorPalette[datasetIndex % colorPalette.length], + borderWidth: 1 + }; + group.datasets.push(dataset); + } + + const runIndex = group.labels.indexOf(run.name); + if (dataset.data[runIndex] == null) + dataset.data[runIndex] = result.value; + }); + }); + + return Object.values(groupedResults); +} + +function getLayerTags(metadata) { + const layerTags = new Set(); + if (metadata?.tags) { + metadata.tags.forEach(tag => { + if (tag.startsWith('SYCL') || tag.startsWith('UR') || tag === 'L0') { + layerTags.add(tag); + } + }); + } + return layerTags; +} + +function processLayerComparisonsData(benchmarkRuns) { + const groupedResults = {}; + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + if (!result.explicit_group) return; + + // Skip if no metadata available + const metadata = metadataForLabel(result.explicit_group, 'group'); + if (!metadata) return; + + // Get all benchmark labels in this group + const labelsInGroup = new Set( + benchmarkRuns.flatMap(r => + r.results + .filter(res => res.explicit_group === result.explicit_group) + .map(res => res.label) + ) + ); + + // Check if this group compares different layers + const uniqueLayers = new Set(); + labelsInGroup.forEach(label => { + const labelMetadata = metadataForLabel(label, 'benchmark'); + const layerTags = getLayerTags(labelMetadata); + layerTags.forEach(tag => uniqueLayers.add(tag)); + }); + + // Only process groups that compare different layers + if (uniqueLayers.size <= 1) return; + + if (!groupedResults[result.explicit_group]) { + groupedResults[result.explicit_group] = { + label: result.explicit_group, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + runs: {}, + 
benchmarkLabels: [], + description: metadata?.description || null, + notes: metadata?.notes || null, + unstable: metadata?.unstable || null + }; + } + + const group = groupedResults[result.explicit_group]; + const name = result.label + ' (' + run.name + ')'; + + // Add the benchmark label if it's not already in the array + if (!group.benchmarkLabels.includes(result.label)) { + group.benchmarkLabels.push(result.label); + } + + addRunDataPoint(group, run, result, name); + }); + }); + + return Object.values(groupedResults); +} + +function createRunDataStructure(run, result, label) { + return { + runName: run.name, + points: [{ + date: new Date(run.date), + value: result.value, + stddev: result.stddev, + git_hash: run.git_hash, + github_repo: run.github_repo, + label: label || result.label + }] + }; +} + +function addRunDataPoint(group, run, result, name = null) { + const runKey = name || result.label + ' (' + run.name + ')'; + + if (!group.runs[runKey]) { + group.runs[runKey] = { + runName: run.name, + points: [] + }; + } + + group.runs[runKey].points.push({ + date: new Date(run.date), + value: result.value, + stddev: result.stddev, + git_hash: run.git_hash, + github_repo: run.github_repo, + }); + + return group; +} + +// Setup functions +function setupRunSelector() { + runSelect = document.getElementById('run-select'); + selectedRunsDiv = document.getElementById('selected-runs'); + + allRunNames.forEach(name => { + const option = document.createElement('option'); + option.value = name; + option.textContent = name; + runSelect.appendChild(option); + }); + + updateSelectedRuns(false); +} + +function setupSuiteFilters() { + suiteFiltersContainer = document.getElementById('suite-filters'); + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + suiteNames.add(result.suite); + }); + }); + + suiteNames.forEach(suite => { + const label = document.createElement('label'); + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.className = 'suite-checkbox'; + checkbox.dataset.suite = suite; + checkbox.checked = true; + label.appendChild(checkbox); + label.appendChild(document.createTextNode(' ' + suite)); + suiteFiltersContainer.appendChild(label); + suiteFiltersContainer.appendChild(document.createTextNode(' ')); + }); +} + +function isNotesEnabled() { + const notesToggle = document.getElementById('show-notes'); + return notesToggle.checked; +} + +function isUnstableEnabled() { + const unstableToggle = document.getElementById('show-unstable'); + return unstableToggle.checked; +} + +function setupToggles() { + const notesToggle = document.getElementById('show-notes'); + const unstableToggle = document.getElementById('show-unstable'); + + notesToggle.addEventListener('change', function() { + // Update all note elements visibility + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = isNotesEnabled() ? 'block' : 'none'; + }); + updateURL(); + }); + + unstableToggle.addEventListener('change', function() { + // Update all unstable warning elements visibility + document.querySelectorAll('.benchmark-unstable').forEach(warning => { + warning.style.display = isUnstableEnabled() ? 
'block' : 'none'; + }); + filterCharts(); + }); + + // Initialize from URL params if present + const notesParam = getQueryParam('notes'); + const unstableParam = getQueryParam('unstable'); + + if (notesParam !== null) { + let showNotes = notesParam === 'true'; + notesToggle.checked = showNotes; + } + + if (unstableParam !== null) { + let showUnstable = unstableParam === 'true'; + unstableToggle.checked = showUnstable; + } +} + +function setupTagFilters() { + tagFiltersContainer = document.getElementById('tag-filters'); + + const allTags = []; + + if (benchmarkTags) { + for (const tag in benchmarkTags) { + if (!allTags.includes(tag)) { + allTags.push(tag); + } + } + } + + // Create tag filter elements + allTags.forEach(tag => { + const tagContainer = document.createElement('div'); + tagContainer.className = 'tag-filter'; + + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.id = `tag-${tag}`; + checkbox.className = 'tag-checkbox'; + checkbox.dataset.tag = tag; + + const label = document.createElement('label'); + label.htmlFor = `tag-${tag}`; + label.textContent = tag; + + // Add info icon with tooltip if tag description exists + if (benchmarkTags[tag]) { + const infoIcon = document.createElement('span'); + infoIcon.className = 'tag-info'; + infoIcon.textContent = 'ⓘ'; + infoIcon.title = benchmarkTags[tag].description; + label.appendChild(infoIcon); + } + + checkbox.addEventListener('change', function() { + if (this.checked) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + filterCharts(); + }); + + tagContainer.appendChild(checkbox); + tagContainer.appendChild(label); + tagFiltersContainer.appendChild(tagContainer); + }); +} + +function toggleAllTags(select) { + const checkboxes = document.querySelectorAll('.tag-checkbox'); + + checkboxes.forEach(checkbox => { + checkbox.checked = select; + const tag = checkbox.dataset.tag; + + if (select) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + }); + + filterCharts(); +} + +function initializeCharts() { + // Process raw data + timeseriesData = processTimeseriesData(benchmarkRuns); + barChartsData = processBarChartsData(benchmarkRuns); + layerComparisonsData = processLayerComparisonsData(benchmarkRuns); + allRunNames = [...new Set(benchmarkRuns.map(run => run.name))]; + + // Set up active runs + const runsParam = getQueryParam('runs'); + if (runsParam) { + const runsFromUrl = runsParam.split(','); + + // Start with an empty set + activeRuns = new Set(); + + // Process each run from URL + runsFromUrl.forEach(run => { + if (run === 'default') { + // Special case: include all default runs + (defaultCompareNames || []).forEach(defaultRun => { + if (allRunNames.includes(defaultRun)) { + activeRuns.add(defaultRun); + } + }); + } else if (allRunNames.includes(run)) { + // Add the specific run if it exists + activeRuns.add(run); + } + }); + } else { + // No runs parameter, use defaults + activeRuns = new Set(defaultCompareNames || []); + } + + // Setup UI components + setupRunSelector(); + setupSuiteFilters(); + setupTagFilters(); + setupToggles(); + + // Apply URL parameters + const regexParam = getQueryParam('regex'); + const suitesParam = getQueryParam('suites'); + const tagsParam = getQueryParam('tags'); + + if (regexParam) { + document.getElementById('bench-filter').value = regexParam; + } + + if (suitesParam) { + const suites = suitesParam.split(','); + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.checked = 
suites.includes(checkbox.getAttribute('data-suite')); + }); + } + + // Apply tag filters from URL + if (tagsParam) { + const tags = tagsParam.split(','); + tags.forEach(tag => { + const checkbox = document.querySelector(`.tag-checkbox[data-tag="${tag}"]`); + if (checkbox) { + checkbox.checked = true; + activeTags.add(tag); + } + }); + } + + // Setup event listeners + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.addEventListener('change', filterCharts); + }); + document.getElementById('bench-filter').addEventListener('input', filterCharts); + + // Draw initial charts + updateCharts(); +} + +// Make functions available globally for onclick handlers +window.addSelectedRun = addSelectedRun; +window.removeRun = removeRun; +window.toggleAllTags = toggleAllTags; + +// Load data based on configuration +function loadData() { + const loadingIndicator = document.getElementById('loading-indicator'); + loadingIndicator.style.display = 'block'; // Show loading indicator + + if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') { + // Fetch data from remote URL + fetch(remoteDataUrl) + .then(response => response.json()) + .then(data => { + benchmarkRuns = data.runs || data; + benchmarkMetadata = data.metadata || benchmarkMetadata || {}; + benchmarkTags = data.tags || benchmarkTags || {}; + initializeCharts(); + }) + .catch(error => { + console.error('Error fetching remote data:', error); + loadingIndicator.textContent = 'Fetching remote data failed.'; + }) + .finally(() => { + loadingIndicator.style.display = 'none'; // Hide loading indicator + }); + } else { + // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js) + initializeCharts(); + loadingIndicator.style.display = 'none'; // Hide loading indicator + } +} + +// Initialize when DOM is ready +document.addEventListener('DOMContentLoaded', () => { + loadData(); +}); diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css new file mode 100644 index 0000000000000..3e9c3bd22fc37 --- /dev/null +++ b/devops/scripts/benchmarks/html/styles.css @@ -0,0 +1,357 @@ +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + margin: 0; + padding: 16px; + background: #f8f9fa; +} +.container { + max-width: 1100px; + margin: 0 auto; +} +h1, h2 { + color: #212529; + text-align: center; + margin-bottom: 24px; + font-weight: 500; +} +.chart-container { + background: white; + border-radius: 8px; + padding: 24px; + margin-bottom: 24px; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); +} +@media (max-width: 768px) { + body { + padding: 12px; + } + .chart-container { + padding: 16px; + border-radius: 6px; + } + h1 { + font-size: 24px; + margin-bottom: 16px; + } +} +.filter-container { + text-align: center; + margin-bottom: 24px; +} +.filter-container input { + padding: 8px; + font-size: 16px; + border: 1px solid #ccc; + border-radius: 4px; + width: 400px; + max-width: 100%; +} +.suite-filter-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.suite-checkbox { + margin: 0 8px; +} +details { + margin-bottom: 24px; +} +summary { + display: flex; + justify-content: space-between; + align-items: center; + font-size: 16px; + font-weight: 500; + cursor: pointer; + padding: 12px 16px; + background: #dee2e6; + border-radius: 8px; + user-select: none; +} +summary:hover { + background: #ced4da; +} +summary::marker { + display: none; +} +summary::-webkit-details-marker { + 
display: none; +} +summary::after { + content: "▼"; + font-size: 12px; + margin-left: 8px; + transition: transform 0.3s; +} +details[open] summary::after { + transform: rotate(180deg); +} +.extra-info { + padding: 8px; + background: #f8f9fa; + border-radius: 8px; + margin-top: 8px; +} +.run-selector { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.run-selector select { + width: 300px; + padding: 8px; + margin-right: 8px; +} +.run-selector button { + padding: 8px 16px; + background: #0068B5; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; +} +.run-selector button:hover { + background: #00C7FD; +} +.selected-runs { + margin-top: 12px; +} +.selected-run { + display: inline-block; + padding: 4px 8px; + margin: 4px; + background: #e2e6ea; + border-radius: 4px; +} +.selected-run button { + margin-left: 8px; + padding: 0 4px; + background: none; + border: none; + color: #dc3545; + cursor: pointer; +} +.download-button { + background: none; + border: none; + color: #0068B5; + cursor: pointer; + font-size: 16px; + padding: 4px; + margin-left: 8px; +} +.download-button:hover { + color: #00C7FD; +} +.loading-indicator { + text-align: center; + font-size: 18px; + color: #0068B5; + margin-bottom: 20px; +} +.extra-info-entry { + border: 1px solid #ddd; + padding: 10px; + margin-bottom: 10px; + background-color: #f9f9f9; + border-radius: 5px; +} +.extra-info-entry strong { + display: block; + margin-bottom: 5px; +} +.extra-info-entry em { + color: #555; +} +.display-options-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.display-options-container label { + margin: 0 12px; + cursor: pointer; +} +.display-options-container input { + margin-right: 8px; +} +.benchmark-note { + background-color: #cfe2ff; + color: #084298; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #084298; + white-space: pre-line; +} +.benchmark-unstable { + background-color: #f8d7da; + color: #842029; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #842029; + white-space: pre-line; +} +.note-text { + color: #084298; +} +.unstable-warning { + color: #842029; + font-weight: bold; +} +.unstable-text { + color: #842029; +} +.options-container { + margin-bottom: 24px; + background: #e9ecef; + border-radius: 8px; + overflow: hidden; +} +.options-container summary { + padding: 12px 16px; + font-weight: 500; + cursor: pointer; + background: #dee2e6; + user-select: none; +} +.options-container summary:hover { + background: #ced4da; +} +.options-content { + padding: 16px; + display: flex; + flex-wrap: wrap; + gap: 24px; +} +.filter-section { + flex: 1; + min-width: 300px; +} +.filter-section h3 { + margin-top: 0; + margin-bottom: 12px; + font-size: 18px; + font-weight: 500; + text-align: left; + display: flex; + align-items: center; +} +#suite-filters { + display: flex; + flex-wrap: wrap; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; +} +.display-options { + display: flex; + flex-direction: column; + gap: 8px; +} +.display-options label { + display: flex; + align-items: center; + cursor: pointer; +} +.display-options input { + margin-right: 8px; +} +.benchmark-description { + background-color: #f2f2f2; + color: #333; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #6c757d; + 
white-space: pre-line; + font-style: italic; +} +/* Tag styles */ +.benchmark-tags { + display: flex; + flex-wrap: wrap; + gap: 4px; + margin-bottom: 10px; +} + +.tag { + display: inline-block; + background-color: #e2e6ea; + color: #495057; + padding: 2px 8px; + border-radius: 12px; + font-size: 12px; + cursor: help; +} + +.tag-filter { + display: inline-flex; + align-items: center; + margin: 4px; +} + +.tag-filter label { + margin-left: 4px; + cursor: pointer; + display: flex; + align-items: center; +} + +.tag-info { + color: #0068B5; + margin-left: 4px; + cursor: help; + font-size: 12px; +} + +#tag-filters { + display: flex; + flex-wrap: wrap; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; +} + +.tag-action-button { + padding: 2px 8px; + background: #e2e6ea; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 12px; + margin-left: 8px; + vertical-align: middle; +} + +.tag-action-button:hover { + background: #ced4da; +} + +.remove-tag { + background: none; + border: none; + color: white; + margin-left: 4px; + cursor: pointer; + font-size: 16px; + padding: 0 4px; +} + +.remove-tag:hover { + color: #f8d7da; +} diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py similarity index 74% rename from unified-runtime/scripts/benchmarks/main.py rename to devops/scripts/benchmarks/main.py index 4ad90b39b9001..14e5fe1a04624 100755 --- a/unified-runtime/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,6 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * +from presets import enabled_suites, presets import argparse import re @@ -27,23 +28,27 @@ def run_iterations( - benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]] + benchmark: Benchmark, + env_vars, + iters: int, + results: dict[str, list[Result]], + failures: dict[str, str], ): for iter in range(iters): - print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True) + print(f"running {benchmark.name()}, iteration {iter}... ", flush=True) bench_results = benchmark.run(env_vars) if bench_results is None: - print(f"did not finish (OK for sycl-bench).") + failures[benchmark.name()] = "benchmark produced no results!" break for bench_result in bench_results: - # TODO: report failures in markdown/html ? if not bench_result.passed: - print(f"complete ({bench_result.label}: verification FAILED)") + failures[bench_result.label] = "verification failed" + print(f"complete ({bench_result.label}: verification failed).") continue print( - f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." + f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." 
) bench_result.name = bench_result.label @@ -132,6 +137,18 @@ def process_results( return valid_results, processed +def collect_metadata(suites): + metadata = {} + + for s in suites: + metadata.update(s.additionalMetadata()) + suite_benchmarks = s.benchmarks() + for benchmark in suite_benchmarks: + metadata[benchmark.name()] = benchmark.get_metadata() + + return metadata + + def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) @@ -142,22 +159,29 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): options.extra_ld_libraries.extend(cr.ld_libraries()) options.extra_env_vars.update(cr.env_vars()) - suites = ( - [ - ComputeBench(directory), - VelocityBench(directory), - SyclBench(directory), - LlamaCppBench(directory), - UMFSuite(directory), - # TestSuite() - ] - if not options.dry_run - else [] - ) + suites = [ + ComputeBench(directory), + VelocityBench(directory), + SyclBench(directory), + LlamaCppBench(directory), + UMFSuite(directory), + TestSuite(), + ] + + # Collect metadata from all benchmarks without setting them up + metadata = collect_metadata(suites) + + # If dry run, we're done + if options.dry_run: + suites = [] benchmarks = [] + failures = {} for s in suites: + if s.name() not in enabled_suites(options.preset): + continue + suite_benchmarks = s.benchmarks() if filter: suite_benchmarks = [ @@ -170,25 +194,26 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"Setting up {type(s).__name__}") try: s.setup() - except: + except Exception as e: + failures[s.name()] = f"Suite setup failure: {e}" print(f"{type(s).__name__} setup failed. Benchmarks won't be added.") else: print(f"{type(s).__name__} setup complete.") benchmarks += suite_benchmarks - for b in benchmarks: - print(b.name()) - for benchmark in benchmarks: try: - print(f"Setting up {benchmark.name()}... ") + if options.verbose: + print(f"Setting up {benchmark.name()}... ") benchmark.setup() - print(f"{benchmark.name()} setup complete.") + if options.verbose: + print(f"{benchmark.name()} setup complete.") except Exception as e: if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark setup failure: {e}" print(f"failed: {e}") results = [] @@ -199,7 +224,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): processed: list[Result] = [] for _ in range(options.iterations_stddev): run_iterations( - benchmark, merged_env_vars, options.iterations, intermediate_results + benchmark, + merged_env_vars, + options.iterations, + intermediate_results, + failures, ) valid, processed = process_results( intermediate_results, benchmark.stddev_threshold() @@ -211,12 +240,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark run failure: {e}" print(f"failed: {e}") for benchmark in benchmarks: - print(f"tearing down {benchmark.name()}... ", end="", flush=True) + # this never has any useful information anyway, so hide it behind verbose + if options.verbose: + print(f"tearing down {benchmark.name()}... 
", flush=True) benchmark.teardown() - print("complete.") + if options.verbose: + print("{benchmark.name()} teardown complete.") this_name = options.current_run_name chart_data = {} @@ -224,7 +257,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if not options.dry_run: chart_data = {this_name: results} - history = BenchmarkHistory(directory) + results_dir = directory + if options.custom_results_dir: + results_dir = Path(options.custom_results_dir) + history = BenchmarkHistory(results_dir) # limit how many files we load. # should this be configurable? history.load(1000) @@ -241,14 +277,18 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_markdown: markdown_content = generate_markdown( - this_name, chart_data, options.output_markdown + this_name, chart_data, failures, options.output_markdown ) - with open("benchmark_results.md", "w") as file: + md_path = options.output_directory + if options.output_directory is None: + md_path = os.getcwd() + + with open(os.path.join(md_path, "benchmark_results.md"), "w") as file: file.write(markdown_content) print( - f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md" + f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md" ) saved_name = save_name if save_name is not None else this_name @@ -262,14 +302,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): compare_names.append(saved_name) if options.output_html: - html_content = generate_html(history.runs, "intel/llvm", compare_names) - - with open("benchmark_results.html", "w") as file: - file.write(html_content) - - print( - f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html" - ) + html_path = options.output_directory + if options.output_directory is None: + html_path = os.path.join(os.path.dirname(__file__), "html") + generate_html(history.runs, compare_names, html_path, metadata) def validate_and_parse_env_args(env_args): @@ -297,7 +333,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--adapter", type=str, - help="Options to build the Unified Runtime as part of the benchmark", + help="Unified Runtime adapter to use.", default="level_zero", ) parser.add_argument( @@ -305,6 +341,11 @@ def validate_and_parse_env_args(env_args): help="Do not rebuild the benchmarks from scratch.", action="store_true", ) + parser.add_argument( + "--redownload", + help="Always download benchmark data dependencies, even if they already exist.", + action="store_true", + ) parser.add_argument( "--env", type=str, @@ -347,12 +388,6 @@ def validate_and_parse_env_args(env_args): help="Regex pattern to filter benchmarks by name.", default=None, ) - parser.add_argument( - "--epsilon", - type=float, - help="Threshold to consider change of performance significant", - default=options.epsilon, - ) parser.add_argument( "--verbose", help="Print output of all the commands.", action="store_true" ) @@ -379,7 +414,17 @@ def validate_and_parse_env_args(env_args): help="Specify whether markdown output should fit the content size limit for request validation", ) parser.add_argument( - "--output-html", help="Create HTML output", action="store_true", default=False + "--output-html", + help="Create HTML output. 
Local output is for direct local viewing of the html file, remote is for server deployment.", + nargs="?", + const=options.output_html, + choices=["local", "remote"], + ) + parser.add_argument( + "--output-dir", + type=str, + help="Location for output files, if --output-html or --output_markdown was specified.", + default=None, ) parser.add_argument( "--dry-run", @@ -423,6 +468,25 @@ def validate_and_parse_env_args(env_args): help="Directory for cublas library", default=None, ) + parser.add_argument( + "--preset", + type=str, + choices=[p for p in presets.keys()], + help="Benchmark preset to run.", + default=options.preset, + ) + parser.add_argument( + "--results-dir", + type=str, + help="Specify a custom results directory", + default=options.custom_results_dir, + ) + parser.add_argument( + "--build-jobs", + type=int, + help="Number of build jobs to run simultaneously", + default=options.build_jobs, + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -430,10 +494,10 @@ def validate_and_parse_env_args(env_args): options.workdir = args.benchmark_directory options.verbose = args.verbose options.rebuild = not args.no_rebuild + options.redownload = args.redownload options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout - options.epsilon = args.epsilon options.ur = args.ur options.ur_adapter = args.adapter options.exit_on_failure = args.exit_on_failure @@ -448,12 +512,19 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory + options.preset = args.preset + options.custom_results_dir = args.results_dir + options.build_jobs = args.build_jobs if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") if args.compute_runtime is not None: options.build_compute_runtime = True options.compute_runtime_tag = args.compute_runtime + if args.output_dir is not None: + if not os.path.isdir(args.output_dir): + parser.error("Specified --output-dir is not a valid path") + options.output_directory = os.path.abspath(args.output_dir) benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py similarity index 76% rename from unified-runtime/scripts/benchmarks/options.py rename to devops/scripts/benchmarks/options.py index 2e92675264544..c852e50c71372 100644 --- a/unified-runtime/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,5 +1,8 @@ from dataclasses import dataclass, field from enum import Enum +import multiprocessing + +from presets import presets class Compare(Enum): @@ -21,6 +24,7 @@ class Options: ur_adapter: str = None umf: str = None rebuild: bool = True + redownload: bool = False benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 3 @@ -28,18 +32,20 @@ class Options: compare: Compare = Compare.LATEST compare_max: int = 10 # average/median over how many results output_markdown: MarkdownSize = MarkdownSize.SHORT - output_html: bool = False + output_html: str = "local" + output_directory: str = None dry_run: bool = False - # these two should probably be merged into one setting stddev_threshold: float = 0.02 - epsilon: float = 0.02 iterations_stddev: int = 5 build_compute_runtime: bool = False extra_ld_libraries: list[str] = field(default_factory=list) extra_env_vars: dict = 
field(default_factory=dict) - compute_runtime_tag: str = "25.05.32567.12" + compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" + preset: str = "Full" + custom_results_dir = None + build_jobs: int = multiprocessing.cpu_count() options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py new file mode 100644 index 0000000000000..319e796a3831d --- /dev/null +++ b/devops/scripts/benchmarks/output_html.py @@ -0,0 +1,58 @@ +# Copyright (C) 2024-2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import json +import os +from options import options +from utils.result import BenchmarkMetadata, BenchmarkOutput +from benches.base import benchmark_tags, benchmark_tags_dict + + +def generate_html( + benchmark_runs: list, + compare_names: list[str], + html_path: str, + metadata: dict[str, BenchmarkMetadata], +): + benchmark_runs.sort(key=lambda run: run.date, reverse=True) + + # Create the comprehensive output object + output = BenchmarkOutput( + runs=benchmark_runs, + metadata=metadata, + tags=benchmark_tags_dict, + default_compare_names=compare_names, + ) + + if options.output_html == "local": + data_path = os.path.join(html_path, "data.js") + with open(data_path, "w") as f: + # For local format, we need to write JavaScript variable assignments + f.write("benchmarkRuns = ") + json.dump(json.loads(output.to_json())["runs"], f, indent=2) + f.write(";\n\n") + + f.write("benchmarkMetadata = ") + json.dump(json.loads(output.to_json())["metadata"], f, indent=2) + f.write(";\n\n") + + f.write("benchmarkTags = ") + json.dump(json.loads(output.to_json())["tags"], f, indent=2) + f.write(";\n\n") + + f.write("defaultCompareNames = ") + json.dump(output.default_compare_names, f, indent=2) + f.write(";\n") + + print(f"See {os.getcwd()}/html/index.html for the results.") + else: + # For remote format, we write a single JSON file + data_path = os.path.join(html_path, "data.json") + with open(data_path, "w") as f: + json.dump(json.loads(output.to_json()), f, indent=2) + + print( + f"Upload {data_path} to a location set in config.js remoteDataUrl argument." + ) diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py similarity index 92% rename from unified-runtime/scripts/benchmarks/output_markdown.py rename to devops/scripts/benchmarks/output_markdown.py index dd6711cec6365..3295968603d0c 100644 --- a/unified-runtime/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import collections -from benches.result import Result +from utils.result import Result from options import options, MarkdownSize import ast @@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int): "\n
\n" "\n" f"{title} {rows_count} " - f"(threshold {options.epsilon*100:.2f}%)\n" + f"(threshold {options.stddev_threshold*100:.2f}%)\n" "\n\n" ) @@ -138,17 +138,6 @@ def generate_markdown_details( env_dict = res.env command = res.command - # If data is collected from already saved results, - # the content is parsed as strings - if isinstance(res.env, str): - # Since the scripts would be used solely on data prepared - # by our scripts, this should be safe - # However, maybe needs an additional blessing - # https://docs.python.org/3/library/ast.html#ast.literal_eval - env_dict = ast.literal_eval(res.env) - if isinstance(res.command, str): - command = ast.literal_eval(res.command) - section = ( "\n
\n" f"{res.label}\n\n" @@ -179,7 +168,7 @@ def generate_markdown_details( return "\nBenchmark details contain too many chars to display\n" -def generate_summary_table_and_chart( +def generate_summary_table( chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize ): summary_table = get_chart_markdown_header( @@ -276,7 +265,7 @@ def generate_summary_table_and_chart( delta = oln.diff - 1 oln.row += f" {delta*100:.2f}%" - if abs(delta) > options.epsilon: + if abs(delta) > options.stddev_threshold: if delta > 0: improved_rows.append(oln.row + " | \n") else: @@ -374,10 +363,27 @@ def generate_summary_table_and_chart( return "\n# Summary\n" "Benchmark output is too large to display\n\n" +def generate_failures_section(failures: dict[str, str]) -> str: + if not failures: + return "" + + section = "\n# Failures\n" + section += "| Name | Failure |\n" + section += "|---|---|\n" + + for name, failure in failures.items(): + section += f"| {name} | {failure} |\n" + + return section + + def generate_markdown( - name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize + name: str, + chart_data: dict[str, list[Result]], + failures: dict[str, str], + markdown_size: MarkdownSize, ): - (summary_line, summary_table) = generate_summary_table_and_chart( + (summary_line, summary_table) = generate_summary_table( chart_data, name, markdown_size ) @@ -396,4 +402,6 @@ def generate_markdown( ) generated_markdown += "\n# Details\n" f"{markdown_details}\n" - return generated_markdown + failures_section = generate_failures_section(failures) + + return failures_section + generated_markdown diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py new file mode 100644 index 0000000000000..3f191766deb8c --- /dev/null +++ b/devops/scripts/benchmarks/presets.py @@ -0,0 +1,38 @@ +# Copyright (C) 2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +presets: dict[str, list[str]] = { + "Full": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + "UMF", + ], + "SYCL": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + ], + "Minimal": [ + "Compute Benchmarks", + ], + "Normal": [ + "Compute Benchmarks", + "llama.cpp bench", + "Velocity Bench", + ], + "Test": [ + "Test Suite", + ], +} + + +def enabled_suites(preset: str) -> list[str]: + try: + return presets[preset] + except KeyError: + raise ValueError(f"Preset '{preset}' not found.") diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt similarity index 85% rename from unified-runtime/scripts/benchmarks/requirements.txt rename to devops/scripts/benchmarks/requirements.txt index 99ba0caab55c2..9f0381ceef6c2 100644 --- a/unified-runtime/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -2,3 +2,4 @@ matplotlib==3.9.2 mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 +Mako==1.3.9 diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py similarity index 94% rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py rename to devops/scripts/benchmarks/utils/compute_runtime.py index 74d8ff4eb5345..e617168f37a76 100644 --- a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py +++ b/devops/scripts/benchmarks/utils/compute_runtime.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -62,7 +62,7 @@ def build_gmmlib(self, repo, commit): f"-DCMAKE_BUILD_TYPE=Release", ] run(configure_command) - run(f"cmake --build {self.gmmlib_build} -j") + run(f"cmake --build {self.gmmlib_build} -j {options.build_jobs}") run(f"cmake --install {self.gmmlib_build}") return self.gmmlib_install @@ -87,7 +87,7 @@ def build_level_zero(self, repo, commit): f"-DCMAKE_BUILD_TYPE=Release", ] run(configure_command) - run(f"cmake --build {self.level_zero_build} -j") + run(f"cmake --build {self.level_zero_build} -j {options.build_jobs}") run(f"cmake --install {self.level_zero_build}") return self.level_zero_install @@ -142,8 +142,11 @@ def build_igc(self, repo, commit): ] run(configure_command) - # set timeout to 30min. IGC takes A LONG time to build if building from scratch. - run(f"cmake --build {self.igc_build} -j", timeout=600 * 3) + # set timeout to 2h. IGC takes A LONG time to build if building from scratch. + run( + f"cmake --build {self.igc_build} -j {options.build_jobs}", + timeout=60 * 60 * 2, + ) # cmake --install doesn't work... 
run("make install", cwd=self.igc_build) return self.igc_install @@ -214,7 +217,7 @@ def build_compute_runtime(self): configure_command.append(f"-DIGC_DIR={self.igc}") run(configure_command) - run(f"cmake --build {self.compute_runtime_build} -j") + run(f"cmake --build {self.compute_runtime_build} -j {options.build_jobs}") return self.compute_runtime_build diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py similarity index 78% rename from unified-runtime/scripts/benchmarks/benches/oneapi.py rename to devops/scripts/benchmarks/utils/oneapi.py index 0547f6646e39e..fc27b9a8b2d3e 100644 --- a/unified-runtime/scripts/benchmarks/benches/oneapi.py +++ b/devops/scripts/benchmarks/utils/oneapi.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,29 +7,33 @@ from utils.utils import download, run from options import options import os +import hashlib class OneAPI: - # random unique number for benchmark oneAPI installation - ONEAPI_BENCHMARK_INSTANCE_ID = 987654 - def __init__(self): self.oneapi_dir = os.path.join(options.workdir, "oneapi") Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) - # delete if some option is set? + self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir) # can we just hardcode these links? self.install_package( "dnnl", "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh", + "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b", ) self.install_package( "mkl", "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh", + "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002", ) return - def install_package(self, name, url): + def generate_unique_oneapi_id(self, path): + hash_object = hashlib.md5(path.encode()) + return hash_object.hexdigest() + + def install_package(self, name, url, checksum): package_path = os.path.join(self.oneapi_dir, name) if Path(package_path).exists(): print( @@ -37,11 +41,13 @@ def install_package(self, name, url): ) return - package = download(self.oneapi_dir, url, f"package_{name}.sh") + package = download( + self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum + ) try: print(f"installing {name}") run( - f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}" + f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}" ) except: print("oneAPI installation likely exists already") diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py new file mode 100644 index 0000000000000..b9ebfdcb60952 --- /dev/null +++ b/devops/scripts/benchmarks/utils/result.py @@ -0,0 +1,70 @@ +# Copyright (C) 2024-2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from dataclasses import dataclass, field +from typing import Optional, Dict, List, Any +from dataclasses_json import config, dataclass_json +from datetime import datetime + + +@dataclass_json +@dataclass +class Result: + label: str + value: float + command: list[str] + env: dict[str, str] + stdout: str + passed: bool = True + unit: str = "" + explicit_group: str = "" + # stddev can be optionally set by the benchmark, + # if not set, it will be calculated automatically. + stddev: float = 0.0 + # values below should not be set by the benchmark + name: str = "" + lower_is_better: bool = True + suite: str = "Unknown" + git_url: str = "" + git_hash: str = "" + +@dataclass_json +@dataclass +class BenchmarkRun: + results: list[Result] + name: str = "This PR" + hostname: str = "Unknown" + git_hash: str = "" + github_repo: str = None + date: datetime = field( + default=None, + metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat), + ) + + +@dataclass_json +@dataclass +class BenchmarkTag: + name: str + description: str = "" + + +@dataclass_json +@dataclass +class BenchmarkMetadata: + type: str = "benchmark" # or 'group' + description: Optional[str] = None + notes: Optional[str] = None + unstable: Optional[str] = None + tags: list[str] = field(default_factory=list) # Changed to list of tag names + + +@dataclass_json +@dataclass +class BenchmarkOutput: + runs: list[BenchmarkRun] + metadata: Dict[str, BenchmarkMetadata] + tags: Dict[str, BenchmarkTag] + default_compare_names: List[str] = field(default_factory=list) diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py similarity index 81% rename from unified-runtime/scripts/benchmarks/utils/utils.py rename to devops/scripts/benchmarks/utils/utils.py index 3a516e8d724f7..54f2ef7fb9c1f 100644 --- a/unified-runtime/scripts/benchmarks/utils/utils.py +++ b/devops/scripts/benchmarks/utils/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,6 +12,7 @@ import urllib # nosec B404 from options import options from pathlib import Path +import hashlib def run( @@ -45,6 +46,12 @@ def run( env.update(env_vars) + if options.verbose: + command_str = " ".join(command) + env_str = " ".join(f"{key}={value}" for key, value in env_vars.items()) + full_command_str = f"{env_str} {command_str}".strip() + print(f"Running: {full_command_str}") + result = subprocess.run( command, cwd=cwd, @@ -107,7 +114,7 @@ def prepare_workdir(dir, version): shutil.rmtree(dir) else: raise Exception( - f"The directory {dir} exists but is a benchmark work directory." + f"The directory {dir} exists but is not a benchmark work directory." 
) os.makedirs(dir) @@ -128,11 +135,26 @@ def create_build_path(directory, name): return build_path -def download(dir, url, file, untar=False, unzip=False): +def calculate_checksum(file_path): + sha_hash = hashlib.sha384() + with open(file_path, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha_hash.update(byte_block) + return sha_hash.hexdigest() + + +def download(dir, url, file, untar=False, unzip=False, checksum=""): data_file = os.path.join(dir, file) if not Path(data_file).exists(): print(f"{data_file} does not exist, downloading") urllib.request.urlretrieve(url, data_file) + calculated_checksum = calculate_checksum(data_file) + if calculated_checksum != checksum: + print( + f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue." + ) + exit(1) + if untar: file = tarfile.open(data_file) file.extractall(dir) diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png similarity index 100% rename from unified-runtime/scripts/benchmarks/workflow.png rename to devops/scripts/benchmarks/workflow.png diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh similarity index 100% rename from unified-runtime/.github/scripts/get_system_info.sh rename to devops/scripts/get_system_info.sh diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/unified-runtime/scripts/benchmarks/benches/compute.py deleted file mode 100644 index 4658a3414e16a..0000000000000 --- a/unified-runtime/scripts/benchmarks/benches/compute.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright (C) 2024-2025 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -import os -import csv -import io -from utils.utils import run, git_clone, create_build_path -from .base import Benchmark, Suite -from .result import Result -from options import options -from enum import Enum - -class ComputeBench(Suite): - def __init__(self, directory): - self.directory = directory - - def name(self) -> str: - return "Compute Benchmarks" - - def setup(self): - if options.sycl is None: - return - - repo_path = git_clone( - self.directory, - "compute-benchmarks-repo", - "https://github.com/intel/compute-benchmarks.git", - "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", - ) - build_path = create_build_path(self.directory, "compute-benchmarks-build") - - configure_command = [ - "cmake", - f"-B {build_path}", - f"-S {repo_path}", - f"-DCMAKE_BUILD_TYPE=Release", - f"-DBUILD_SYCL=ON", - f"-DSYCL_COMPILER_ROOT={options.sycl}", - f"-DALLOW_WARNINGS=ON", - ] - - if options.ur is not None: - configure_command += [ - f"-DBUILD_UR=ON", - f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", - ] - - print(f"{self.__class__.__name__}: Run {configure_command}") - run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") - run(f"cmake --build {build_path} -j", add_sycl=True) - - self.built = True - - def benchmarks(self) -> list[Benchmark]: - if options.sycl is None: - return [] - - if options.ur_adapter == "cuda": - return [] - - benches = [ - SubmitKernelL0(self, 0), - SubmitKernelL0(self, 1), - SubmitKernelSYCL(self, 0), - SubmitKernelSYCL(self, 1), - QueueInOrderMemcpy(self, 0, "Device", "Device", 1024), - QueueInOrderMemcpy(self, 0, "Host", "Device", 1024), - QueueMemcpy(self, "Device", "Device", 1024), - StreamMemory(self, 
"Triad", 10 * 1024, "Device"), - ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), - ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), - VectorSum(self), - MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 100, 1, 1, 1), - MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), - ] - - if options.ur is not None: - benches += [ - SubmitKernelUR(self, 0, 0), - SubmitKernelUR(self, 1, 0), - SubmitKernelUR(self, 1, 1), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100), - ] - - return benches - - -def parse_unit_type(compute_unit): - if "[count]" in compute_unit: - return "instr" - elif "[us]" in compute_unit: - return "μs" - return compute_unit.replace("[", "").replace("]", "") - - -class ComputeBenchmark(Benchmark): - def __init__(self, bench, name, test): - super().__init__(bench.directory, bench) - self.bench = bench - self.bench_name = name - self.test = test - - def bin_args(self) -> list[str]: - return [] - - def extra_env_vars(self) -> dict: - return {} - - def setup(self): - self.benchmark_bin = os.path.join( - self.bench.directory, "compute-benchmarks-build", "bin", self.bench_name - ) - - def explicit_group(self): - return "" - - def run(self, env_vars) -> list[Result]: - command = [ - f"{self.benchmark_bin}", - f"--test={self.test}", - "--csv", - "--noHeaders", - ] - - command += self.bin_args() - env_vars.update(self.extra_env_vars()) - - result = self.run_bench(command, env_vars) - parsed_results = self.parse_output(result) - ret = [] - for label, median, stddev, unit in parsed_results: - extra_label = " CPU count" if parse_unit_type(unit) == "instr" else "" - explicit_group = ( - self.explicit_group() + extra_label - if self.explicit_group() != "" - else "" - ) - ret.append( - Result( - label=self.name() + extra_label, - explicit_group=explicit_group, - value=median, - stddev=stddev, - command=command, - env=env_vars, - stdout=result, - unit=parse_unit_type(unit), - ) - ) - return ret - - def parse_output(self, output): - csv_file = io.StringIO(output) - reader = csv.reader(csv_file) - next(reader, None) - results = [] - while True: - data_row = next(reader, None) - if data_row is None: - break - try: - label = data_row[0] - mean = float(data_row[1]) - median = float(data_row[2]) - # compute benchmarks report stddev as % - stddev = mean * (float(data_row[3].strip("%")) / 100.0) - unit = data_row[7] - results.append((label, median, stddev, unit)) - except (ValueError, IndexError) as e: - raise ValueError(f"Error parsing output: {e}") - if len(results) == 0: - raise ValueError("Benchmark output does not contain data.") - return results - - def teardown(self): - return - - -class SubmitKernelSYCL(ComputeBenchmark): - def __init__(self, bench, ioq): - self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel") - - def name(self): - order = "in order" if 
self.ioq else "out of order" - return f"api_overhead_benchmark_sycl SubmitKernel {order}" - - def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - "--MeasureCompletion=0", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - -class SubmitKernelUR(ComputeBenchmark): - def __init__(self, bench, ioq, measureCompletion): - self.ioq = ioq - self.measureCompletion = measureCompletion - super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_ur SubmitKernel {order}" + ( - " with measure completion" if self.measureCompletion else "" - ) - - def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - f"--MeasureCompletion={self.measureCompletion}", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - -class SubmitKernelL0(ComputeBenchmark): - def __init__(self, bench, ioq): - self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_l0 SubmitKernel {order}" - - def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - "--MeasureCompletion=0", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - -class ExecImmediateCopyQueue(ComputeBenchmark): - def __init__(self, bench, ioq, isCopyOnly, source, destination, size): - self.ioq = ioq - self.isCopyOnly = isCopyOnly - self.source = source - self.destination = destination - self.size = size - super().__init__(bench, "api_overhead_benchmark_sycl", "ExecImmediateCopyQueue") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}" - - def bin_args(self) -> list[str]: - return [ - "--iterations=100000", - f"--ioq={self.ioq}", - f"--IsCopyOnly={self.isCopyOnly}", - "--MeasureCompletionTime=0", - f"--src={self.destination}", - f"--dst={self.destination}", - f"--size={self.size}", - ] - - -class QueueInOrderMemcpy(ComputeBenchmark): - def __init__(self, bench, isCopyOnly, source, destination, size): - self.isCopyOnly = isCopyOnly - self.source = source - self.destination = destination - self.size = size - super().__init__(bench, "memory_benchmark_sycl", "QueueInOrderMemcpy") - - def name(self): - return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}" - - def bin_args(self) -> list[str]: - return [ - "--iterations=10000", - f"--IsCopyOnly={self.isCopyOnly}", - f"--sourcePlacement={self.source}", - f"--destinationPlacement={self.destination}", - f"--size={self.size}", - "--count=100", - ] - - -class QueueMemcpy(ComputeBenchmark): - def __init__(self, bench, source, destination, size): - self.source = source - self.destination = destination - self.size = size - super().__init__(bench, "memory_benchmark_sycl", "QueueMemcpy") - - def name(self): - return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}" - - def bin_args(self) -> list[str]: - return [ - "--iterations=10000", - 
f"--sourcePlacement={self.source}", - f"--destinationPlacement={self.destination}", - f"--size={self.size}", - ] - - -class StreamMemory(ComputeBenchmark): - def __init__(self, bench, type, size, placement): - self.type = type - self.size = size - self.placement = placement - super().__init__(bench, "memory_benchmark_sycl", "StreamMemory") - - def name(self): - return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" - - # measurement is in GB/s - def lower_is_better(self): - return False - - def bin_args(self) -> list[str]: - return [ - "--iterations=10000", - f"--type={self.type}", - f"--size={self.size}", - f"--memoryPlacement={self.placement}", - "--useEvents=0", - "--contents=Zeros", - "--multiplier=1", - ] - - -class VectorSum(ComputeBenchmark): - def __init__(self, bench): - super().__init__(bench, "miscellaneous_benchmark_sycl", "VectorSum") - - def name(self): - return f"miscellaneous_benchmark_sycl VectorSum" - - def bin_args(self) -> list[str]: - return [ - "--iterations=1000", - "--numberOfElementsX=512", - "--numberOfElementsY=256", - "--numberOfElementsZ=256", - ] - - -class MemcpyExecute(ComputeBenchmark): - def __init__( - self, - bench, - numOpsPerThread, - numThreads, - allocSize, - iterations, - srcUSM, - dstUSM, - useEvent, - ): - self.numOpsPerThread = numOpsPerThread - self.numThreads = numThreads - self.allocSize = allocSize - self.iterations = iterations - self.srcUSM = srcUSM - self.dstUSM = dstUSM - self.useEvents = useEvent - super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute") - - def name(self): - return ( - f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" - + (" without events" if not self.useEvents else "") - ) - - def bin_args(self) -> list[str]: - return [ - "--Ioq=1", - f"--UseEvents={self.useEvents}", - "--MeasureCompletion=1", - "--UseQueuePerThread=1", - f"--AllocSize={self.allocSize}", - f"--NumThreads={self.numThreads}", - f"--NumOpsPerThread={self.numOpsPerThread}", - f"--iterations={self.iterations}", - f"--SrcUSM={self.srcUSM}", - f"--DstUSM={self.dstUSM}", - ] - - -class RUNTIMES(Enum): - SYCL = "sycl" - LEVEL_ZERO = "l0" - UR = "ur" - - -class GraphApiSinKernelGraph(ComputeBenchmark): - def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): - self.withGraphs = withGraphs - self.numKernels = numKernels - self.runtime = runtime - super().__init__( - bench, f"graph_api_benchmark_{runtime.value}", "SinKernelGraph" - ) - - def explicit_group(self): - return f"SinKernelGraph {self.numKernels}" - - def name(self): - return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" - - def bin_args(self) -> list[str]: - return [ - "--iterations=10000", - f"--numKernels={self.numKernels}", - f"--withGraphs={self.withGraphs}", - "--withCopyOffload=1", - "--immediateAppendCmdList=0", - ] - - -class GraphApiSubmitExecGraph(ComputeBenchmark): - def __init__(self, bench, ioq, submit, numKernels): - self.ioq = ioq - self.submit = submit - self.numKernels = numKernels - super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph") - - def name(self): - return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}" - - def explicit_group(self): - if self.submit: - return "SubmitGraph" - else: - return "ExecGraph" - - def bin_args(self) -> 
list[str]: - return [ - "--iterations=100", - f"--measureSubmit={self.submit}", - f"--ioq={self.ioq}", - f"--numKernels={self.numKernels}", - ] diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/unified-runtime/scripts/benchmarks/benches/result.py deleted file mode 100644 index 52a098d91c24a..0000000000000 --- a/unified-runtime/scripts/benchmarks/benches/result.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from dataclasses import dataclass -from typing import Optional -from dataclasses_json import dataclass_json -from datetime import datetime - - -@dataclass_json -@dataclass -class Result: - label: str - value: float - command: str - env: str - stdout: str - passed: bool = True - unit: str = "" - explicit_group: str = "" - # stddev can be optionally set by the benchmark, - # if not set, it will be calculated automatically. - stddev: float = 0.0 - # values below should not be set by the benchmark - name: str = "" - lower_is_better: bool = True - git_hash: str = "" - date: Optional[datetime] = None - suite: str = "Unknown" - - -@dataclass_json -@dataclass -class BenchmarkRun: - results: list[Result] - name: str = "This PR" - git_hash: str = "" - date: datetime = None diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/unified-runtime/scripts/benchmarks/benches/test.py deleted file mode 100644 index 06eac12b25344..0000000000000 --- a/unified-runtime/scripts/benchmarks/benches/test.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -import random -from utils.utils import git_clone -from .base import Benchmark, Suite -from .result import Result -from utils.utils import run, create_build_path -from options import options -import os - - -class TestSuite(Suite): - def __init__(self): - return - - def setup(self): - return - - def benchmarks(self) -> list[Benchmark]: - bench_configs = [ - ("Memory Bandwidth", 2000, 200, "Foo Group"), - ("Latency", 100, 20, "Bar Group"), - ("Throughput", 1500, 150, "Foo Group"), - ("FLOPS", 3000, 300, "Foo Group"), - ("Cache Miss Rate", 250, 25, "Bar Group"), - ] - - result = [] - for base_name, base_value, base_diff, group in bench_configs: - for variant in range(6): - value_multiplier = 1.0 + (variant * 0.2) - name = f"{base_name} {variant+1}" - value = base_value * value_multiplier - diff = base_diff * value_multiplier - - result.append(TestBench(name, value, diff, group)) - - return result - - -class TestBench(Benchmark): - def __init__(self, name, value, diff, group=""): - self.bname = name - self.value = value - self.diff = diff - self.group = group - super().__init__("") - - def name(self): - return self.bname - - def lower_is_better(self): - return True - - def setup(self): - return - - def run(self, env_vars) -> list[Result]: - random_value = self.value + random.uniform(-1 * (self.diff), self.diff) - return [ - Result( - label=self.name(), - explicit_group=self.group, - value=random_value, - command="", - env={"A": "B"}, - stdout="no output", - unit="ms", - ) - ] - - def teardown(self): - return diff --git a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/unified-runtime/scripts/benchmarks/benchmark_results.html.template deleted file mode 100644 index 1deeedad66b00..0000000000000 --- a/unified-runtime/scripts/benchmarks/benchmark_results.html.template +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - Benchmark Results - - - - -
[192-line deleted HTML template; the markup is not recoverable from this extract. The removed page was titled "Benchmark Results" and contained suite filter checkboxes (${suite_checkboxes_html}), a "Historical Results" section rendering ${timeseries_charts_html}, and a "Comparisons" section rendering ${bar_charts_html}.]
- - diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/unified-runtime/scripts/benchmarks/output_html.py deleted file mode 100644 index 4ba395bc3aac6..0000000000000 --- a/unified-runtime/scripts/benchmarks/output_html.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -import re -import os -from pathlib import Path -import matplotlib.pyplot as plt -import mpld3 -from collections import defaultdict -from dataclasses import dataclass -import matplotlib.dates as mdates -from benches.result import BenchmarkRun, Result -import numpy as np -from string import Template - - -@dataclass -class BenchmarkMetadata: - unit: str - suite: str - lower_is_better: bool - - -@dataclass -class BenchmarkSeries: - label: str - metadata: BenchmarkMetadata - runs: list[BenchmarkRun] - - -@dataclass -class BenchmarkChart: - label: str - suite: str - html: str - - -def tooltip_css() -> str: - return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}" - - -def create_time_series_chart( - benchmarks: list[BenchmarkSeries], github_repo: str -) -> list[BenchmarkChart]: - plt.close("all") - - num_benchmarks = len(benchmarks) - if num_benchmarks == 0: - return [] - - html_charts = [] - - for _, benchmark in enumerate(benchmarks): - fig, ax = plt.subplots(figsize=(10, 4)) - - all_values = [] - all_stddevs = [] - - for run in benchmark.runs: - sorted_points = sorted(run.results, key=lambda x: x.date) - dates = [point.date for point in sorted_points] - values = [point.value for point in sorted_points] - stddevs = [point.stddev for point in sorted_points] - - all_values.extend(values) - all_stddevs.extend(stddevs) - - ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5) - scatter = ax.scatter(dates, values, picker=True) - - tooltip_labels = [ - f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Value: {point.value:.2f} {benchmark.metadata.unit}\n" - f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n" - f"Git Hash: {point.git_hash}" - for point in sorted_points - ] - - targets = [ - f"https://github.com/{github_repo}/commit/{point.git_hash}" - for point in sorted_points - ] - - tooltip = mpld3.plugins.PointHTMLTooltip( - scatter, tooltip_labels, css=tooltip_css(), targets=targets - ) - mpld3.plugins.connect(fig, tooltip) - - ax.set_title(benchmark.label, pad=20) - performance_indicator = ( - "lower is better" - if benchmark.metadata.lower_is_better - else "higher is better" - ) - ax.text( - 0.5, - 1.05, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - ax.set_xlabel("") - unit = benchmark.metadata.unit - ax.set_ylabel(f"Value ({unit})" if unit else "Value") - ax.grid(True, alpha=0.2) - ax.legend(bbox_to_anchor=(1, 1), loc="upper left") - ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d %H:%M:%S")) - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - html=mpld3.fig_to_html(fig), - label=benchmark.label, - suite=benchmark.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -@dataclass -class ExplicitGroup: - name: str - nnames: int - metadata: BenchmarkMetadata - runs: dict[str, dict[str, Result]] - - -def create_explicit_groups( - benchmark_runs: list[BenchmarkRun], compare_names: 
list[str] -) -> list[ExplicitGroup]: - groups = {} - - for run in benchmark_runs: - if run.name in compare_names: - for res in run.results: - if res.explicit_group != "": - if res.explicit_group not in groups: - groups[res.explicit_group] = ExplicitGroup( - name=res.explicit_group, - nnames=len(compare_names), - metadata=BenchmarkMetadata( - unit=res.unit, - lower_is_better=res.lower_is_better, - suite=res.suite, - ), - runs={}, - ) - - group = groups[res.explicit_group] - if res.label not in group.runs: - group.runs[res.label] = {name: None for name in compare_names} - - if group.runs[res.label][run.name] is None: - group.runs[res.label][run.name] = res - - return list(groups.values()) - - -def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]: - plt.close("all") - - html_charts = [] - - for group in groups: - fig, ax = plt.subplots(figsize=(10, 6)) - - x = np.arange(group.nnames) - x_labels = [] - width = 0.8 / len(group.runs) - - max_height = 0 - - for i, (run_name, run_results) in enumerate(group.runs.items()): - offset = width * i - - positions = x + offset - x_labels = run_results.keys() - valid_data = [r.value if r is not None else 0 for r in run_results.values()] - rects = ax.bar(positions, valid_data, width, label=run_name) - # This is a hack to disable all bar_label. Setting labels to empty doesn't work. - # We create our own labels below for each bar, this works better in mpld3. - ax.bar_label(rects, fmt="") - - for rect, run, res in zip(rects, run_results.keys(), run_results.values()): - if res is None: - continue - - height = rect.get_height() - if height > max_height: - max_height = height - - ax.text( - rect.get_x() + rect.get_width() / 2.0, - height + 1, - f"{res.value:.1f}", - ha="center", - va="bottom", - fontsize=9, - ) - - tooltip_labels = [ - f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Run: {run}\n" - f"Label: {res.label}\n" - f"Value: {res.value:.2f} {res.unit}\n" - f"Stddev: {res.stddev:.2f} {res.unit}\n" - ] - tooltip = mpld3.plugins.LineHTMLTooltip( - rect, tooltip_labels, css=tooltip_css() - ) - mpld3.plugins.connect(ax.figure, tooltip) - - # normally we'd just set legend to be outside - # the chart, but this is not supported by mpld3. - # instead, we adjust the y axis to account for - # the height of the bars. - legend_height = len(group.runs) * 0.1 - ax.set_ylim(0, max_height * (1 + legend_height)) - - ax.set_xticks([]) - ax.grid(True, axis="y", alpha=0.2) - ax.set_ylabel(f"Value ({group.metadata.unit})") - ax.legend(loc="upper left") - ax.set_title(group.name, pad=20) - performance_indicator = ( - "lower is better" if group.metadata.lower_is_better else "higher is better" - ) - ax.text( - 0.5, - 1.03, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - for idx, label in enumerate(x_labels): - # this is a hack to get labels to show above the legend - # we normalize the idx to transAxes transform and offset it a little. 
- x_norm = (idx + 0.3 - ax.get_xlim()[0]) / ( - ax.get_xlim()[1] - ax.get_xlim()[0] - ) - ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666") - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - label=group.name, - html=mpld3.fig_to_html(fig), - suite=group.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -def process_benchmark_data( - benchmark_runs: list[BenchmarkRun], compare_names: list[str] -) -> list[BenchmarkSeries]: - benchmark_metadata: dict[str, BenchmarkMetadata] = {} - run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list)) - - for run in benchmark_runs: - if run.name not in compare_names: - continue - - for result in run.results: - if result.label not in benchmark_metadata: - benchmark_metadata[result.label] = BenchmarkMetadata( - unit=result.unit, - lower_is_better=result.lower_is_better, - suite=result.suite, - ) - - result.date = run.date - result.git_hash = run.git_hash - run_map[result.label][run.name].append(result) - - benchmark_series = [] - for label, metadata in benchmark_metadata.items(): - runs = [ - BenchmarkRun(name=run_name, results=results) - for run_name, results in run_map[label].items() - ] - benchmark_series.append( - BenchmarkSeries(label=label, metadata=metadata, runs=runs) - ) - - return benchmark_series - - -def generate_html( - benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str] -) -> str: - benchmarks = process_benchmark_data(benchmark_runs, compare_names) - - timeseries = create_time_series_chart(benchmarks, github_repo) - timeseries_charts_html = "\n".join( - f'
{ts.html}
' - for ts in timeseries - ) - - explicit_groups = create_explicit_groups(benchmark_runs, compare_names) - - bar_charts = create_grouped_bar_charts(explicit_groups) - bar_charts_html = "\n".join( - f'
{bc.html}
' - for bc in bar_charts - ) - - suite_names = {t.suite for t in timeseries} - suite_checkboxes_html = " ".join( - f'' - for suite in suite_names - ) - - script_path = os.path.dirname(os.path.realpath(__file__)) - results_template_path = Path(script_path, "benchmark_results.html.template") - with open(results_template_path, "r") as file: - html_template = file.read() - - template = Template(html_template) - data = { - "suite_checkboxes_html": suite_checkboxes_html, - "timeseries_charts_html": timeseries_charts_html, - "bar_charts_html": bar_charts_html, - } - - return template.substitute(data)
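For reference, the hunks above gate suite selection behind the new --preset flag via presets.enabled_suites(). The sketch below is a rough illustration only, not part of the patch: FakeSuite and the driver code are hypothetical stand-ins, while the preset names come from presets.py as added above, and the import assumes the sketch runs from devops/scripts/benchmarks.

# Sketch of the preset-based suite filtering introduced in presets.py / main.py.
# FakeSuite is a hypothetical stand-in; only name() matters for the filter.
from presets import enabled_suites, presets


class FakeSuite:
    def __init__(self, name: str):
        self._name = name

    def name(self) -> str:
        return self._name


all_suites = [FakeSuite(n) for n in presets["Full"]]

# Same membership check main.py now performs before setting a suite up:
selected = [s for s in all_suites if s.name() in enabled_suites("Normal")]
print([s.name() for s in selected])
# expected: ['Compute Benchmarks', 'llama.cpp bench', 'Velocity Bench']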
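Similarly, utils.download() now refuses a freshly downloaded file whose SHA-384 digest does not match the checksum supplied by callers such as OneAPI.install_package(). A small, hypothetical helper (not part of the patch) for producing the expected digest when adding a new package entry might look like the following; it simply mirrors the block-wise hashing of calculate_checksum() in utils.py.

# Hypothetical helper: print the SHA-384 digest utils.download() compares against.
import hashlib
import sys


def sha384_of(path: str) -> str:
    digest = hashlib.sha384()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(4096), b""):
            digest.update(block)
    return digest.hexdigest()


if __name__ == "__main__":
    # e.g. python3 sha384_of.py package_dnnl.sh
    print(sha384_of(sys.argv[1]))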