diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
index 004fe14eca35b..fcadded3cad51 100644
--- a/devops/scripts/benchmarks/README.md
+++ b/devops/scripts/benchmarks/README.md
@@ -6,6 +6,8 @@ Scripts for running performance tests on SYCL and Unified Runtime.
 
 - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench)
 - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/)
+- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp)
+- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench)
 
 ## Running
 
@@ -27,8 +29,6 @@ You can also include additional benchmark parameters, such as environment variab
 
 Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request.
 
-By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data.
-
 You must be a member of the `oneapi-src` organization to access these features.
 
 ## Comparing results
@@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option
 
 You can compare benchmark results using the `--compare` option. The comparison will be presented in a markdown output file (see below). If you want to calculate the relative performance of the new results against the previously saved data, use `--compare <name>` (e.g. `--compare baseline`). To compare only stored data without generating new results, use `--dry-run --compare <name1> --compare <name2> --relative-perf <name1>`, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script from running benchmarks. Listing more than two `--compare` options results in displaying only execution time, without statistical analysis.
 
-Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results
-are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).
+Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2), is updated automatically during a nightly job. The results
+are stored [here](https://oneapi-src.github.io/unified-runtime/performance/).
 
 ## Output formats
 You can display the results in the form of an HTML file by using `--output-html` and a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`.
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 859aa96e50903..d12f79286ce86 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,6 +17,7 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
+from presets import enabled_suites, presets
 
 import argparse
 import re
@@ -175,6 +176,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     failures = {}
 
     for s in suites:
+        if s.name() not in enabled_suites(options.preset):
+            continue
+
         suite_benchmarks = s.benchmarks()
         if filter:
             suite_benchmarks = [
@@ -457,6 +461,13 @@ def validate_and_parse_env_args(env_args):
         help="Directory for cublas library",
         default=None,
     )
+    parser.add_argument(
+        "--preset",
+        type=str,
+        choices=[p for p in presets.keys()],
+        help="Benchmark preset to run",
+        default=options.preset,
+    )
     parser.add_argument(
         "--results-dir",
         type=str,
@@ -495,6 +506,7 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
+    options.preset = args.preset
     options.custom_results_dir = args.results_dir
     options.build_jobs = args.build_jobs
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 78eda7ae3c88e..76d2de55aaa36 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -2,6 +2,7 @@
 from enum import Enum
 import multiprocessing
+from presets import presets
 
 
 class Compare(Enum):
     LATEST = "latest"
@@ -42,6 +43,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.12"
     build_igc: bool = False
     current_run_name: str = "This PR"
+    preset: str = "Full"
     custom_results_dir = None
     build_jobs: int = multiprocessing.cpu_count()
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
new file mode 100644
index 0000000000000..3f191766deb8c
--- /dev/null
+++ b/devops/scripts/benchmarks/presets.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2025 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+presets: dict[str, list[str]] = {
+    "Full": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+        "UMF",
+    ],
+    "SYCL": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+    ],
+    "Minimal": [
+        "Compute Benchmarks",
+    ],
+    "Normal": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "Velocity Bench",
+    ],
+    "Test": [
+        "Test Suite",
+    ],
+}
+
+
+def enabled_suites(preset: str) -> list[str]:
+    try:
+        return presets[preset]
+    except KeyError:
+        raise ValueError(f"Preset '{preset}' not found.")
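The patch wires the new `--preset` option through `options.preset` into the suite loop in `main.py`, where any suite whose `name()` is not returned by `enabled_suites()` is skipped. Below is a minimal, self-contained sketch of that selection logic; the `FakeSuite` class and the two-entry `presets` dict are illustrative stand-ins, not part of the patch.

```python
# Illustrative sketch of the preset-based suite selection introduced by this patch.
# FakeSuite is a hypothetical stand-in for the real suite classes in benches/;
# presets and enabled_suites mirror devops/scripts/benchmarks/presets.py.

presets: dict[str, list[str]] = {
    "Normal": ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"],
    "Minimal": ["Compute Benchmarks"],
}


def enabled_suites(preset: str) -> list[str]:
    # Same lookup as presets.py: an unknown preset name raises a ValueError.
    try:
        return presets[preset]
    except KeyError:
        raise ValueError(f"Preset '{preset}' not found.")


class FakeSuite:
    def __init__(self, name: str) -> None:
        self._name = name

    def name(self) -> str:
        return self._name


suites = [FakeSuite("Compute Benchmarks"), FakeSuite("SYCL-Bench")]

# Same skip logic as the new lines in main.py's suite loop.
for s in suites:
    if s.name() not in enabled_suites("Minimal"):
        continue
    print(f"Running suite: {s.name()}")  # prints only "Compute Benchmarks"
```

With the real script, the same effect comes from passing a preset name to `--preset` (e.g. `--preset Minimal`); since `options.preset` defaults to `"Full"`, all suites remain enabled unless a narrower preset is requested.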