89 changes: 48 additions & 41 deletions devops/scripts/benchmarks/benches/compute.py
@@ -3,19 +3,19 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import copy
import csv
import io
import math
from enum import Enum
from itertools import product
from pathlib import Path

from git_project import GitProject
from options import options
from utils.result import BenchmarkMetadata, Result

from .base import Benchmark, Suite, TracingType


class RUNTIMES(Enum):
@@ -100,66 +100,57 @@ def setup(self) -> None:

def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
    metadata = {
        "SinKernelGraph": BenchmarkMetadata(
            type="group",
            unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
            tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
        ),
        "FinalizeGraph": BenchmarkMetadata(
            type="group", tags=["finalize", "micro", "SYCL", "graph"]
        ),
    }

    # Add metadata for all SubmitKernel group variants
    submit_kernel_metadata = BenchmarkMetadata(
        type="group",
        notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
        "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
        "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
        "Work is ongoing to reduce the overhead of the SYCL API\n",
        tags=["submit", "micro", "SYCL", "UR", "L0"],
        range_min=0.0,
    )
    for order in ["in order", "out of order"]:
        for completion in ["", " with completion"]:
            for events in ["", " using events"]:
                group_name = f"SubmitKernel {order}{completion}{events} long kernel"
                metadata[group_name] = copy.deepcopy(submit_kernel_metadata)
                metadata[group_name].description = (
                    f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs."
                )

                # CPU count variants
                cpu_count_group = f"{group_name}, CPU count"
                metadata[cpu_count_group] = copy.deepcopy(submit_kernel_metadata)
                metadata[cpu_count_group].description = (
                    f"Measures CPU instructions count overhead of submitting {order} kernels with longer execution times through different APIs."
                )

    # Add metadata for all SubmitGraph group variants
    submit_graph_metadata = BenchmarkMetadata(
        type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
    )
    for order in ["in order", "out of order"]:
        for completion in ["", " with completion"]:
            for events in ["", " using events"]:
                for num_kernels in self.submit_graph_num_kernels:
                    for host_tasks in ["", " use host tasks"]:
                        group_name = f"SubmitGraph {order}{completion}{events}{host_tasks}, {num_kernels} kernels"
                        metadata[group_name] = copy.deepcopy(submit_graph_metadata)
                        # CPU count variants
                        cpu_count_group = f"{group_name}, CPU count"
                        metadata[cpu_count_group] = copy.deepcopy(
                            submit_graph_metadata
                        )
    return metadata
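
The loops above stamp out one metadata entry per group from a shared template, so each entry must be an independent object. Below is a minimal sketch of why copy.deepcopy is used rather than plain assignment; the Meta class is an illustrative stand-in for BenchmarkMetadata, not the real type.

import copy
from dataclasses import dataclass, field

# Illustrative stand-in for utils.result.BenchmarkMetadata (assumption).
@dataclass
class Meta:
    description: str = ""
    tags: list[str] = field(default_factory=list)

template = Meta(tags=["submit", "micro"])

# Plain assignment aliases the template: editing one group's metadata
# would silently edit every group sharing the same object.
aliased = template
aliased.description = "group A"
assert template.description == "group A"  # shared state leaks

# deepcopy gives each group its own copy, so the per-group
# .description assignments in the diff stay local to that group.
independent = copy.deepcopy(template)
independent.description = "group B"
assert template.description == "group A"  # template untouched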

def benchmarks(self) -> list[Benchmark]:
@@ -1088,6 +1079,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args

def get_metadata(self) -> dict[str, BenchmarkMetadata]:
metadata_dict = super().get_metadata()

# Create CPU count variant with modified display name and explicit_group
cpu_count_name = self.name() + " CPU count"
cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
cpu_count_display_name = self.display_name() + ", CPU count"
cpu_count_explicit_group = (
self.explicit_group() + ", CPU count" if self.explicit_group() else ""
)
cpu_count_metadata.display_name = cpu_count_display_name
cpu_count_metadata.explicit_group = cpu_count_explicit_group
metadata_dict[cpu_count_name] = cpu_count_metadata

return metadata_dict
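
For illustration, here is a self-contained sketch of the pattern this override implements. The DemoBench and Meta classes are hypothetical stand-ins, not the real Benchmark or BenchmarkMetadata API.

import copy
from dataclasses import dataclass

@dataclass
class Meta:  # hypothetical stand-in for BenchmarkMetadata
    display_name: str
    explicit_group: str

class DemoBench:  # hypothetical stand-in for a ComputeBenchmark subclass
    def name(self) -> str:
        return "api_overhead_benchmark SubmitKernel"

    def display_name(self) -> str:
        return "SubmitKernel, in order"

    def explicit_group(self) -> str:
        return "SubmitKernel in order long kernel"

    def get_metadata(self) -> dict[str, Meta]:
        # Base entry keyed by benchmark name, mirroring super().get_metadata().
        md = {self.name(): Meta(self.display_name(), self.explicit_group())}
        # CPU count variant: deep-copy the entry, then append ", CPU count"
        # to the display name and explicit group, as in the diff above.
        cpu = copy.deepcopy(md[self.name()])
        cpu.display_name += ", CPU count"
        cpu.explicit_group += ", CPU count"
        md[self.name() + " CPU count"] = cpu
        return md

print(sorted(DemoBench().get_metadata()))
# ['api_overhead_benchmark SubmitKernel',
#  'api_overhead_benchmark SubmitKernel CPU count']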


class UllsEmptyKernel(ComputeBenchmark):
def __init__(