11"""
22Benchmark execution orchestration and lifecycle management.
33
4- Provides the core benchmarking engine that coordinates request scheduling,
5- data aggregation, and result compilation across different execution strategies
6- and environments. The Benchmarker acts as the primary workflow coordinator,
7- managing the complete benchmark lifecycle from request submission through
8- result compilation while supporting thread-safe singleton operations.
4+ Provides the core benchmarking engine coordinating request scheduling,
5+ data aggregation, and result compilation across execution strategies
6+ and environments. The Benchmarker manages the complete benchmark lifecycle
7+ from request submission through result compilation while supporting
8+ thread-safe singleton operations for consistent state management .
99"""
1010
1111from __future__ import annotations
1818from guidellm .benchmark .profile import Profile
1919from guidellm .benchmark .progress import BenchmarkerProgress
2020from guidellm .benchmark .schemas import (
21- BenchmarkerArgs ,
21+ BenchmarkAccumulatorT ,
22+ BenchmarkConfig ,
2223 BenchmarkT ,
23- EstimatedBenchmarkState ,
2424)
2525from guidellm .logger import logger
2626from guidellm .scheduler import (
2727 BackendInterface ,
28+ Constraint ,
2829 Environment ,
30+ MultiTurnRequestT ,
2931 RequestT ,
3032 ResponseT ,
3133 Scheduler ,
34+ SchedulingStrategy ,
3235)
3336from guidellm .utils import ThreadSafeSingletonMixin
37+ from guidellm .utils .mixins import InfoMixin
3438
3539__all__ = ["Benchmarker" ]
3640
@@ -43,46 +47,45 @@ class Benchmarker(
4347 """
4448 Abstract benchmark orchestrator for request processing workflows.
4549
46- Coordinates execution of benchmarking runs across different scheduling
47- strategies, aggregating metrics and compiling results. Manages the complete
48- benchmark lifecycle from request submission through result compilation while
49- implementing thread-safe singleton pattern to ensure consistent state across
50- concurrent operations.
50+ Coordinates benchmarking runs across scheduling strategies, aggregating
51+ metrics and compiling results. Manages the complete benchmark lifecycle
52+ from request submission through result compilation while implementing a
53+ thread-safe singleton pattern for consistent state across concurrent
54+ operations.
5155 """
5256
5357 async def run (
5458 self ,
59+ accumulator_class : type [BenchmarkAccumulatorT ],
5560 benchmark_class : type [BenchmarkT ],
56- requests : Iterable [RequestT | Iterable [RequestT | tuple [ RequestT , float ] ]],
61+ requests : Iterable [RequestT | MultiTurnRequestT [RequestT ]],
5762 backend : BackendInterface [RequestT , ResponseT ],
5863 profile : Profile ,
5964 environment : Environment ,
60- data : list [Any ],
61- progress : BenchmarkerProgress [BenchmarkT ] | None = None ,
65+ progress : (
66+ BenchmarkerProgress [BenchmarkAccumulatorT , BenchmarkT ] | None
67+ ) = None ,
6268 sample_requests : int | None = 20 ,
6369 warmup : float | None = None ,
6470 cooldown : float | None = None ,
6571 prefer_response_metrics : bool = True ,
6672 ) -> AsyncIterator [BenchmarkT ]:
6773 """
68- Execute benchmark runs across multiple scheduling strategies.
69-
70- Orchestrates the complete benchmark workflow by iterating through scheduling
71- strategies from the profile, executing requests through the scheduler,
72- aggregating metrics, and compiling final benchmark results.
73-
74- :param benchmark_class: Class for constructing final benchmark objects
75- :param requests: Request datasets for processing across strategies
76- :param backend: Backend interface for request processing
77- :param profile: Benchmark profile defining strategies and constraints
78- :param environment: Execution environment for coordination
79- :param progress: Optional progress tracker for benchmark lifecycle events
80- :param sample_requests: Number of sample requests to use for estimation
81- :param warmup: Optional warmup duration in seconds before benchmarking
82- :param cooldown: Optional cooldown duration in seconds after benchmarking
83- :param prefer_response_metrics: Whether to prefer response-based metrics over
84- request-based metrics
85- :yield: Compiled benchmark results for each strategy execution
74+ Execute benchmark runs across scheduling strategies defined in the profile.
75+
76+ :param accumulator_class: Class for accumulating metrics during execution
77+ :param benchmark_class: Class for constructing final benchmark results
78+ :param requests: Request datasets to process across strategies
79+ :param backend: Backend interface for executing requests
80+ :param profile: Profile defining scheduling strategies and constraints
81+ :param environment: Environment for execution coordination
82+ :param progress: Optional tracker for benchmark lifecycle events
83+ :param sample_requests: Number of requests to sample for estimation
84+ :param warmup: Warmup duration in seconds before benchmarking
85+ :param cooldown: Cooldown duration in seconds after benchmarking
86+ :param prefer_response_metrics: Whether to prefer response metrics over
87+ request metrics
88+ :yield: Compiled benchmark result for each strategy execution
8689 :raises Exception: If benchmark execution or compilation fails
8790 """
8891 with self .thread_lock :
@@ -91,21 +94,38 @@ async def run(
9194
9295 run_id = str (uuid .uuid4 ())
9396 strategies_generator = profile .strategies_generator ()
97+ strategy : SchedulingStrategy | None
98+ constraints : dict [str , Constraint ] | None
9499 strategy , constraints = next (strategies_generator )
95100
96101 while strategy is not None :
97102 if progress :
98103 await progress .on_benchmark_start (strategy )
99104
100- args = BenchmarkerArgs (
105+ config = BenchmarkConfig (
101106 run_id = run_id ,
102107 run_index = len (profile .completed_strategies ),
108+ strategy = strategy ,
109+ constraints = (
110+ {
111+ key : InfoMixin .extract_from_obj (val )
112+ for key , val in constraints .items ()
113+ }
114+ if isinstance (constraints , dict )
115+ else {"constraint" : InfoMixin .extract_from_obj (constraints )}
116+ if constraints
117+ else {}
118+ ),
103119 sample_requests = sample_requests ,
104120 warmup = warmup ,
105121 cooldown = cooldown ,
106122 prefer_response_metrics = prefer_response_metrics ,
123+ profile = profile ,
124+ requests = InfoMixin .extract_from_obj (requests ),
125+ backend = InfoMixin .extract_from_obj (backend ),
126+ environment = InfoMixin .extract_from_obj (environment ),
107127 )
108- estimated_state = EstimatedBenchmarkState ( )
128+ accumulator = accumulator_class ( config = config )
109129 scheduler_state = None
110130 scheduler : Scheduler [RequestT , ResponseT ] = Scheduler ()
111131
@@ -123,35 +143,26 @@ async def run(
123143 ** constraints or {},
124144 ):
125145 try :
126- benchmark_class .update_estimate (
127- args ,
128- estimated_state ,
146+ accumulator .update_estimate (
129147 response ,
130148 request ,
131149 request_info ,
132150 scheduler_state ,
133151 )
134152 if progress :
135153 await progress .on_benchmark_update (
136- estimated_state , scheduler_state
154+ accumulator , scheduler_state
137155 )
138156 except Exception as err : # noqa: BLE001
139157 logger .error (
140158 f"Error updating benchmark estimate/progress: { err } "
141159 )
142160
143161 benchmark = benchmark_class .compile (
144- args = args ,
145- estimated_state = estimated_state ,
162+ accumulator = accumulator ,
146163 scheduler_state = scheduler_state ,
147- profile = profile ,
148- requests = requests ,
149- backend = backend ,
150- environment = environment ,
151- strategy = strategy ,
152- constraints = constraints ,
153- data = data ,
154164 )
165+
155166 if progress :
156167 await progress .on_benchmark_complete (benchmark )
157168
0 commit comments