Skip to content

Commit bfc4b2f

Browse files
committed
Updated and functional state with e2e working for new stats calculations, metrics, and outputs
1 parent 7eb304e commit bfc4b2f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+5266
-4455
lines changed

src/guidellm/backends/backend.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,8 @@ def requests_limit(self) -> int | None:
102102
return None
103103

104104
@abstractmethod
105-
async def default_model(self) -> str | None:
105+
async def default_model(self) -> str:
106106
"""
107107
:return: The default model name or identifier for generation requests,
108-
None if no default model is available
109108
"""
110109
...

src/guidellm/benchmark/__init__.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
from .benchmarker import Benchmarker
1414
from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
15-
from .output import (
15+
from .outputs import (
1616
GenerativeBenchmarkerConsole,
1717
GenerativeBenchmarkerCSV,
1818
GenerativeBenchmarkerHTML,
@@ -31,34 +31,43 @@
3131
from .scenarios import get_builtin_scenarios
3232
from .schemas import (
3333
Benchmark,
34-
BenchmarkerArgs,
35-
BenchmarkerDict,
34+
BenchmarkAccumulator,
35+
BenchmarkAccumulatorT,
36+
BenchmarkConfig,
3637
BenchmarkGenerativeTextArgs,
37-
BenchmarkSchedulerStats,
38-
EstimatedBenchmarkState,
38+
BenchmarkT,
3939
GenerativeAudioMetricsSummary,
4040
GenerativeBenchmark,
41+
GenerativeBenchmarkAccumulator,
4142
GenerativeBenchmarksReport,
43+
GenerativeBenchmarkTimings,
4244
GenerativeImageMetricsSummary,
4345
GenerativeMetrics,
46+
GenerativeMetricsAccumulator,
4447
GenerativeMetricsSummary,
48+
GenerativeRequestsAccumulator,
49+
GenerativeTextMetricsSummary,
4550
GenerativeVideoMetricsSummary,
46-
SchedulerDict,
51+
RunningMetricStats,
52+
SchedulerMetrics,
53+
SchedulerMetricsAccumulator,
4754
)
4855

4956
__all__ = [
5057
"AsyncProfile",
5158
"Benchmark",
59+
"BenchmarkAccumulator",
60+
"BenchmarkAccumulatorT",
61+
"BenchmarkConfig",
5262
"BenchmarkGenerativeTextArgs",
53-
"BenchmarkSchedulerStats",
63+
"BenchmarkT",
5464
"Benchmarker",
55-
"BenchmarkerArgs",
56-
"BenchmarkerDict",
5765
"BenchmarkerProgress",
5866
"ConcurrentProfile",
59-
"EstimatedBenchmarkState",
6067
"GenerativeAudioMetricsSummary",
6168
"GenerativeBenchmark",
69+
"GenerativeBenchmarkAccumulator",
70+
"GenerativeBenchmarkTimings",
6271
"GenerativeBenchmarkerCSV",
6372
"GenerativeBenchmarkerConsole",
6473
"GenerativeBenchmarkerHTML",
@@ -67,11 +76,16 @@
6776
"GenerativeConsoleBenchmarkerProgress",
6877
"GenerativeImageMetricsSummary",
6978
"GenerativeMetrics",
79+
"GenerativeMetricsAccumulator",
7080
"GenerativeMetricsSummary",
81+
"GenerativeRequestsAccumulator",
82+
"GenerativeTextMetricsSummary",
7183
"GenerativeVideoMetricsSummary",
7284
"Profile",
7385
"ProfileType",
74-
"SchedulerDict",
86+
"RunningMetricStats",
87+
"SchedulerMetrics",
88+
"SchedulerMetricsAccumulator",
7589
"SweepProfile",
7690
"SynchronousProfile",
7791
"ThroughputProfile",

src/guidellm/benchmark/benchmarker.py

Lines changed: 59 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
"""
22
Benchmark execution orchestration and lifecycle management.
33
4-
Provides the core benchmarking engine that coordinates request scheduling,
5-
data aggregation, and result compilation across different execution strategies
6-
and environments. The Benchmarker acts as the primary workflow coordinator,
7-
managing the complete benchmark lifecycle from request submission through
8-
result compilation while supporting thread-safe singleton operations.
4+
Provides the core benchmarking engine coordinating request scheduling,
5+
data aggregation, and result compilation across execution strategies
6+
and environments. The Benchmarker manages the complete benchmark lifecycle
7+
from request submission through result compilation while supporting
8+
thread-safe singleton operations for consistent state management.
99
"""
1010

1111
from __future__ import annotations
@@ -18,19 +18,23 @@
1818
from guidellm.benchmark.profile import Profile
1919
from guidellm.benchmark.progress import BenchmarkerProgress
2020
from guidellm.benchmark.schemas import (
21-
BenchmarkerArgs,
21+
BenchmarkAccumulatorT,
22+
BenchmarkConfig,
2223
BenchmarkT,
23-
EstimatedBenchmarkState,
2424
)
2525
from guidellm.logger import logger
2626
from guidellm.scheduler import (
2727
BackendInterface,
28+
Constraint,
2829
Environment,
30+
MultiTurnRequestT,
2931
RequestT,
3032
ResponseT,
3133
Scheduler,
34+
SchedulingStrategy,
3235
)
3336
from guidellm.utils import ThreadSafeSingletonMixin
37+
from guidellm.utils.mixins import InfoMixin
3438

3539
__all__ = ["Benchmarker"]
3640

@@ -43,46 +47,45 @@ class Benchmarker(
4347
"""
4448
Abstract benchmark orchestrator for request processing workflows.
4549
46-
Coordinates execution of benchmarking runs across different scheduling
47-
strategies, aggregating metrics and compiling results. Manages the complete
48-
benchmark lifecycle from request submission through result compilation while
49-
implementing thread-safe singleton pattern to ensure consistent state across
50-
concurrent operations.
50+
Coordinates benchmarking runs across scheduling strategies, aggregating
51+
metrics and compiling results. Manages the complete benchmark lifecycle
52+
from request submission through result compilation while implementing a
53+
thread-safe singleton pattern for consistent state across concurrent
54+
operations.
5155
"""
5256

5357
async def run(
5458
self,
59+
accumulator_class: type[BenchmarkAccumulatorT],
5560
benchmark_class: type[BenchmarkT],
56-
requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
61+
requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
5762
backend: BackendInterface[RequestT, ResponseT],
5863
profile: Profile,
5964
environment: Environment,
60-
data: list[Any],
61-
progress: BenchmarkerProgress[BenchmarkT] | None = None,
65+
progress: (
66+
BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
67+
) = None,
6268
sample_requests: int | None = 20,
6369
warmup: float | None = None,
6470
cooldown: float | None = None,
6571
prefer_response_metrics: bool = True,
6672
) -> AsyncIterator[BenchmarkT]:
6773
"""
68-
Execute benchmark runs across multiple scheduling strategies.
69-
70-
Orchestrates the complete benchmark workflow by iterating through scheduling
71-
strategies from the profile, executing requests through the scheduler,
72-
aggregating metrics, and compiling final benchmark results.
73-
74-
:param benchmark_class: Class for constructing final benchmark objects
75-
:param requests: Request datasets for processing across strategies
76-
:param backend: Backend interface for request processing
77-
:param profile: Benchmark profile defining strategies and constraints
78-
:param environment: Execution environment for coordination
79-
:param progress: Optional progress tracker for benchmark lifecycle events
80-
:param sample_requests: Number of sample requests to use for estimation
81-
:param warmup: Optional warmup duration in seconds before benchmarking
82-
:param cooldown: Optional cooldown duration in seconds after benchmarking
83-
:param prefer_response_metrics: Whether to prefer response-based metrics over
84-
request-based metrics
85-
:yield: Compiled benchmark results for each strategy execution
74+
Execute benchmark runs across scheduling strategies defined in the profile.
75+
76+
:param accumulator_class: Class for accumulating metrics during execution
77+
:param benchmark_class: Class for constructing final benchmark results
78+
:param requests: Request datasets to process across strategies
79+
:param backend: Backend interface for executing requests
80+
:param profile: Profile defining scheduling strategies and constraints
81+
:param environment: Environment for execution coordination
82+
:param progress: Optional tracker for benchmark lifecycle events
83+
:param sample_requests: Number of requests to sample for estimation
84+
:param warmup: Warmup duration in seconds before benchmarking
85+
:param cooldown: Cooldown duration in seconds after benchmarking
86+
:param prefer_response_metrics: Whether to prefer response metrics over
87+
request metrics
88+
:yield: Compiled benchmark result for each strategy execution
8689
:raises Exception: If benchmark execution or compilation fails
8790
"""
8891
with self.thread_lock:
@@ -91,21 +94,38 @@ async def run(
9194

9295
run_id = str(uuid.uuid4())
9396
strategies_generator = profile.strategies_generator()
97+
strategy: SchedulingStrategy | None
98+
constraints: dict[str, Constraint] | None
9499
strategy, constraints = next(strategies_generator)
95100

96101
while strategy is not None:
97102
if progress:
98103
await progress.on_benchmark_start(strategy)
99104

100-
args = BenchmarkerArgs(
105+
config = BenchmarkConfig(
101106
run_id=run_id,
102107
run_index=len(profile.completed_strategies),
108+
strategy=strategy,
109+
constraints=(
110+
{
111+
key: InfoMixin.extract_from_obj(val)
112+
for key, val in constraints.items()
113+
}
114+
if isinstance(constraints, dict)
115+
else {"constraint": InfoMixin.extract_from_obj(constraints)}
116+
if constraints
117+
else {}
118+
),
103119
sample_requests=sample_requests,
104120
warmup=warmup,
105121
cooldown=cooldown,
106122
prefer_response_metrics=prefer_response_metrics,
123+
profile=profile,
124+
requests=InfoMixin.extract_from_obj(requests),
125+
backend=InfoMixin.extract_from_obj(backend),
126+
environment=InfoMixin.extract_from_obj(environment),
107127
)
108-
estimated_state = EstimatedBenchmarkState()
128+
accumulator = accumulator_class(config=config)
109129
scheduler_state = None
110130
scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
111131

@@ -123,35 +143,26 @@ async def run(
123143
**constraints or {},
124144
):
125145
try:
126-
benchmark_class.update_estimate(
127-
args,
128-
estimated_state,
146+
accumulator.update_estimate(
129147
response,
130148
request,
131149
request_info,
132150
scheduler_state,
133151
)
134152
if progress:
135153
await progress.on_benchmark_update(
136-
estimated_state, scheduler_state
154+
accumulator, scheduler_state
137155
)
138156
except Exception as err: # noqa: BLE001
139157
logger.error(
140158
f"Error updating benchmark estimate/progress: {err}"
141159
)
142160

143161
benchmark = benchmark_class.compile(
144-
args=args,
145-
estimated_state=estimated_state,
162+
accumulator=accumulator,
146163
scheduler_state=scheduler_state,
147-
profile=profile,
148-
requests=requests,
149-
backend=backend,
150-
environment=environment,
151-
strategy=strategy,
152-
constraints=constraints,
153-
data=data,
154164
)
165+
155166
if progress:
156167
await progress.on_benchmark_complete(benchmark)
157168

0 commit comments

Comments
 (0)