-
EVALUATION METRIC
+
Evaluation Metrics
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
index 8a8b3af5..8f6988d0 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
@@ -1,34 +1,55 @@
.dialog-container {
border-radius: 12px;
padding: 18px;
- width:500px;
+ width: 520px;
box-shadow: 0 8px 16px var(--run-eval-config-dialog-container-box-shadow-color);
}
-.threshold-slider {
- --mdc-slider-active-track-color: var(--run-eval-config-dialog-threshold-slider-active-track-color);
- --mdc-slider-inactive-track-color: var(--run-eval-config-dialog-threshold-slider-inactive-track-color);
- --mdc-slider-handle-color: var(--run-eval-config-dialog-threshold-slider-handle-color);
- --mdc-slider-ripple-color: var(--run-eval-config-dialog-threshold-slider-ripple-color);
- width: 100px
+.eval-form {
+ display: flex;
+ flex-direction: column;
+ gap: 16px;
}
.metric-row {
display: flex;
flex-direction: row;
- align-items: center;
+ gap: 16px;
+ align-items: flex-start;
+}
+
+.metric-details {
+ flex: 1;
+ display: flex;
+ flex-direction: column;
+ gap: 4px;
+}
+
+.metric-title {
+ font-weight: 500;
}
-.metric-name {
- width: 250px;
+.metric-description {
+ font-size: 13px;
+ color: var(--run-eval-config-dialog-metric-description-color, #9aa0a6);
}
-.threshold-value {
- margin-left: 20px;
+.metric-hints {
+ font-size: 12px;
+ color: var(--run-eval-config-dialog-metric-hints-color, #9aa0a6);
}
-.mdc-slider__thumb--with-indicator {
- background-color: var(--mdc-slider-handle-color, var(--run-eval-config-dialog-mdc-slider-thumb-background-color));
- border: none !important;
- box-shadow: none !important;
-}
\ No newline at end of file
+.metric-input {
+ width: 180px;
+}
+
+.metric-error {
+ margin-top: 4px;
+ font-size: 12px;
+ color: var(--run-eval-config-dialog-metric-error-color, #d93025);
+}
+
+.no-metrics {
+ font-size: 14px;
+ color: var(--run-eval-config-dialog-no-metrics-color, #9aa0a6);
+}
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
index 7a9bbbf7..062ddb03 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
@@ -22,8 +22,6 @@ import {
MatDialogModule,
MatDialogRef,
} from '@angular/material/dialog';
-import {MatRadioModule} from '@angular/material/radio';
-import {MatSliderModule} from '@angular/material/slider';
import {NoopAnimationsModule} from '@angular/platform-browser/animations';
@@ -42,8 +40,6 @@ describe('RunEvalConfigDialogComponent', () => {
imports: [
ReactiveFormsModule,
MatDialogModule,
- MatRadioModule,
- MatSliderModule,
NoopAnimationsModule,
RunEvalConfigDialogComponent,
],
@@ -52,14 +48,24 @@ describe('RunEvalConfigDialogComponent', () => {
{
provide: MAT_DIALOG_DATA,
useValue: {
- evalMetrics: [
+ metrics: [
{
metricName: 'tool_trajectory_avg_score',
threshold: 1,
+ metricValueInfo: {
+ minThreshold: 0,
+ maxThreshold: 1,
+ step: 0.1,
+ },
},
{
metricName: 'response_match_score',
threshold: 0.7,
+ metricValueInfo: {
+ minThreshold: 0,
+ maxThreshold: 1,
+ step: 0.1,
+ },
},
],
},
@@ -93,23 +99,26 @@ describe('RunEvalConfigDialogComponent', () => {
expect(dialogRefSpy.close).toHaveBeenCalledWith(null);
});
- it('should update threshold value when slider changes (simulated)', () => {
- const toolTrajectoryAvgScoreSlider = component.evalForm.get(
- 'tool_trajectory_avg_score_threshold'
- )!;
- const responseMatchScoreSlider = component.evalForm.get(
- 'response_match_score_threshold'
- )!;
+ // Verifies that onStart() closes the dialog with metrics whose thresholds
+ // come from the current form control values.
+ it('should close dialog with updated thresholds on start', () => {
+ const toolControl =
+ component.evalForm.get('tool_trajectory_avg_score_threshold')!;
+ const responseControl =
+ component.evalForm.get('response_match_score_threshold')!;
- toolTrajectoryAvgScoreSlider.setValue(0.4); // Simulate slider value change
- responseMatchScoreSlider.setValue(0.5); // Simulate slider value change
- fixture.detectChanges();
+ // Both values lie inside the 0..1 range declared in the injected metricValueInfo.
+ toolControl.setValue(0.4);
+ responseControl.setValue(0.5);
- expect(toolTrajectoryAvgScoreSlider.value).toBe(0.4);
- expect(responseMatchScoreSlider.value).toBe(0.5);
- const thresholdValueDisplays =
- fixture.nativeElement.querySelectorAll('.threshold-value');
- expect(thresholdValueDisplays[0].textContent).toContain('0.4');
- expect(thresholdValueDisplays[1].textContent).toContain('0.5');
+ component.onStart();
+
+ // objectContaining: only metricName and threshold are pinned; other fields
+ // of each returned metric are not checked here.
+ expect(dialogRefSpy.close).toHaveBeenCalledWith([
+ jasmine.objectContaining({
+ metricName: 'tool_trajectory_avg_score',
+ threshold: 0.4,
+ }),
+ jasmine.objectContaining({
+ metricName: 'response_match_score',
+ threshold: 0.5,
+ }),
+ ]);
});
});
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
index 94ef2199..5619e35c 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
@@ -16,13 +16,16 @@
*/
import {Component, Inject} from '@angular/core';
-import { FormBuilder, FormGroup, Validators, FormsModule, ReactiveFormsModule } from '@angular/forms';
+import {FormBuilder, FormGroup, Validators, FormsModule, ReactiveFormsModule} from '@angular/forms';
import { MAT_DIALOG_DATA, MatDialogRef, MatDialogTitle, MatDialogContent, MatDialogActions } from '@angular/material/dialog';
-import {EvalMetric} from '../../../core/models/Eval';
+import {EvalMetricConfig} from '../../../core/models/Eval';
import { CdkScrollable } from '@angular/cdk/scrolling';
-import { MatSlider, MatSliderThumb } from '@angular/material/slider';
import { MatButton } from '@angular/material/button';
+import { MatFormField } from '@angular/material/form-field';
+import { MatLabel } from '@angular/material/form-field';
+import { MatInput } from '@angular/material/input';
+import { NgIf, NgFor } from '@angular/common';
/**
* @interface EvalConfigData
@@ -30,7 +33,7 @@ import { MatButton } from '@angular/material/button';
* evaluation metrics.
*/
export interface EvalConfigData {
- evalMetrics: EvalMetric[];
+ metrics: EvalMetricConfig[];
}
@Component({
@@ -43,17 +46,21 @@ export interface EvalConfigData {
MatDialogContent,
FormsModule,
ReactiveFormsModule,
- MatSlider,
- MatSliderThumb,
MatDialogActions,
MatButton,
+ MatFormField,
+ MatLabel,
+ MatInput,
+ NgIf,
+ NgFor,
],
})
export class RunEvalConfigDialogComponent {
// FormGroup to manage the dialog's form controls
evalForm: FormGroup;
- evalMetrics: EvalMetric[] = [];
+ metrics: EvalMetricConfig[] = [];
+ // Maps a metric's name to the name of the form control that holds its threshold.
+ private controlNameByMetric = new Map<string, string>();
/**
* @constructor
@@ -68,48 +75,124 @@ export class RunEvalConfigDialogComponent {
public dialogRef: MatDialogRef,
private fb: FormBuilder,
@Inject(MAT_DIALOG_DATA) public data: EvalConfigData) {
- this.evalMetrics = this.data.evalMetrics;
-
- // Initialize the form with controls and validators
- this.evalForm = this.fb.group({
- tool_trajectory_avg_score_threshold: [
- this.getEvalMetricThresholdFromData('tool_trajectory_avg_score'),
- [Validators.required, Validators.min(0), Validators.max(1)]
- ],
- response_match_score_threshold: [
- this.getEvalMetricThresholdFromData('response_match_score'),
- [Validators.required, Validators.min(0), Validators.max(1)]
- ]
- });
+ this.metrics = this.data.metrics ?? [];
+
+ this.evalForm = this.fb.group({});
+ this.initializeForm();
+ }
+
+ /** Returns the form control name registered for `metricName`, or '' when none is registered. */
+ protected getControlName(metricName: string): string {
+   const registeredName = this.controlNameByMetric.get(metricName);
+   return registeredName ?? '';
+ }
+
+ /** Returns the metric's `minThreshold`, or undefined when the metric has no value info or no minimum. */
+ protected getMin(metric: EvalMetricConfig): number|undefined {
+   const {metricValueInfo} = metric;
+   return metricValueInfo?.minThreshold;
}
- private getEvalMetricThresholdFromData(metricName: string): number {
- return this.evalMetrics.find((metric) => metric.metricName === metricName)
- ?.threshold ??
- 0;
+ /** Returns the metric's `maxThreshold`, or undefined when the metric has no value info or no maximum. */
+ protected getMax(metric: EvalMetricConfig): number|undefined {
+   const {metricValueInfo} = metric;
+   return metricValueInfo?.maxThreshold;
+ }
+
+ /** Returns the metric's `step`, or undefined when the metric has no value info or no step. */
+ protected getStep(metric: EvalMetricConfig): number|undefined {
+   const {metricValueInfo} = metric;
+   return metricValueInfo?.step;
+ }
+
+ /**
+  * Adds one threshold control per metric to `evalForm` and records the
+  * metric-name -> control-name mapping. Every control is required; min/max
+  * validators are attached only when the metric declares those bounds.
+  */
+ private initializeForm() {
+   this.metrics.forEach((metricConfig) => {
+     const name = this.createControlName(metricConfig.metricName);
+     this.controlNameByMetric.set(metricConfig.metricName, name);
+
+     const lowerBound = this.getMin(metricConfig);
+     const upperBound = this.getMax(metricConfig);
+     const rules = [
+       Validators.required,
+       ...(lowerBound === undefined ? [] : [Validators.min(lowerBound)]),
+       ...(upperBound === undefined ? [] : [Validators.max(upperBound)]),
+     ];
+
+     // The metric's current threshold is the control's initial value.
+     this.evalForm.addControl(
+         name, this.fb.control(metricConfig.threshold, rules));
+   });
+ }
+
+ /**
+  * Derives a form control name from a metric name: every character outside
+  * [a-zA-Z0-9] becomes '_', and '_threshold' is appended.
+  */
+ private createControlName(metricName: string): string {
+   const safeName = metricName.replace(/[^a-zA-Z0-9]/g, '_');
+   return safeName + '_threshold';
}
+ /**
+  * When the form is valid, rebuilds `metrics` with thresholds taken from the
+  * form controls and closes the dialog with that array. When it is invalid,
+  * marks every control as touched instead, which makes hasError() report the
+  * invalid controls.
+  */
onStart(): void {
if (this.evalForm.valid) {
- const {
- tool_trajectory_avg_score_threshold,
- response_match_score_threshold
- } = this.evalForm.value;
-
- for (const metric of this.evalMetrics) {
- if (metric.metricName === 'tool_trajectory_avg_score') {
- metric.threshold = tool_trajectory_avg_score_threshold;
- } else if (metric.metricName === 'response_match_score') {
- metric.threshold = response_match_score_threshold;
- }
- }
+ // Copy each metric instead of mutating it; only `threshold` is replaced.
+ this.metrics = this.metrics.map((metric) => {
+ const controlName = this.getControlName(metric.metricName);
+ const value = this.evalForm.get(controlName)?.value;
+ return {
+ ...metric,
+ // Coerce the control value to a number before returning it.
+ threshold: Number(value),
+ };
+ });
+
+ this.dialogRef.close(this.metrics);
+
+ return;
+ }
+
+ this.evalForm.markAllAsTouched();
+ }
+
+ /**
+  * True when the metric's control exists, is invalid, and has been either
+  * edited (dirty) or touched.
+  */
+ protected hasError(metric: EvalMetricConfig): boolean {
+   const name = this.getControlName(metric.metricName);
+   const field = this.evalForm.get(name);
+   if (!field || !field.invalid) {
+     return false;
+   }
+   return field.dirty || field.touched;
+ }
- this.dialogRef.close(this.evalMetrics);
+ /**
+  * Returns the validation message for a metric's threshold control.
+  * Errors are checked in the order min, max, required; any other error yields
+  * a generic message. Returns '' when the control is missing or has no errors.
+  */
+ protected getErrorMessage(metric: EvalMetricConfig): string {
+   const errors =
+       this.evalForm.get(this.getControlName(metric.metricName))?.errors;
+   if (!errors) {
+     return '';
}
+   if (errors['min']) {
+     return `Minimum threshold is ${this.getMin(metric)}`;
+   }
+   if (errors['max']) {
+     return `Maximum threshold is ${this.getMax(metric)}`;
+   }
+   return errors['required'] ? 'Threshold is required' : 'Invalid threshold';
}
- onCancel(): void {
- this.dialogRef.close(
- null); // Return null or undefined to indicate cancellation
+ /**
+  * Describes the allowed threshold range of a metric:
+  *   both bounds -> "Range <min> – <max>", only min -> "≥ <min>",
+  *   only max -> "≤ <max>", no bounds -> ''.
+  */
+ protected formatRangeDescription(metric: EvalMetricConfig): string {
+   const min = this.getMin(metric);
+   const max = this.getMax(metric);
+   if (min !== undefined && max !== undefined) {
+     return `Range ${min} – ${max}`;
+   }
+   if (min !== undefined) {
+     return `≥ ${min}`;
+   }
+   // Here min is undefined: either only an upper bound exists or none at all.
+   return max !== undefined ? `≤ ${max}` : '';
}
+
+ /** Returns "Step <step>" for a metric that declares a step, otherwise ''. */
+ protected formatStepDescription(metric: EvalMetricConfig): string {
+   const increment = this.getStep(metric);
+   return increment === undefined ? '' : `Step ${increment}`;
+ }
+
+ /** Closes the dialog with `null`, i.e. without returning any metrics. */
+ onCancel(): void {
+ this.dialogRef.close(null);
+ }
}
diff --git a/src/app/core/models/Eval.ts b/src/app/core/models/Eval.ts
index 8b910a34..cdc0ff4b 100644
--- a/src/app/core/models/Eval.ts
+++ b/src/app/core/models/Eval.ts
@@ -23,6 +23,26 @@
export declare interface EvalMetric {
metricName: string;
threshold: number;
+ criterion?: unknown;
+}
+
+/**
+ * Optional numeric constraints for a metric's threshold. The run-eval config
+ * dialog reads `minThreshold`/`maxThreshold` to build min/max validators and
+ * range hints, and `step` for its step hint.
+ */
+export declare interface MetricValueInfo {
+ // NOTE(review): presumably the threshold to use when none is configured — confirm against the producer of this data.
+ defaultThreshold?: number;
+ minThreshold?: number;
+ maxThreshold?: number;
+ step?: number;
+}
+
+/**
+ * Descriptive information about a metric: its name plus an optional
+ * description and optional value constraints.
+ * NOTE(review): presumably the item shape returned by the metrics-info endpoint — confirm.
+ */
+export declare interface MetricInfo {
+ metricName: string;
+ description?: string;
+ metricValueInfo?: MetricValueInfo;
+}
+
+/**
+ * An EvalMetric (metricName, threshold, criterion) extended with an optional
+ * description and optional value constraints. `criterion` is inherited from
+ * EvalMetric and therefore not redeclared here.
+ */
+export declare interface EvalMetricConfig extends EvalMetric {
+ description?: string;
+ metricValueInfo?: MetricValueInfo;
}
export const DEFAULT_EVAL_METRICS: EvalMetric[] = [
diff --git a/src/app/core/services/eval.service.ts b/src/app/core/services/eval.service.ts
index 8dfdd873..0defe869 100644
--- a/src/app/core/services/eval.service.ts
+++ b/src/app/core/services/eval.service.ts
@@ -88,6 +88,14 @@ export class EvalService {
});
}
+ /**
+  * Issues a GET to `<apiServerDomain>/apps/<appName>/metrics-info` and returns
+  * the resulting observable.
+  * When `apiServerDomain` is undefined or null (the check uses loose `!=`),
+  * no request is made and a bare `new Observable()` is returned instead.
+  * NOTE(review): an Observable constructed without a subscribe function
+  * presumably never emits or completes — confirm callers tolerate that.
+  */
+ listMetricsInfo(appName: string) {
+ if (this.apiServerDomain != undefined) {
+ const url = this.apiServerDomain + `/apps/${appName}/metrics-info`;
+ return this.http.get(url, {});
+ }
+ return new Observable();
+ }
+
listEvalResults(appName: string) {
if (this.apiServerDomain != undefined) {
const url = this.apiServerDomain + `/apps/${appName}/eval_results`;