From 9b91e63852a0804da50caa91b6acdd0bb5a94d3d Mon Sep 17 00:00:00 2001
From: Aki-07 <akileshramesh2003@gmail.com>
Date: Sat, 18 Oct 2025 14:49:25 +0530
Subject: [PATCH 1/4] feat(eval): extend metric models and service

---
 src/app/core/models/Eval.ts           | 20 ++++++++++++++++++++
 src/app/core/services/eval.service.ts |  8 ++++++++
 2 files changed, 28 insertions(+)
diff --git a/src/app/core/models/Eval.ts b/src/app/core/models/Eval.ts
index 8b910a34..cdc0ff4b 100644
--- a/src/app/core/models/Eval.ts
+++ b/src/app/core/models/Eval.ts
@@ -23,6 +23,26 @@
 export declare interface EvalMetric {
   metricName: string;
   threshold: number;
+  criterion?: unknown;
+}
+
+export declare interface MetricValueInfo {
+  defaultThreshold?: number;
+  minThreshold?: number;
+  maxThreshold?: number;
+  step?: number;
+}
+
+export declare interface MetricInfo {
+  metricName: string;
+  description?: string;
+  metricValueInfo?: MetricValueInfo;
+}
+
+export declare interface EvalMetricConfig extends EvalMetric {
+  description?: string;
+  metricValueInfo?: MetricValueInfo;
+  // `criterion` is inherited from EvalMetric, so it is not redeclared here.
 }
 
 export const DEFAULT_EVAL_METRICS: EvalMetric[] = [
diff --git a/src/app/core/services/eval.service.ts b/src/app/core/services/eval.service.ts
index 8dfdd873..0defe869 100644
--- a/src/app/core/services/eval.service.ts
+++ b/src/app/core/services/eval.service.ts
@@ -88,6 +88,14 @@ export class EvalService {
     });
   }
 
+  /** Requests the metric info that the API server exposes for `appName`. */
+  listMetricsInfo(appName: string) {
+    if (this.apiServerDomain == undefined) {
+      return new Observable<any>();  // Nothing to query without a server.
+    }
+    return this.http.get<any>(this.apiServerDomain + `/apps/${appName}/metrics-info`, {});
+  }
+
   listEvalResults(appName: string) {
     if (this.apiServerDomain != undefined) {
       const url = this.apiServerDomain + `/apps/${appName}/eval_results`;

From bf6f2f6c709527b45815ca2e2b9a510bbddea913 Mon Sep 17 00:00:00 2001
From: Aki-07 <akileshramesh2003@gmail.com>
Date: Sat, 18 Oct 2025 14:49:33 +0530
Subject: [PATCH 2/4] feat(eval-tab): support dynamic metric selection and
 results

---
 .../eval-tab/eval-tab.component.html          |  71 ++++-
 .../eval-tab/eval-tab.component.scss          |  72 +++++
 .../components/eval-tab/eval-tab.component.ts | 264 ++++++++++++++++--
 3 files changed, 386 insertions(+), 21 deletions(-)

diff --git a/src/app/components/eval-tab/eval-tab.component.html b/src/app/components/eval-tab/eval-tab.component.html
index fff5aac6..e7d4753d 100644
--- a/src/app/components/eval-tab/eval-tab.component.html
+++ b/src/app/components/eval-tab/eval-tab.component.html
@@ -63,6 +63,36 @@
         <div>
           @if (!showEvalHistory()) {
             <div>
+              <div class="metric-selection">
+                <mat-form-field appearance="outline" class="metric-select-field">
+                  <mat-label>Evaluation metrics</mat-label>
+                  <mat-select multiple [value]="selectedMetricNames" (valueChange)="onMetricSelectionChange($event)">
+                    @for (metric of metricOptions; track metric.metricName) {
+                      <mat-option [value]="metric.metricName">
+                        <span>{{ metric.metricName }}</span>
+                        @if (metric.description) {
+                          <span class="metric-option-description"> — {{ metric.description }}</span>
+                        }
+                      </mat-option>
+                    }
+                  </mat-select>
+                </mat-form-field>
+                @if (selectedMetricNames.length === 0) {
+                  <div class="metric-selection-helper">
+                    Select at least one metric before running an evaluation.
+                  </div>
+                } @else {
+                  <div class="selected-metric-summary">
+                    @for (metric of metricOptions; track metric.metricName) {
+                      @if (metric.selected) {
+                        <span class="selected-metric-chip">
+                          {{ metric.metricName }} · threshold: {{ metric.threshold }}
+                        </span>
+                      }
+                    }
+                  </div>
+                }
+              </div>
               <div class="evaluation-tab-header">
                 <button class="run-eval-btn" (click)="openEvalConfigDialog()">Run Evaluation</button>
                 <mat-icon class="evaluation-history-icon" (click)="toggleEvalHistoryButton()" matTooltip="View eval run history">history</mat-icon>
@@ -128,9 +158,15 @@
                         </div>
                         @if (getEvalMetrics(evalResult)) {
                           <div class="status-card__metrics">
-                            @for (evalMetric of getEvalMetrics(evalResult); track evalMetric) {
-                              <span class="status-card__metric"> {{ evalMetric.metricName }}:
-                                {{ evalMetric.threshold }}
+                            @for (evalMetric of getEvalMetrics(evalResult); track evalMetric.metricName) {
+                              <span class="status-card__metric">
+                                {{ evalMetric.metricName }} · Threshold: {{ evalMetric.threshold }}
+                                @if (evalMetric.score !== undefined) {
+                                  · Score: {{ evalMetric.score }}
+                                }
+                                @if (evalMetric.evalStatus) {
+                                  · Status: {{ evalMetric.evalStatus }}
+                                }
                               </span>
                             }
                           </div>
@@ -155,6 +191,34 @@
                                 <div style="padding-top: 4px;">{{ evalResult.finalEvalStatus == 1 ? "PASS": "FAIL"}} </div>
                               </button>
                             </div>
+                            @if (evalResult.overallEvalMetricResults?.length) {
+                              <div class="status-card__metric-results">
+                                @for (metricResult of evalResult.overallEvalMetricResults; track metricResult.metricName) {
+                                  <div
+                                    class="status-card__metric-result"
+                                    [ngClass]="{
+                                      'status-card__metric-result--pass': metricResult.evalStatus === 'PASSED' || metricResult.evalStatus === 1,
+                                      'status-card__metric-result--fail': metricResult.evalStatus === 'FAILED' || metricResult.evalStatus === 2,
+                                      'status-card__metric-result--neutral': metricResult.evalStatus !== 'PASSED' && metricResult.evalStatus !== 'FAILED' && metricResult.evalStatus !== 1 && metricResult.evalStatus !== 2
+                                    }"
+                                  >
+                                    <span class="status-card__metric-result-name">
+                                      {{ metricResult.metricName }}
+                                    </span>
+                                    @if (metricResult.score !== undefined) {
+                                      <span class="status-card__metric-result-score">
+                                        Score: {{ metricResult.score }}
+                                      </span>
+                                    }
+                                    @if (metricResult.threshold !== undefined) {
+                                      <span class="status-card__metric-result-threshold">
+                                        Threshold: {{ metricResult.threshold }}
+                                      </span>
+                                    }
+                                  </div>
+                                }
+                              </div>
+                            }
                           </div>
                         }
                       </div>
@@ -184,4 +248,3 @@
     </div>
   }
 </div>
-
diff --git a/src/app/components/eval-tab/eval-tab.component.scss b/src/app/components/eval-tab/eval-tab.component.scss
index dadc466c..85c89a90 100644
--- a/src/app/components/eval-tab/eval-tab.component.scss
+++ b/src/app/components/eval-tab/eval-tab.component.scss
@@ -165,6 +165,45 @@
   width: 100%;
 }
 
+.metric-selection {
+  display: flex;
+  flex-direction: column;
+  margin-top: 12px;
+  gap: 8px;
+}
+
+.metric-select-field {
+  width: 100%;
+}
+
+.metric-option-description {
+  color: var(--eval-tab-metric-option-description-color, #9aa0a6);
+  font-size: 12px;
+  margin-left: 4px;
+}
+
+.metric-selection-helper {
+  color: var(--eval-tab-metric-selection-helper-color, #9aa0a6);
+  font-size: 12px;
+}
+
+.selected-metric-summary {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+  font-size: 12px;
+  color: var(--eval-tab-selected-metric-summary-color, #9aa0a6);
+}
+
+.selected-metric-chip {
+  background-color: var(
+      --eval-tab-selected-metric-chip-background,
+      rgba(255, 255, 255, 0.08)
+    );
+  border-radius: 12px;
+  padding: 4px 8px;
+}
+
 .evaluation-history-icon {
   cursor: pointer;
   margin-top: 4px;
@@ -271,6 +310,39 @@
     width: 100%;
     margin-top: 15px;
   }
+
+  &__metric-results {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 6px;
+    margin-top: 8px;
+  }
+
+  &__metric-result {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    border-radius: 12px;
+    padding: 4px 8px;
+    background-color: var(
+        --eval-tab-status-card-metric-result-background,
+        rgba(255, 255, 255, 0.08)
+      );
+    font-size: 12px;
+    color: var(--eval-tab-status-card-metric-result-color, #e8eaed);
+
+    &--pass {
+      color: var(--eval-tab-status-card-metric-result-pass-color, #1e8e3e);
+    }
+
+    &--fail {
+      color: var(--eval-tab-status-card-metric-result-fail-color, #d93025);
+    }
+
+    &--neutral {
+      color: var(--eval-tab-status-card-metric-result-neutral-color, #9aa0a6);
+    }
+  }
 }
 
 .eval-spinner {
diff --git a/src/app/components/eval-tab/eval-tab.component.ts b/src/app/components/eval-tab/eval-tab.component.ts
index fb5f0332..3a8880af 100644
--- a/src/app/components/eval-tab/eval-tab.component.ts
+++ b/src/app/components/eval-tab/eval-tab.component.ts
@@ -23,7 +23,7 @@ import { MatTableDataSource, MatTable, MatColumnDef, MatHeaderCellDef, MatHeader
 import {BehaviorSubject, of} from 'rxjs';
 import {catchError} from 'rxjs/operators';
 
-import {DEFAULT_EVAL_METRICS, EvalMetric, EvalCase} from '../../core/models/Eval';
+import {DEFAULT_EVAL_METRICS, EvalMetric, EvalCase, EvalMetricConfig, MetricValueInfo} from '../../core/models/Eval';
 import {Session} from '../../core/models/Session';
 import {Invocation} from '../../core/models/Eval';
 import {EvalService, EVAL_SERVICE} from '../../core/services/eval.service';
@@ -37,6 +37,10 @@ import { MatIcon } from '@angular/material/icon';
 import { MatTooltip } from '@angular/material/tooltip';
 import { NgClass } from '@angular/common';
 import { MatProgressSpinner } from '@angular/material/progress-spinner';
+import { MatFormField } from '@angular/material/form-field';
+import { MatLabel } from '@angular/material/form-field';
+import { MatSelect } from '@angular/material/select';
+import { MatOption } from '@angular/material/core';
 
 
 interface EvaluationResult {
@@ -50,6 +54,10 @@ interface EvaluationResult {
   sessionDetails: any;
 }
 
+interface MetricOption extends EvalMetricConfig {
+  selected: boolean;
+}
+
 interface UIEvaluationResult {
   isToggled: boolean;
   evaluationResults: EvaluationResult[];
@@ -96,6 +104,10 @@ interface AppEvaluationResult {
         MatRowDef,
         MatRow,
         MatProgressSpinner,
+        MatFormField,
+        MatLabel,
+        MatSelect,
+        MatOption,
     ],
 })
 export class EvalTabComponent implements OnInit, OnChanges {
@@ -128,6 +140,9 @@ export class EvalTabComponent implements OnInit, OnChanges {
 
   evalRunning = signal(false);
   evalMetrics: EvalMetric[] = DEFAULT_EVAL_METRICS;
+  metricOptions: MetricOption[] = [];
+  selectedMetricNames: string[] =
+      DEFAULT_EVAL_METRICS.map((metric) => metric.metricName);
 
   // Key: evalSetId
   // Value: EvaluationResult[]
@@ -151,10 +166,203 @@ export class EvalTabComponent implements OnInit, OnChanges {
     });
   }
 
+  /** Loads the current app's metric info and rebuilds the metric options. */
+  private loadMetricsInfo() {
+    const appName = this.appName();
+    if (!appName) {
+      this.initializeMetricOptions([]);
+      return;
+    }
+    this.evalService.listMetricsInfo(appName)
+        .pipe(catchError(() => of({metrics_info: []})))  // Failure: no info.
+        .subscribe((response: any) => {
+          // Accept either key casing, as is already done for metric fields.
+          this.initializeMetricOptions(
+              response?.metricsInfo ?? response?.metrics_info ?? []);
+        });
+  }
+
+  /**
+   * Rebuilds `metricOptions` from raw metric descriptors, then re-derives
+   * `selectedMetricNames` and `evalMetrics` and runs change detection.
+   *
+   * Descriptor keys may be camelCase or snake_case; entries without a name
+   * are dropped. For a metric that already had an option, its threshold,
+   * selection state and criterion are carried over. If no usable descriptor
+   * remains, the default metrics are offered instead.
+   *
+   * @param rawMetrics Raw metric descriptors, e.g. the entries of the
+   *     metrics-info response; a non-array value is treated as empty.
+   */
+  private initializeMetricOptions(rawMetrics: any[]) {
+    // Snapshot the current state so earlier choices survive the rebuild.
+    const previousOptions =
+        new Map(this.metricOptions.map((metric) => [metric.metricName, metric]));
+    const selectedNamesBefore = new Set(this.selectedMetricNames);
+
+    const metrics: MetricOption[] = [];
+    for (const rawMetric of Array.isArray(rawMetrics) ? rawMetrics : []) {
+      const metricName: string = rawMetric?.metricName ??
+          rawMetric?.metric_name ?? rawMetric?.name ?? '';
+      if (!metricName) {
+        continue;
+      }
+
+      const previous = previousOptions.get(metricName);
+      const metricValueInfo = this.normalizeMetricValueInfo(
+          rawMetric?.metricValueInfo ?? rawMetric?.metric_value_info);
+
+      metrics.push({
+        metricName,
+        description: rawMetric?.description ?? previous?.description,
+        metricValueInfo,
+        // Precedence: previous option, then a threshold already known for
+        // this name, then the descriptor's default.
+        threshold: previous?.threshold ?? this.findThreshold(metricName) ??
+            this.getDefaultThreshold(metricValueInfo),
+        // `Set.has` always yields a boolean, so the default check has to be
+        // chained with `||` (`??` would never reach it). Default metrics that
+        // were neither known nor selected before therefore start selected.
+        selected: previous?.selected ??
+            (selectedNamesBefore.has(metricName) ||
+             this.isDefaultMetric(metricName)),
+        criterion: previous?.criterion ?? rawMetric?.criterion,
+      });
+    }
+
+    if (metrics.length === 0) {
+      this.metricOptions = this.buildFallbackMetricOptions();
+    } else {
+      this.metricOptions = metrics;
+      // If nothing ended up selected, select whichever default metrics are
+      // among the options.
+      if (!this.metricOptions.some((metric) => metric.selected)) {
+        this.metricOptions.forEach((metric) => {
+          metric.selected = this.isDefaultMetric(metric.metricName);
+        });
+      }
+    }
+
+    // Derive the selected names and eval metrics from the final options.
+    this.selectedMetricNames =
+        this.metricOptions.filter((metric) => metric.selected)
+            .map((metric) => metric.metricName);
+    this.syncEvalMetricsFromOptions();
+    this.changeDetectorRef.detectChanges();
+  }
+
+  /**
+   * Builds the options used when no metric descriptors are available: every
+   * default metric, pre-selected, with an empty description and no value info.
+   */
+  private buildFallbackMetricOptions(): MetricOption[] {
+    const toOption = ({metricName, threshold}: EvalMetric): MetricOption =>
+        ({metricName, threshold, selected: true, description: '', metricValueInfo: undefined});
+    return DEFAULT_EVAL_METRICS.map(toOption);
+  }
+
+  /**
+   * Normalizes a raw value-info object into a `MetricValueInfo`, accepting
+   * camelCase and snake_case key variants. Values that are null, undefined
+   * or not convertible to a number become `undefined`; a falsy `raw` yields
+   * `undefined` altogether.
+   */
+  private normalizeMetricValueInfo(raw: any): MetricValueInfo|undefined {
+    if (!raw) {
+      return undefined;
+    }
+
+    const asNumber = (value: unknown): number|undefined => {
+      const numeric = value == null ? NaN : Number(value);
+      return isNaN(numeric) ? undefined : numeric;
+    };
+    const defaultThreshold = asNumber(
+        raw.defaultThreshold ?? raw.default_threshold ?? raw.default_value ??
+        raw.default);
+    const minThreshold = asNumber(
+        raw.minThreshold ?? raw.min_threshold ?? raw.min_value ?? raw.min);
+    const maxThreshold = asNumber(
+        raw.maxThreshold ?? raw.max_threshold ?? raw.max_value ?? raw.max);
+    const step = asNumber(raw.step ?? raw.thresholdStep ?? raw.threshold_step);
+    return {defaultThreshold, minThreshold, maxThreshold, step};
+  }
+
+  private getDefaultThreshold(metricValueInfo: MetricValueInfo|undefined) {
+    return metricValueInfo?.defaultThreshold ?? 1;  // 1 when no default is declared.
+  }
+
+  /**
+   * Returns the threshold already known for `metricName`: the one in the
+   * current `evalMetrics` if the metric is there, otherwise the one in
+   * `DEFAULT_EVAL_METRICS`, otherwise `undefined`.
+   */
+  private findThreshold(metricName: string): number|undefined {
+    const byName = (metric: EvalMetric) => metric.metricName === metricName;
+    const known = this.evalMetrics.find(byName) ?? DEFAULT_EVAL_METRICS.find(byName);
+    return known?.threshold;
+  }
+
+  /** Tells whether `metricName` is listed in `DEFAULT_EVAL_METRICS`. */
+  private isDefaultMetric(metricName: string): boolean {
+    return DEFAULT_EVAL_METRICS.map((metric) => metric.metricName).includes(metricName);
+  }
+
+  /** Applies a new multi-select value to the options and refreshes `evalMetrics`. */
+  protected onMetricSelectionChange(selected: string[]) {
+    this.selectedMetricNames = selected;
+    const chosenNames = new Set(selected);
+    for (const option of this.metricOptions) {
+      option.selected = chosenNames.has(option.metricName);
+      // A selected metric that has no threshold yet gets its default one.
+      if (option.selected && option.threshold == null) {
+        option.threshold = this.getDefaultThreshold(option.metricValueInfo);
+      }
+    }
+
+    this.syncEvalMetricsFromOptions();
+  }
+
+  /**
+   * Recomputes `evalMetrics` from the selected options. Each entry carries
+   * the metric name and threshold, and additionally the criterion when that
+   * is truthy.
+   */
+  private syncEvalMetricsFromOptions() {
+    this.evalMetrics =
+        this.metricOptions.filter((option) => option.selected)
+            .map(({metricName, threshold, criterion}): EvalMetric =>
+                     criterion ? {metricName, threshold, criterion} :
+                                 {metricName, threshold});
+  }
+
+  /** Returns a shallow copy of each currently selected metric option. */
+  private cloneSelectedMetricOptions(): MetricOption[] {
+    const selectedOptions =
+        this.metricOptions.filter((option) => option.selected);
+    return selectedOptions.map((option) => ({...option}));
+  }
+
+  /** Copies threshold and criterion from `updatedMetrics` onto same-named options. */
+  private applyUpdatedMetricOptions(updatedMetrics: MetricOption[]) {
+    const updatesByName =
+        new Map(updatedMetrics.map((updated) => [updated.metricName, updated]));
+    for (const option of this.metricOptions) {
+      const update = updatesByName.get(option.metricName);
+      if (!update) {
+        continue;
+      }
+      option.threshold = update.threshold;
+      option.criterion = update.criterion;
+    }
+    this.syncEvalMetricsFromOptions();
+    this.changeDetectorRef.detectChanges();
+  }
+
   ngOnChanges(changes: SimpleChanges): void {
     if (changes['appName']) {
       this.selectedEvalSet = '';
       this.evalCases = [];
+      this.loadMetricsInfo();
       this.getEvalSet();
       this.getEvaluationResult();
     }
@@ -232,12 +440,16 @@ export class EvalTabComponent implements OnInit, OnChanges {
   }
 
   runEval() {
-    this.evalRunning.set(true);
     if (this.selection.selected.length == 0) {
       alert('No case selected!');
-      this.evalRunning.set(false);
       return;
     }
+    if (this.evalMetrics.length === 0) {
+      alert('No metric selected!');
+      return;
+    }
+
+    this.evalRunning.set(true);
     this.evalService
         .runEval(
             this.appName(),
@@ -345,6 +557,7 @@ export class EvalTabComponent implements OnInit, OnChanges {
   private addEvalFieldsToBotEvent(
       event: any, invocationResult: any, failedMetric: string, score: number,
       threshold: number) {
+    event.metricResults = invocationResult.evalMetricResults ?? [];
     event.failedMetric = failedMetric;
     event.evalScore = score;
     event.evalThreshold = threshold;
@@ -553,19 +766,23 @@ export class EvalTabComponent implements OnInit, OnChanges {
       alert('No case selected!');
       return;
     }
+    if (this.metricOptions.filter((metric) => metric.selected).length === 0) {
+      alert('No metric selected!');
+      return;
+    }
 
     const dialogRef = this.dialog.open(RunEvalConfigDialogComponent, {
       maxWidth: '90vw',
       maxHeight: '90vh',
       data: {
-        evalMetrics: this.evalMetrics,
+        metrics: this.cloneSelectedMetricOptions(),
       },
     });
 
-    dialogRef.afterClosed().subscribe((evalMetrics) => {
-      if (!!evalMetrics) {
-        this.evalMetrics = evalMetrics;
-
+    dialogRef.afterClosed().subscribe((updatedMetrics: MetricOption[]|
+                                        null|undefined) => {
+      if (!!updatedMetrics && updatedMetrics.length > 0) {
+        this.applyUpdatedMetricOptions(updatedMetrics);
         this.runEval();
       }
     });
@@ -574,28 +791,41 @@ export class EvalTabComponent implements OnInit, OnChanges {
   protected getEvalMetrics(evalResult: any|undefined) {
+    // Returns the per-metric entries rendered for `evalResult`.
+    //
+    // When the first evaluation result carries overall metric results, those
+    // are returned as they are (including whatever score/status fields they
+    // hold). In every other case the currently selected metric options are
+    // returned with `score` and `evalStatus` left undefined.
+    //
+    // The fallback is built in one place so the three "nothing to show yet"
+    // branches below cannot drift apart.
+    const selectedMetricsWithoutResults = () =>
+        this.metricOptions.filter((metric) => metric.selected)
+            .map((metric) => ({
+                   metricName: metric.metricName,
+                   threshold: metric.threshold,
+                   score: undefined,
+                   evalStatus: undefined,
+                 }));
+
     if (!evalResult || !evalResult.evaluationResults ||
         !evalResult.evaluationResults.evaluationResults) {
-      return this.evalMetrics;
+      return selectedMetricsWithoutResults();
     }
 
     const results = evalResult.evaluationResults.evaluationResults;
 
     if (results.length === 0) {
-      return this.evalMetrics;
+      return selectedMetricsWithoutResults();
     }
 
     if (typeof results[0].overallEvalMetricResults === 'undefined' ||
         !results[0].overallEvalMetricResults ||
         results[0].overallEvalMetricResults.length === 0) {
-      return this.evalMetrics;
+      return selectedMetricsWithoutResults();
     }
 
     const overallEvalMetricResults = results[0].overallEvalMetricResults;
 
-    return overallEvalMetricResults.map((result: any) => {
-      return {
-        metricName: result.metricName,
-        threshold: result.threshold,
-      };
-    });
+    return overallEvalMetricResults;
   }
 }

From a47d1147a5300d7f49df5c69ef705fc6e32662e3 Mon Sep 17 00:00:00 2001
From: Aki-07 <akileshramesh2003@gmail.com>
Date: Sat, 18 Oct 2025 14:49:44 +0530
Subject: [PATCH 3/4] feat(eval-dialog): configure thresholds for selected
 metrics

---
 .../run-eval-config-dialog.component.html     |  65 ++++---
 .../run-eval-config-dialog.component.scss     |  55 ++++--
 .../run-eval-config-dialog.component.spec.ts  |  51 +++---
 .../run-eval-config-dialog.component.ts       | 163 +++++++++++++-----
 4 files changed, 230 insertions(+), 104 deletions(-)

diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.html b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.html
index eb8623b1..637b0b04 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.html
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.html
@@ -14,34 +14,47 @@
  limitations under the License.
 -->
 <div class="dialog-container">
-  <h2 mat-dialog-title class="dialog-title">EVALUATION METRIC</h2>
+  <h2 mat-dialog-title class="dialog-title">Evaluation Metrics</h2>
   <mat-dialog-content>
     <form [formGroup]="evalForm" class="eval-form">
-      <div class="metric-row">
-        <div class="metric-name">Tool trajectory avg score: </div>
-        <div class="flex-1 pl-4">
-          <mat-slider min="0" max="1" step="0.1" thumbLabel class="threshold-slider">
-            <input matSliderThumb formControlName="tool_trajectory_avg_score_threshold" />
-          </mat-slider>
-          <!-- Display current threshold value -->
-          <span class="threshold-value">
-            {{ evalForm.controls['tool_trajectory_avg_score_threshold'].value }}
-          </span>
-        </div>
-      </div>
-
-      <div class="metric-row">
-        <div class="metric-name">Response match score: </div>
-        <div class="flex-1 pl-4">
-          <mat-slider min="0" max="1" step="0.1" thumbLabel class="threshold-slider">
-            <input matSliderThumb formControlName="response_match_score_threshold" />
-          </mat-slider>
-          <!-- Display current threshold value -->
-          <span class="threshold-value">
-            {{ evalForm.controls['response_match_score_threshold'].value }}
-          </span>
-        </div>
-      </div>
+      @if (metrics.length === 0) {
+        <div class="no-metrics">No metrics available for this app.</div>
+      } @else {
+        @for (metric of metrics; track metric.metricName) {
+          <div class="metric-row">
+            <div class="metric-details">
+              <div class="metric-title">{{ metric.metricName }}</div>
+              @if (metric.description) {
+                <div class="metric-description">{{ metric.description }}</div>
+              }
+              @if (formatRangeDescription(metric) || formatStepDescription(metric)) {
+                <div class="metric-hints">
+                  <span>{{ formatRangeDescription(metric) }}</span>
+                  @if (formatStepDescription(metric)) {
+                    <span> · {{ formatStepDescription(metric) }}</span>
+                  }
+                </div>
+              }
+            </div>
+            <div class="metric-input">
+              <mat-form-field appearance="outline">
+                <mat-label>Threshold</mat-label>
+                <input
+                  matInput
+                  type="number"
+                  [step]="getStep(metric) ?? 0.01"
+                  [min]="getMin(metric)"
+                  [max]="getMax(metric)"
+                  [formControlName]="getControlName(metric.metricName)"
+                />
+              </mat-form-field>
+              @if (hasError(metric)) {
+                <div class="metric-error">{{ getErrorMessage(metric) }}</div>
+              }
+            </div>
+          </div>
+        }
+      }
     </form>
   </mat-dialog-content>
 
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
index 8a8b3af5..8f6988d0 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.scss
@@ -1,34 +1,55 @@
 .dialog-container {
   border-radius: 12px;
   padding: 18px;
-  width:500px;
+  width: 520px;
   box-shadow: 0 8px 16px var(--run-eval-config-dialog-container-box-shadow-color);
 }
 
-.threshold-slider {
-  --mdc-slider-active-track-color: var(--run-eval-config-dialog-threshold-slider-active-track-color);
-  --mdc-slider-inactive-track-color: var(--run-eval-config-dialog-threshold-slider-inactive-track-color);
-  --mdc-slider-handle-color: var(--run-eval-config-dialog-threshold-slider-handle-color);
-  --mdc-slider-ripple-color: var(--run-eval-config-dialog-threshold-slider-ripple-color);
-  width: 100px
+.eval-form {
+  display: flex;
+  flex-direction: column;
+  gap: 16px;
 }
 
 .metric-row {
   display: flex;
   flex-direction: row;
-  align-items: center;
+  gap: 16px;
+  align-items: flex-start;
+}
+
+.metric-details {
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+}
+
+.metric-title {
+  font-weight: 500;
 }
 
-.metric-name {
-  width: 250px;
+.metric-description {
+  font-size: 13px;
+  color: var(--run-eval-config-dialog-metric-description-color, #9aa0a6);
 }
 
-.threshold-value {
-  margin-left: 20px;
+.metric-hints {
+  font-size: 12px;
+  color: var(--run-eval-config-dialog-metric-hints-color, #9aa0a6);
 }
 
-.mdc-slider__thumb--with-indicator {
-  background-color: var(--mdc-slider-handle-color, var(--run-eval-config-dialog-mdc-slider-thumb-background-color));
-  border: none !important;
-  box-shadow: none !important;
-}
\ No newline at end of file
+.metric-input {
+  width: 180px;
+}
+
+.metric-error {
+  margin-top: 4px;
+  font-size: 12px;
+  color: var(--run-eval-config-dialog-metric-error-color, #d93025);
+}
+
+.no-metrics {
+  font-size: 14px;
+  color: var(--run-eval-config-dialog-no-metrics-color, #9aa0a6);
+}
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
index 7a9bbbf7..062ddb03 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.spec.ts
@@ -22,8 +22,6 @@ import {
   MatDialogModule,
   MatDialogRef,
 } from '@angular/material/dialog';
-import {MatRadioModule} from '@angular/material/radio';
-import {MatSliderModule} from '@angular/material/slider';
 import {NoopAnimationsModule} from '@angular/platform-browser/animations';
 
 
@@ -42,8 +40,6 @@ describe('RunEvalConfigDialogComponent', () => {
     imports: [
         ReactiveFormsModule,
         MatDialogModule,
-        MatRadioModule,
-        MatSliderModule,
         NoopAnimationsModule,
         RunEvalConfigDialogComponent,
     ],
@@ -52,14 +48,24 @@ describe('RunEvalConfigDialogComponent', () => {
         {
             provide: MAT_DIALOG_DATA,
             useValue: {
-                evalMetrics: [
+                metrics: [
                     {
                         metricName: 'tool_trajectory_avg_score',
                         threshold: 1,
+                        metricValueInfo: {
+                          minThreshold: 0,
+                          maxThreshold: 1,
+                          step: 0.1,
+                        },
                     },
                     {
                         metricName: 'response_match_score',
                         threshold: 0.7,
+                        metricValueInfo: {
+                          minThreshold: 0,
+                          maxThreshold: 1,
+                          step: 0.1,
+                        },
                     },
                 ],
             },
@@ -93,23 +99,26 @@ describe('RunEvalConfigDialogComponent', () => {
     expect(dialogRefSpy.close).toHaveBeenCalledWith(null);
   });
 
-  it('should update threshold value when slider changes (simulated)', () => {
-    const toolTrajectoryAvgScoreSlider = component.evalForm.get(
-      'tool_trajectory_avg_score_threshold'
-    )!;
-    const responseMatchScoreSlider = component.evalForm.get(
-      'response_match_score_threshold'
-    )!;
+  it('should close dialog with updated thresholds on start', () => {
+    const toolControl =
+      component.evalForm.get('tool_trajectory_avg_score_threshold')!;
+    const responseControl =
+      component.evalForm.get('response_match_score_threshold')!;
 
-    toolTrajectoryAvgScoreSlider.setValue(0.4); // Simulate slider value change
-    responseMatchScoreSlider.setValue(0.5); // Simulate slider value change
-    fixture.detectChanges();
+    toolControl.setValue(0.4);
+    responseControl.setValue(0.5);
 
-    expect(toolTrajectoryAvgScoreSlider.value).toBe(0.4);
-    expect(responseMatchScoreSlider.value).toBe(0.5);
-    const thresholdValueDisplays =
-      fixture.nativeElement.querySelectorAll('.threshold-value');
-    expect(thresholdValueDisplays[0].textContent).toContain('0.4');
-    expect(thresholdValueDisplays[1].textContent).toContain('0.5');
+    component.onStart();
+
+    expect(dialogRefSpy.close).toHaveBeenCalledWith([
+      jasmine.objectContaining({
+        metricName: 'tool_trajectory_avg_score',
+        threshold: 0.4,
+      }),
+      jasmine.objectContaining({
+        metricName: 'response_match_score',
+        threshold: 0.5,
+      }),
+    ]);
   });
 });
diff --git a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
index 94ef2199..5619e35c 100644
--- a/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
+++ b/src/app/components/eval-tab/run-eval-config-dialog/run-eval-config-dialog.component.ts
@@ -16,13 +16,16 @@
  */
 
 import {Component, Inject} from '@angular/core';
-import { FormBuilder, FormGroup, Validators, FormsModule, ReactiveFormsModule } from '@angular/forms';
+import {FormBuilder, FormGroup, Validators, FormsModule, ReactiveFormsModule} from '@angular/forms';
 import { MAT_DIALOG_DATA, MatDialogRef, MatDialogTitle, MatDialogContent, MatDialogActions } from '@angular/material/dialog';
 
-import {EvalMetric} from '../../../core/models/Eval';
+import {EvalMetricConfig} from '../../../core/models/Eval';
 import { CdkScrollable } from '@angular/cdk/scrolling';
-import { MatSlider, MatSliderThumb } from '@angular/material/slider';
 import { MatButton } from '@angular/material/button';
+import { MatFormField } from '@angular/material/form-field';
+import { MatLabel } from '@angular/material/form-field';
+import { MatInput } from '@angular/material/input';
+import { NgIf, NgFor } from '@angular/common';
 
 /**
  * @interface EvalConfigData
@@ -30,7 +33,7 @@ import { MatButton } from '@angular/material/button';
  * evaluation metrics.
  */
 export interface EvalConfigData {
-  evalMetrics: EvalMetric[];
+  metrics: EvalMetricConfig[];
 }
 
 @Component({
@@ -43,17 +46,21 @@ export interface EvalConfigData {
         MatDialogContent,
         FormsModule,
         ReactiveFormsModule,
-        MatSlider,
-        MatSliderThumb,
         MatDialogActions,
         MatButton,
+        MatFormField,
+        MatLabel,
+        MatInput,
+        NgIf,
+        NgFor,
     ],
 })
 export class RunEvalConfigDialogComponent {
   // FormGroup to manage the dialog's form controls
   evalForm: FormGroup;
 
-  evalMetrics: EvalMetric[] = [];
+  metrics: EvalMetricConfig[] = [];
+  private controlNameByMetric = new Map<string, string>();
 
   /**
    * @constructor
@@ -68,48 +75,124 @@ export class RunEvalConfigDialogComponent {
       public dialogRef: MatDialogRef<RunEvalConfigDialogComponent>,
       private fb: FormBuilder,
       @Inject(MAT_DIALOG_DATA) public data: EvalConfigData) {
-    this.evalMetrics = this.data.evalMetrics;
-
-    // Initialize the form with controls and validators
-    this.evalForm = this.fb.group({
-      tool_trajectory_avg_score_threshold: [
-        this.getEvalMetricThresholdFromData('tool_trajectory_avg_score'),
-        [Validators.required, Validators.min(0), Validators.max(1)]
-      ],
-      response_match_score_threshold: [
-        this.getEvalMetricThresholdFromData('response_match_score'),
-        [Validators.required, Validators.min(0), Validators.max(1)]
-      ]
-    });
+    this.metrics = this.data.metrics ?? [];
+
+    this.evalForm = this.fb.group({});
+    this.initializeForm();
+  }
+
+  protected getControlName(metricName: string): string {
+    return this.controlNameByMetric.get(metricName) ?? '';
+  }
+
+  protected getMin(metric: EvalMetricConfig): number|undefined {
+    return metric.metricValueInfo?.minThreshold;
   }
 
-  private getEvalMetricThresholdFromData(metricName: string): number {
-    return this.evalMetrics.find((metric) => metric.metricName === metricName)
-               ?.threshold ??
-        0;
+  protected getMax(metric: EvalMetricConfig): number|undefined {
+    return metric.metricValueInfo?.maxThreshold;
+  }
+
+  protected getStep(metric: EvalMetricConfig): number|undefined {
+    return metric.metricValueInfo?.step;
+  }
+
+  /** Adds one threshold control per metric, validated against its declared range. */
+  private initializeForm() {
+    for (const metric of this.metrics) {
+      if (this.controlNameByMetric.has(metric.metricName)) continue;  // repeated entry
+      // Sanitizing may collapse distinct names (`a.b` / `a_b`); addControl() would
+      // silently keep the first control, so pad the name until it is unused.
+      let controlName = this.createControlName(metric.metricName);
+      while (this.evalForm.contains(controlName)) {
+        controlName += '_';
+      }
+      this.controlNameByMetric.set(metric.metricName, controlName);
+
+      const validators = [Validators.required];
+      const min = this.getMin(metric);
+      const max = this.getMax(metric);
+      if (min !== undefined) validators.push(Validators.min(min));
+      if (max !== undefined) validators.push(Validators.max(max));
+      this.evalForm.addControl(controlName, this.fb.control(metric.threshold, validators));
+    }
+  }
+
+  /** Maps a metric name to a control key: non-alphanumerics become `_`, then `_threshold`. */
+  private createControlName(metricName: string): string {
+    return `${metricName.replace(/[^a-zA-Z0-9]/g, '_')}_threshold`;
   }
 
   onStart(): void {
     if (this.evalForm.valid) {
-      const {
-        tool_trajectory_avg_score_threshold,
-        response_match_score_threshold
-      } = this.evalForm.value;
-
-      for (const metric of this.evalMetrics) {
-        if (metric.metricName === 'tool_trajectory_avg_score') {
-          metric.threshold = tool_trajectory_avg_score_threshold;
-        } else if (metric.metricName === 'response_match_score') {
-          metric.threshold = response_match_score_threshold;
-        }
-      }
+      this.metrics = this.metrics.map((metric) => {
+        const controlName = this.getControlName(metric.metricName);
+        const value = this.evalForm.get(controlName)?.value;
+        return {
+          ...metric,
+          threshold: Number(value),
+        };
+      });
+      // Close with the rebuilt copies; the metric objects passed in are not mutated.
+      this.dialogRef.close(this.metrics);
+
+      return;
+    }
+    // Invalid form: mark every control as touched so hasError() reports the failures.
+    this.evalForm.markAllAsTouched();
+  }
+
+  protected hasError(metric: EvalMetricConfig): boolean {
+    const control = this.evalForm.get(this.getControlName(metric.metricName));
+    return !!control && control.invalid && (control.dirty || control.touched);
+  }
 
-      this.dialogRef.close(this.evalMetrics);
+  /** Returns the message for the control's first failing validator, or ''. */
+  protected getErrorMessage(metric: EvalMetricConfig): string {
+    const errors = this.evalForm.get(this.getControlName(metric.metricName))?.errors;
+    if (!errors) {
+      return '';
     }
+    if (errors['min']) {
+      return `Minimum threshold is ${this.getMin(metric)}`;
+    }
+    if (errors['max']) {
+      return `Maximum threshold is ${this.getMax(metric)}`;
+    }
+    if (errors['required']) {
+      return 'Threshold is required';
+    }
+    // Any other validator failure gets a generic message.
+    return 'Invalid threshold';
   }
 
-  onCancel(): void {
-    this.dialogRef.close(
-        null);  // Return null or undefined to indicate cancellation
+  /**
+   * Describes the allowed threshold range; '' when the metric declares no bound.
+   */
+  protected formatRangeDescription(metric: EvalMetricConfig): string {
+    const min = this.getMin(metric);
+    const max = this.getMax(metric);
+    if (min !== undefined && max !== undefined) {
+      return `Range ${min} – ${max}`;
+    }
+    if (min !== undefined) {
+      return `≥ ${min}`;
+    }
+    if (max !== undefined) {
+      return `≤ ${max}`;
+    }
+    return '';
   }
+
+  /**
+   * Returns `Step <value>` for the metric's declared step, or '' when it has none.
+   */
+  protected formatStepDescription(metric: EvalMetricConfig): string {
+    const increment = this.getStep(metric);
+    return increment === undefined ? '' : `Step ${increment}`;
+  }
+
+  onCancel(): void {
+    this.dialogRef.close(null);  // null tells the opener the dialog was cancelled
+  }
 }

From c8889793e967e62840bd16c782f6456a5aa0729f Mon Sep 17 00:00:00 2001
From: Aki-07 <akileshramesh2003@gmail.com>
Date: Sat, 18 Oct 2025 14:50:00 +0530
Subject: [PATCH 4/4] feat(chat-panel): surface per-metric evaluation results

---
 .../chat-panel/chat-panel.component.html      | 24 ++++++++++++++
 .../chat-panel/chat-panel.component.scss      | 33 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/src/app/components/chat-panel/chat-panel.component.html b/src/app/components/chat-panel/chat-panel.component.html
index 97664029..e968992e 100644
--- a/src/app/components/chat-panel/chat-panel.component.html
+++ b/src/app/components/chat-panel/chat-panel.component.html
@@ -209,6 +209,30 @@
         }
       </div>
       }
+      <!-- Per-metric evaluation chips. Score and threshold are shown only when present;
+           `!= null` hides both undefined and null values. -->
+      @if (message.metricResults?.length) {
+      <div class="message-metric-results">
+        @for (metricResult of message.metricResults; track metricResult.metricName) {
+          <span
+            class="metric-result-chip"
+            [ngClass]="{
+              'metric-pass': metricResult.evalStatus === 1 || metricResult.evalStatus === 'PASSED',
+              'metric-fail': metricResult.evalStatus === 2 || metricResult.evalStatus === 'FAILED',
+              'metric-neutral': metricResult.evalStatus !== 1 && metricResult.evalStatus !== 2 && metricResult.evalStatus !== 'PASSED' && metricResult.evalStatus !== 'FAILED'
+            }"
+          >
+            <span class="metric-result-name">{{ metricResult.metricName }}</span>
+            @if (metricResult.score != null) {
+              <span class="metric-result-score">Score: {{ metricResult.score }}</span>
+            }
+            @if (metricResult.threshold != null) {
+              <span class="metric-result-threshold">Threshold: {{ metricResult.threshold }}</span>
+            }
+          </span>
+        }
+      </div>
+      }
     </mat-card>
     } @if (message.functionCall) {
     <button
diff --git a/src/app/components/chat-panel/chat-panel.component.scss b/src/app/components/chat-panel/chat-panel.component.scss
index 0452163b..2f1899b0 100644
--- a/src/app/components/chat-panel/chat-panel.component.scss
+++ b/src/app/components/chat-panel/chat-panel.component.scss
@@ -151,6 +151,39 @@
   font-weight: 600;
 }
 
+.message-metric-results {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+  margin-top: 10px;
+}
+
+.metric-result-chip {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  border-radius: 12px;
+  padding: 4px 8px;
+  background-color: var(
+      --chat-panel-metric-result-chip-background,
+      rgba(255, 255, 255, 0.1)
+    );
+  font-size: 12px;
+  color: var(--chat-panel-metric-result-chip-color, #e8eaed);
+
+  &.metric-pass {
+    color: var(--chat-panel-metric-result-chip-pass-color, #1e8e3e);
+  }
+
+  &.metric-fail {
+    color: var(--chat-panel-metric-result-chip-fail-color, #d93025);
+  }
+
+  &.metric-neutral {
+    color: var(--chat-panel-metric-result-chip-neutral-color, #9aa0a6);
+  }
+}
+
 .eval-response-header {
   padding-bottom: 5px;
   border-bottom: 2px solid var(--chat-panel-eval-response-header-border-bottom-color);