Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(js/plugins/checks): checks evaluator plugin returns multiple scores #1370

Merged
merged 11 commits into from
Dec 11, 2024
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ js/testapps/firebase-functions-sample1/.firebase
js/testapps/firebase-functions-sample1/.firebaserc
js/testapps/firebase-functions-sample1/public/bundle.js
js/testapps/firebase-functions-sample1/public/config.js
.genkit
js/**/.genkit
samples/**/.genkit
go/**/.genkit
Expand Down
24 changes: 24 additions & 0 deletions js/plugins/checks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,32 @@ Create a JSON file with the data you want to test. Add as many test cases as you
### Run the evaluators

```bash
# Run all configured classifiers.
genkit eval:run test-dataset.json --evaluators=checks/all_metrics

# Run just the DANGEROUS_CONTENT classifier.
genkit eval:run test-dataset.json --evaluators=checks/dangerous_content

# Run just the HARASSMENT classifier.
genkit eval:run test-dataset.json --evaluators=checks/harassment

# Run just the HATE_SPEECH classifier.
genkit eval:run test-dataset.json --evaluators=checks/hate_speech

# Run just the MEDICAL_INFO classifier.
genkit eval:run test-dataset.json --evaluators=checks/medical_info

# Run just the OBSCENITY_AND_PROFANITY classifier.
genkit eval:run test-dataset.json --evaluators=checks/obscenity_and_profanity

# Run just the PII_SOLICITING_RECITING classifier.
genkit eval:run test-dataset.json --evaluators=checks/pii_soliciting_reciting

# Run just the SEXUALLY_EXPLICIT classifier.
genkit eval:run test-dataset.json --evaluators=checks/sexually_explicit

# Run just the VIOLENCE_AND_GORE classifier.
genkit eval:run test-dataset.json --evaluators=checks/violence_and_gore
```

```bash
Expand Down
50 changes: 34 additions & 16 deletions js/plugins/checks/src/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,22 @@ export function checksEvaluators(
}
);

// Individual evaluators, one per configured metric.
const evaluators = policy_configs.map((policy_config) => {
return createPolicyEvaluator(projectId, auth, ai, policy_config);
return createPolicyEvaluator(
projectId,
auth,
ai,
[policy_config],
policy_config.type as string
);
});

// Single evaluator instance with all configured policies.
evaluators.push(
createPolicyEvaluator(projectId, auth, ai, policy_configs, 'all_metrics')
);

return evaluators;
}

Expand All @@ -104,15 +116,14 @@ function createPolicyEvaluator(
projectId: string,
auth: GoogleAuth,
ai: Genkit,
policy_config: ChecksEvaluationMetricConfig
policy_config: ChecksEvaluationMetricConfig[],
HunterHeston marked this conversation as resolved.
Show resolved Hide resolved
name: string
): EvaluatorAction {
const policyType = policy_config.type as string;

return ai.defineEvaluator(
{
name: `checks/${policyType.toLowerCase()}`,
displayName: policyType,
definition: `Evaluates text against the Checks ${policyType} policy.`,
name: `checks/${name.toLowerCase()}`,
displayName: name,
definition: `Evaluates text against the Checks ${name} policy.`,
},
async (datapoint: BaseEvalDataPoint) => {
const partialRequest = {
Expand All @@ -121,10 +132,12 @@ function createPolicyEvaluator(
content: datapoint.output as string,
},
},
policies: {
policy_type: policy_config.type,
threshold: policy_config.threshold,
},
policies: policy_config.map((config) => {
return {
policy_type: config.type,
threshold: config.threshold,
};
}),
};

const response = await checksEvalInstance(
Expand All @@ -134,13 +147,18 @@ function createPolicyEvaluator(
ResponseSchema
);

return {
evaluation: {
score: response.policyResults[0].score,
const evaluationResults = response.policyResults.map((result) => {
return {
id: result.policyType,
score: result.score,
details: {
reasoning: response.policyResults[0].violationResult,
reasoning: `Status ${result.violationResult}`,
},
},
};
});

return {
evaluation: evaluationResults,
testCaseId: datapoint.testCaseId,
};
}
Expand Down
Loading