chore(ci): switch from gpt-4 to gpt-5 for default evals agent (#518)

Cali0707 · web-flow · commit a7528b6976e6 · 2025-11-28T14:56:06.000+01:00
* chore: switch from gpt-4 to gpt-5 for default evals agent

Signed-off-by: Calum Murray <cmurray@redhat.com>

* cleanup: address review comments

Signed-off-by: Calum Murray <cmurray@redhat.com>

---------

Signed-off-by: Calum Murray <cmurray@redhat.com>
diff --git a/.github/workflows/gevals.yaml b/.github/workflows/gevals.yaml
@@ -120,11 +120,10 @@ jobs:
           # OpenAI Agent configuration
           MODEL_BASE_URL: ${{ secrets.MODEL_BASE_URL }}
           MODEL_KEY: ${{ secrets.MODEL_KEY }}
-          MODEL_NAME: ${{ secrets.MODEL_NAME }}
           # LLM Judge configuration
           JUDGE_BASE_URL: ${{ secrets.JUDGE_BASE_URL }}
           JUDGE_API_KEY: ${{ secrets.JUDGE_API_KEY }}
-          JUDGE_MODEL_NAME: ${{ secrets.JUDGE_MODEL_NAME }}
+          JUDGE_MODEL_NAME: ${{ secrets.JUDGE_MODEL_NAME }} # we still need this one, as only the agent model is specified in yaml
 
       - name: Cleanup
         if: always()
diff --git a/evals/README.md b/evals/README.md
@@ -62,7 +62,6 @@ The tasks and MCP configuration are shared - only the agent configuration differ
 # Set your model credentials
 export MODEL_BASE_URL='https://your-api-endpoint.com/v1'
 export MODEL_KEY='your-api-key'
-export MODEL_NAME='your-model-name'
 
 # Run the test
 ./gevals eval examples/kube-mcp-server/openai-agent/eval.yaml
diff --git a/evals/openai-agent/agent.yaml b/evals/openai-agent/agent.yaml
@@ -3,7 +3,7 @@ metadata:
   name: "openai-agent"
 builtin:
   type: "openai-agent"
-  model: "gpt-4"  # Change to your model
+  model: "gpt-5"  # Change to your model
 # Before running, set environment variables:
 #   export MODEL_BASE_URL="https://api.openai.com/v1"
 #   export MODEL_KEY="sk-..."
diff --git a/evals/openai-agent/eval-inline.yaml b/evals/openai-agent/eval-inline.yaml
@@ -5,7 +5,7 @@ config:
   # Inline agent configuration - no separate agent.yaml file needed
   agent:
     type: "builtin.openai-agent"
-    model: "gpt-4"  # Change to your model
+    model: "gpt-5"  # Change to your model
   # Before running, set environment variables:
   #   export MODEL_BASE_URL="https://api.openai.com/v1"
   #   export MODEL_KEY="sk-..."