diff --git a/docs.json b/docs.json index d55a955..540c61c 100644 --- a/docs.json +++ b/docs.json @@ -190,7 +190,10 @@ }, { "group": "Voice Simulation", - "pages": ["simulations/voice-simulation/voice-simulation"] + "pages": [ + "simulations/voice-simulation/voice-simulation", + "simulations/voice-simulation/simulation-runs" + ] } ] }, diff --git a/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/single-entry-result.png b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/single-entry-result.png new file mode 100644 index 0000000..331eec6 Binary files /dev/null and b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/single-entry-result.png differ diff --git a/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/trigger-voice-simulation-testrun.png b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/trigger-voice-simulation-testrun.png new file mode 100644 index 0000000..b1afc9e Binary files /dev/null and b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/trigger-voice-simulation-testrun.png differ diff --git a/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-agent-dataset.png b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-agent-dataset.png new file mode 100644 index 0000000..ebc9d23 Binary files /dev/null and b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-agent-dataset.png differ diff --git a/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-simulation-testrun-report.png 
b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-simulation-testrun-report.png new file mode 100644 index 0000000..2975912 Binary files /dev/null and b/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-simulation-testrun-report.png differ diff --git a/simulations/meta.json b/simulations/meta.json index c5d279d..9b2cad7 100644 --- a/simulations/meta.json +++ b/simulations/meta.json @@ -7,7 +7,7 @@ }, "voice-simulation": { "title": "Voice Simulation", - "pages": ["voice-simulation"] + "pages": ["voice-simulation", "simulation-runs"] } } } \ No newline at end of file diff --git a/simulations/voice-simulation/simulation-runs.mdx b/simulations/voice-simulation/simulation-runs.mdx new file mode 100644 index 0000000..2499a29 --- /dev/null +++ b/simulations/voice-simulation/simulation-runs.mdx @@ -0,0 +1,59 @@ +--- +title: Voice Simulation Runs +description: Test your Voice Agent's interaction capabilities with realistic voice simulations across thousands of scenarios. +--- + +## Test voice agents at scale with simulated conversations + +Run tests with datasets containing multiple scenarios for your voice agent to evaluate performance across different situations. + + + + +Configure your agent dataset template with: +- **Agent scenarios**: Define specific situations for testing (e.g., "Update address", "Order an iPhone") +- **Expected steps**: List the actions and responses you expect + +![Voice Agent Dataset](/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-agent-dataset.png) + + + +- Navigate to your voice agent and click **Test** +- **Simulated session** mode will be pre-selected (voice agents can't be tested in single-turn mode) +- Select your agent dataset from the dropdown +- Choose relevant evaluators + + + Only built-in evaluators are currently supported for voice simulation runs. 
Custom evaluators will be available soon. + + +![Configure simulation test run](/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/trigger-voice-simulation-testrun.png) + + + + +Click **Trigger test run** to start. The system will call your voice agent and simulate conversations for each scenario. + + + +Each session runs end-to-end for thorough evaluation: +- View detailed results for every scenario +- Text-based evaluators assess the turn-by-turn call transcript +- Audio-based evaluators analyze the call recording + +![Simulation test run result](/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/voice-simulation-testrun-report.png) + + + +Click any entry to see detailed results for that specific scenario. + +By default, test runs evaluate these performance metrics from the call's audio recording: +- **Avg latency**: The average time the agent took to respond +- **Talk ratio**: The agent's talk time compared to the simulation agent's talk time +- **Avg pitch**: The average pitch of the agent's responses +- **Words per minute**: The agent's speech rate + +![Simulation test run entry](/images/docs/evaluate/how-to/evaluate-workflows-via-api-endpoint/evaluate-simulated-voice-agent/single-entry-result.png) + + + \ No newline at end of file