diff --git a/dotnet/test/SessionTests.cs b/dotnet/test/SessionTests.cs index eac00b06e..c202bc00b 100644 --- a/dotnet/test/SessionTests.cs +++ b/dotnet/test/SessionTests.cs @@ -167,6 +167,11 @@ public async Task Should_Resume_A_Session_Using_The_Same_Client() var answer2 = await TestHelper.GetFinalAssistantMessageAsync(session2); Assert.NotNull(answer2); Assert.Contains("2", answer2!.Data.Content ?? string.Empty); + + // Can continue the conversation statefully + var answer3 = await session2.SendAndWaitAsync(new MessageOptions { Prompt = "Now if you double that, what do you get?" }); + Assert.NotNull(answer3); + Assert.Contains("4", answer3!.Data.Content ?? string.Empty); } [Fact] @@ -187,6 +192,11 @@ public async Task Should_Resume_A_Session_Using_A_New_Client() var messages = await session2.GetMessagesAsync(); Assert.Contains(messages, m => m is UserMessageEvent); Assert.Contains(messages, m => m is SessionResumeEvent); + + // Can continue the conversation statefully + var answer2 = await session2.SendAndWaitAsync(new MessageOptions { Prompt = "Now if you double that, what do you get?" }); + Assert.NotNull(answer2); + Assert.Contains("4", answer2!.Data.Content ?? string.Empty); } [Fact] @@ -231,68 +241,6 @@ await session.SendAsync(new MessageOptions Assert.Contains("4", answer!.Data.Content ?? string.Empty); } - // TODO: This test requires the session-events.schema.json to include assistant.message_delta. - // The CLI v0.0.376 emits delta events at runtime, but the schema hasn't been updated yet. - // Once the schema is updated and types are regenerated, this test can be enabled. 
- [Fact(Skip = "Requires schema update for AssistantMessageDeltaEvent type")] - public async Task Should_Receive_Streaming_Delta_Events_When_Streaming_Is_Enabled() - { - var session = await CreateSessionAsync(new SessionConfig { Streaming = true }); - - var deltaContents = new List<string>(); - var doneEvent = new TaskCompletionSource<bool>(); - - session.On(evt => - { - switch (evt) - { - // TODO: Uncomment once AssistantMessageDeltaEvent is generated - // case AssistantMessageDeltaEvent delta: - // if (!string.IsNullOrEmpty(delta.Data.DeltaContent)) - // deltaContents.Add(delta.Data.DeltaContent); - // break; - case SessionIdleEvent: - doneEvent.TrySetResult(true); - break; - } - }); - - await session.SendAsync(new MessageOptions { Prompt = "What is 2+2?" }); - - // Wait for completion - var completed = await Task.WhenAny(doneEvent.Task, Task.Delay(TimeSpan.FromSeconds(60))); - Assert.Equal(doneEvent.Task, completed); - - // Should have received delta events - Assert.NotEmpty(deltaContents); - - // Get the final message to compare - var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); - Assert.NotNull(assistantMessage); - - // Accumulated deltas should equal the final message - var accumulated = string.Join("", deltaContents); - Assert.Equal(assistantMessage!.Data.Content, accumulated); - - // Final message should contain the answer - Assert.Contains("4", assistantMessage.Data.Content ?? string.Empty); - } - - [Fact] - public async Task Should_Pass_Streaming_Option_To_Session_Creation() - { - // Verify that the streaming option is accepted without errors - var session = await CreateSessionAsync(new SessionConfig { Streaming = true }); - - Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); - - // Session should still work normally - await session.SendAsync(new MessageOptions { Prompt = "What is 1+1?" 
}); - var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); - Assert.NotNull(assistantMessage); - Assert.Contains("2", assistantMessage!.Data.Content); - } - [Fact] public async Task Should_Receive_Session_Events() { diff --git a/dotnet/test/StreamingFidelityTests.cs b/dotnet/test/StreamingFidelityTests.cs new file mode 100644 index 000000000..c38cb1545 --- /dev/null +++ b/dotnet/test/StreamingFidelityTests.cs @@ -0,0 +1,99 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Test.Harness; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test; + +public class StreamingFidelityTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "streaming_fidelity", output) +{ + [Fact] + public async Task Should_Produce_Delta_Events_When_Streaming_Is_Enabled() + { + var session = await CreateSessionAsync(new SessionConfig { Streaming = true }); + + var events = new List<SessionEvent>(); + session.On(evt => events.Add(evt)); + + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Count from 1 to 5, separated by commas." 
}); + + var types = events.Select(e => e.Type).ToList(); + + // Should have streaming deltas before the final message + var deltaEvents = events.OfType<AssistantMessageDeltaEvent>().ToList(); + Assert.NotEmpty(deltaEvents); + + // Deltas should have content + foreach (var delta in deltaEvents) + { + Assert.False(string.IsNullOrEmpty(delta.Data.DeltaContent)); + } + + // Should still have a final assistant.message + Assert.Contains("assistant.message", types); + + // Deltas should come before the final message + var firstDeltaIdx = types.IndexOf("assistant.message_delta"); + var lastAssistantIdx = types.LastIndexOf("assistant.message"); + Assert.True(firstDeltaIdx < lastAssistantIdx); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Not_Produce_Deltas_When_Streaming_Is_Disabled() + { + var session = await CreateSessionAsync(new SessionConfig { Streaming = false }); + + var events = new List<SessionEvent>(); + session.On(evt => events.Add(evt)); + + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say 'hello world'." }); + + var deltaEvents = events.OfType<AssistantMessageDeltaEvent>().ToList(); + + // No deltas when streaming is off + Assert.Empty(deltaEvents); + + // But should still have a final assistant.message + var assistantEvents = events.OfType<AssistantMessageEvent>().ToList(); + Assert.NotEmpty(assistantEvents); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Produce_Deltas_After_Session_Resume() + { + var session = await CreateSessionAsync(new SessionConfig { Streaming = false }); + await session.SendAndWaitAsync(new MessageOptions { Prompt = "What is 3 + 6?" 
}); + await session.DisposeAsync(); + + // Resume using a new client + using var newClient = Ctx.CreateClient(); + var session2 = await newClient.ResumeSessionAsync(session.SessionId, + new ResumeSessionConfig { OnPermissionRequest = PermissionHandler.ApproveAll, Streaming = true }); + + var events = new List<SessionEvent>(); + session2.On(evt => events.Add(evt)); + + var answer = await session2.SendAndWaitAsync(new MessageOptions { Prompt = "Now if you double that, what do you get?" }); + Assert.NotNull(answer); + Assert.Contains("18", answer!.Data.Content ?? string.Empty); + + // Should have streaming deltas before the final message + var deltaEvents = events.OfType<AssistantMessageDeltaEvent>().ToList(); + Assert.NotEmpty(deltaEvents); + + // Deltas should have content + foreach (var delta in deltaEvents) + { + Assert.False(string.IsNullOrEmpty(delta.Data.DeltaContent)); + } + + await session2.DisposeAsync(); + } +} diff --git a/go/internal/e2e/session_test.go b/go/internal/e2e/session_test.go index f04307c2d..0c50ba8d9 100644 --- a/go/internal/e2e/session_test.go +++ b/go/internal/e2e/session_test.go @@ -368,6 +368,15 @@ func TestSession(t *testing.T) { if answer2.Data.Content == nil || !strings.Contains(*answer2.Data.Content, "2") { t.Errorf("Expected resumed session answer to contain '2', got %v", answer2.Data.Content) } + + // Can continue the conversation statefully + answer3, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now if you double that, what do you get?"}) + if err != nil { + t.Fatalf("Failed to send follow-up message: %v", err) + } + if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content, "4") { + t.Errorf("Expected follow-up answer to contain '4', got %v", answer3) + } }) t.Run("should resume a session using a new client", func(t *testing.T) { @@ -432,6 +441,15 @@ func TestSession(t *testing.T) { if !hasSessionResume { t.Error("Expected messages to contain 'session.resume'") } + + // Can continue the conversation statefully 
+ answer3, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now if you double that, what do you get?"}) + if err != nil { + t.Fatalf("Failed to send follow-up message: %v", err) + } + if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content, "4") { + t.Errorf("Expected follow-up answer to contain '4', got %v", answer3) + } }) t.Run("should throw error when resuming non-existent session", func(t *testing.T) { @@ -565,99 +583,6 @@ func TestSession(t *testing.T) { } }) - t.Run("should receive streaming delta events when streaming is enabled", func(t *testing.T) { - ctx.ConfigureForTest(t) - - session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ - OnPermissionRequest: copilot.PermissionHandler.ApproveAll, - Streaming: true, - }) - if err != nil { - t.Fatalf("Failed to create session with streaming: %v", err) - } - - var deltaContents []string - done := make(chan bool) - - session.On(func(event copilot.SessionEvent) { - switch event.Type { - case "assistant.message_delta": - if event.Data.DeltaContent != nil { - deltaContents = append(deltaContents, *event.Data.DeltaContent) - } - case "session.idle": - close(done) - } - }) - - _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 2+2?"}) - if err != nil { - t.Fatalf("Failed to send message: %v", err) - } - - // Wait for completion - select { - case <-done: - case <-time.After(60 * time.Second): - t.Fatal("Timed out waiting for session.idle") - } - - // Should have received delta events - if len(deltaContents) == 0 { - t.Error("Expected to receive delta events, got none") - } - - // Get the final message to compare - assistantMessage, err := testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get assistant message: %v", err) - } - - // Accumulated deltas should equal the final message - accumulated := strings.Join(deltaContents, "") - if assistantMessage.Data.Content != nil 
&& accumulated != *assistantMessage.Data.Content { - t.Errorf("Accumulated deltas don't match final message.\nAccumulated: %q\nFinal: %q", accumulated, *assistantMessage.Data.Content) - } - - // Final message should contain the answer - if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content, "4") { - t.Errorf("Expected assistant message to contain '4', got %v", assistantMessage.Data.Content) - } - }) - - t.Run("should pass streaming option to session creation", func(t *testing.T) { - ctx.ConfigureForTest(t) - - // Verify that the streaming option is accepted without errors - session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ - OnPermissionRequest: copilot.PermissionHandler.ApproveAll, - Streaming: true, - }) - if err != nil { - t.Fatalf("Failed to create session with streaming: %v", err) - } - - matched, _ := regexp.MatchString(`^[a-f0-9-]+$`, session.SessionID) - if !matched { - t.Errorf("Expected session ID to match UUID pattern, got %q", session.SessionID) - } - - // Session should still work normally - _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) - if err != nil { - t.Fatalf("Failed to send message: %v", err) - } - - assistantMessage, err := testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get assistant message: %v", err) - } - - if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content, "2") { - t.Errorf("Expected assistant message to contain '2', got %v", assistantMessage.Data.Content) - } - }) - t.Run("should receive session events", func(t *testing.T) { ctx.ConfigureForTest(t) diff --git a/go/internal/e2e/streaming_fidelity_test.go b/go/internal/e2e/streaming_fidelity_test.go new file mode 100644 index 000000000..ef76c3d8b --- /dev/null +++ b/go/internal/e2e/streaming_fidelity_test.go @@ -0,0 +1,186 @@ +package e2e + +import ( + "strings" + "testing" + + copilot 
"github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" +) + +func TestStreamingFidelity(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + t.Run("should produce delta events when streaming is enabled", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: true, + }) + if err != nil { + t.Fatalf("Failed to create session with streaming: %v", err) + } + + var events []copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + events = append(events, event) + }) + + _, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Count from 1 to 5, separated by commas."}) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + // Should have streaming deltas before the final message + var deltaEvents []copilot.SessionEvent + for _, e := range events { + if e.Type == "assistant.message_delta" { + deltaEvents = append(deltaEvents, e) + } + } + if len(deltaEvents) < 1 { + t.Error("Expected at least 1 delta event") + } + + // Deltas should have content + for _, delta := range deltaEvents { + if delta.Data.DeltaContent == nil { + t.Error("Expected delta to have content") + } + } + + // Should still have a final assistant.message + hasAssistantMessage := false + for _, e := range events { + if e.Type == "assistant.message" { + hasAssistantMessage = true + break + } + } + if !hasAssistantMessage { + t.Error("Expected a final assistant.message event") + } + + // Deltas should come before the final message + firstDeltaIdx := -1 + lastAssistantIdx := -1 + for i, e := range events { + if e.Type == "assistant.message_delta" && firstDeltaIdx == -1 { + firstDeltaIdx = i + } + if e.Type == "assistant.message" { + lastAssistantIdx = i + } + } + if firstDeltaIdx >= lastAssistantIdx { + 
t.Errorf("Expected deltas before final message, got delta at %d, message at %d", firstDeltaIdx, lastAssistantIdx) + } + }) + + t.Run("should not produce deltas when streaming is disabled", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: false, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + var events []copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + events = append(events, event) + }) + + _, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Say 'hello world'."}) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + // No deltas when streaming is off + var deltaEvents []copilot.SessionEvent + for _, e := range events { + if e.Type == "assistant.message_delta" { + deltaEvents = append(deltaEvents, e) + } + } + if len(deltaEvents) != 0 { + t.Errorf("Expected no delta events, got %d", len(deltaEvents)) + } + + // But should still have a final assistant.message + var assistantEvents []copilot.SessionEvent + for _, e := range events { + if e.Type == "assistant.message" { + assistantEvents = append(assistantEvents, e) + } + } + if len(assistantEvents) < 1 { + t.Error("Expected at least 1 assistant.message event") + } + }) + + t.Run("should produce deltas after session resume", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: false, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + _, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What is 3 + 6?"}) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + // Resume using a new client + newClient := ctx.NewClient() + defer newClient.ForceStop() + + session2, err := 
newClient.ResumeSession(t.Context(), session.SessionID, &copilot.ResumeSessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: true, + }) + if err != nil { + t.Fatalf("Failed to resume session: %v", err) + } + + var events []copilot.SessionEvent + session2.On(func(event copilot.SessionEvent) { + events = append(events, event) + }) + + answer, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now if you double that, what do you get?"}) + if err != nil { + t.Fatalf("Failed to send follow-up message: %v", err) + } + if answer == nil || answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "18") { + t.Errorf("Expected answer to contain '18', got %v", answer) + } + + // Should have streaming deltas before the final message + var deltaEvents []copilot.SessionEvent + for _, e := range events { + if e.Type == "assistant.message_delta" { + deltaEvents = append(deltaEvents, e) + } + } + if len(deltaEvents) < 1 { + t.Error("Expected at least 1 delta event") + } + + // Deltas should have content + for _, delta := range deltaEvents { + if delta.Data.DeltaContent == nil { + t.Error("Expected delta to have content") + } + } + }) +} diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts index 93731d617..7a7a6d3a0 100644 --- a/nodejs/test/e2e/session.test.ts +++ b/nodejs/test/e2e/session.test.ts @@ -175,6 +175,12 @@ describe("Sessions", async () => { const messages = await session2.getMessages(); const assistantMessages = messages.filter((m) => m.type === "assistant.message"); expect(assistantMessages[assistantMessages.length - 1].data.content).toContain("2"); + + // Can continue the conversation statefully + const secondAssistantMessage = await session2.sendAndWait({ + prompt: "Now if you double that, what do you get?", + }); + expect(secondAssistantMessage?.data.content).toContain("4"); }); it("should resume a session using a new client", async () => { @@ -202,6 +208,12 @@ 
describe("Sessions", async () => { const messages = await session2.getMessages(); expect(messages).toContainEqual(expect.objectContaining({ type: "user.message" })); expect(messages).toContainEqual(expect.objectContaining({ type: "session.resume" })); + + // Can continue the conversation statefully + const secondAssistantMessage = await session2.sendAndWait({ + prompt: "Now if you double that, what do you get?", + }); + expect(secondAssistantMessage?.data.content).toContain("4"); }); it("should throw error when resuming non-existent session", async () => { @@ -284,56 +296,6 @@ describe("Sessions", async () => { expect(answer?.data.content).toContain("4"); }); - it("should receive streaming delta events when streaming is enabled", async () => { - const session = await client.createSession({ - onPermissionRequest: approveAll, - streaming: true, - }); - - const deltaContents: string[] = []; - let _finalMessage: string | undefined; - - // Set up event listener before sending - const unsubscribe = session.on((event) => { - if (event.type === "assistant.message_delta") { - const delta = (event.data as { deltaContent?: string }).deltaContent; - if (delta) { - deltaContents.push(delta); - } - } else if (event.type === "assistant.message") { - _finalMessage = event.data.content; - } - }); - - const assistantMessage = await session.sendAndWait({ prompt: "What is 2+2?" 
}); - - unsubscribe(); - - // Should have received delta events - expect(deltaContents.length).toBeGreaterThan(0); - - // Accumulated deltas should equal the final message - const accumulated = deltaContents.join(""); - expect(accumulated).toBe(assistantMessage?.data.content); - - // Final message should contain the answer - expect(assistantMessage?.data.content).toContain("4"); - }); - - it("should pass streaming option to session creation", async () => { - // Verify that the streaming option is accepted without errors - const session = await client.createSession({ - onPermissionRequest: approveAll, - streaming: true, - }); - - expect(session.sessionId).toMatch(/^[a-f0-9-]+$/); - - // Session should still work normally - const assistantMessage = await session.sendAndWait({ prompt: "What is 1+1?" }); - expect(assistantMessage?.data.content).toContain("2"); - }); - it("should receive session events", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); const receivedEvents: Array<{ type: string }> = []; diff --git a/nodejs/test/e2e/streaming_fidelity.test.ts b/nodejs/test/e2e/streaming_fidelity.test.ts index a5a2ead26..736c9313d 100644 --- a/nodejs/test/e2e/streaming_fidelity.test.ts +++ b/nodejs/test/e2e/streaming_fidelity.test.ts @@ -2,12 +2,12 @@ * Copyright (c) Microsoft Corporation. All rights reserved. 
*--------------------------------------------------------------------------------------------*/ -import { describe, expect, it } from "vitest"; -import { SessionEvent, approveAll } from "../../src/index.js"; -import { createSdkTestContext } from "./harness/sdkTestContext"; +import { describe, expect, it, onTestFinished } from "vitest"; +import { CopilotClient, SessionEvent, approveAll } from "../../src/index.js"; +import { createSdkTestContext, isCI } from "./harness/sdkTestContext"; describe("Streaming Fidelity", async () => { - const { copilotClient: client } = await createSdkTestContext(); + const { copilotClient: client, env } = await createSdkTestContext(); it("should produce delta events when streaming is enabled", async () => { const session = await client.createSession({ @@ -71,4 +71,43 @@ describe("Streaming Fidelity", async () => { await session.destroy(); }); + + it("should produce deltas after session resume", async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + streaming: false, + }); + await session.sendAndWait({ prompt: "What is 3 + 6?" }); + await session.destroy(); + + // Resume using a new client + const newClient = new CopilotClient({ + env, + githubToken: isCI ? 
"fake-token-for-e2e-tests" : undefined, + }); + onTestFinished(() => newClient.forceStop()); + const session2 = await newClient.resumeSession(session.sessionId, { + onPermissionRequest: approveAll, + streaming: true, + }); + const events: SessionEvent[] = []; + session2.on((event) => events.push(event)); + + const secondAssistantMessage = await session2.sendAndWait({ + prompt: "Now if you double that, what do you get?", + }); + expect(secondAssistantMessage?.data.content).toContain("18"); + + // Should have streaming deltas before the final message + const deltaEvents = events.filter((e) => e.type === "assistant.message_delta"); + expect(deltaEvents.length).toBeGreaterThanOrEqual(1); + + // Deltas should have content + for (const delta of deltaEvents) { + expect(delta.data.deltaContent).toBeDefined(); + expect(typeof delta.data.deltaContent).toBe("string"); + } + + await session2.destroy(); + }); }); diff --git a/python/e2e/test_session.py b/python/e2e/test_session.py index 13e749507..e6e4b303c 100644 --- a/python/e2e/test_session.py +++ b/python/e2e/test_session.py @@ -172,6 +172,13 @@ async def test_should_resume_a_session_using_the_same_client(self, ctx: E2ETestC answer2 = await get_final_assistant_message(session2) assert "2" in answer2.data.content + # Can continue the conversation statefully + answer3 = await session2.send_and_wait( + {"prompt": "Now if you double that, what do you get?"} + ) + assert answer3 is not None + assert "4" in answer3.data.content + async def test_should_resume_a_session_using_a_new_client(self, ctx: E2ETestContext): # Create initial session session1 = await ctx.client.create_session( @@ -201,13 +208,17 @@ async def test_should_resume_a_session_using_a_new_client(self, ctx: E2ETestCont ) assert session2.session_id == session_id - # TODO: There's an inconsistency here. When resuming with a new client, - # we don't see the session.idle message in the history, which means we - # can't use get_final_assistant_message. 
messages = await session2.get_messages() message_types = [m.type.value for m in messages] assert "user.message" in message_types assert "session.resume" in message_types + + # Can continue the conversation statefully + answer2 = await session2.send_and_wait( + {"prompt": "Now if you double that, what do you get?"} + ) + assert answer2 is not None + assert "4" in answer2.data.content finally: await new_client.force_stop() @@ -419,65 +430,6 @@ async def test_should_abort_a_session(self, ctx: E2ETestContext): answer = await session.send_and_wait({"prompt": "What is 2+2?"}) assert "4" in answer.data.content - async def test_should_receive_streaming_delta_events_when_streaming_is_enabled( - self, ctx: E2ETestContext - ): - import asyncio - - session = await ctx.client.create_session( - {"streaming": True, "on_permission_request": PermissionHandler.approve_all} - ) - - delta_contents = [] - done_event = asyncio.Event() - - def on_event(event): - if event.type.value == "assistant.message_delta": - delta = getattr(event.data, "delta_content", None) - if delta: - delta_contents.append(delta) - elif event.type.value == "session.idle": - done_event.set() - - session.on(on_event) - - await session.send({"prompt": "What is 2+2?"}) - - # Wait for completion - try: - await asyncio.wait_for(done_event.wait(), timeout=60) - except TimeoutError: - pytest.fail("Timed out waiting for session.idle") - - # Should have received delta events - assert len(delta_contents) > 0, "Expected to receive delta events" - - # Get the final message to compare - assistant_message = await get_final_assistant_message(session) - - # Accumulated deltas should equal the final message - accumulated = "".join(delta_contents) - assert accumulated == assistant_message.data.content, ( - f"Accumulated deltas don't match final message.\n" - f"Accumulated: {accumulated!r}\nFinal: {assistant_message.data.content!r}" - ) - - # Final message should contain the answer - assert "4" in assistant_message.data.content - - 
async def test_should_pass_streaming_option_to_session_creation(self, ctx: E2ETestContext): - # Verify that the streaming option is accepted without errors - session = await ctx.client.create_session( - {"streaming": True, "on_permission_request": PermissionHandler.approve_all} - ) - - assert session.session_id - - # Session should still work normally - await session.send({"prompt": "What is 1+1?"}) - assistant_message = await get_final_assistant_message(session) - assert "2" in assistant_message.data.content - async def test_should_receive_session_events(self, ctx: E2ETestContext): import asyncio diff --git a/python/e2e/test_streaming_fidelity.py b/python/e2e/test_streaming_fidelity.py new file mode 100644 index 000000000..bca24753e --- /dev/null +++ b/python/e2e/test_streaming_fidelity.py @@ -0,0 +1,114 @@ +"""E2E Streaming Fidelity Tests""" + +import os + +import pytest + +from copilot import CopilotClient, PermissionHandler + +from .testharness import E2ETestContext + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +class TestStreamingFidelity: + async def test_should_produce_delta_events_when_streaming_is_enabled(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + {"streaming": True, "on_permission_request": PermissionHandler.approve_all} + ) + + events = [] + session.on(lambda event: events.append(event)) + + await session.send_and_wait({"prompt": "Count from 1 to 5, separated by commas."}) + + types = [e.type.value for e in events] + + # Should have streaming deltas before the final message + delta_events = [e for e in events if e.type.value == "assistant.message_delta"] + assert len(delta_events) >= 1 + + # Deltas should have content + for delta in delta_events: + delta_content = getattr(delta.data, "delta_content", None) + assert delta_content is not None + assert isinstance(delta_content, str) + + # Should still have a final assistant.message + assert "assistant.message" in types + + # Deltas should come before the final 
message + first_delta_idx = types.index("assistant.message_delta") + last_assistant_idx = len(types) - 1 - types[::-1].index("assistant.message") + assert first_delta_idx < last_assistant_idx + + await session.destroy() + + async def test_should_not_produce_deltas_when_streaming_is_disabled(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + {"streaming": False, "on_permission_request": PermissionHandler.approve_all} + ) + + events = [] + session.on(lambda event: events.append(event)) + + await session.send_and_wait({"prompt": "Say 'hello world'."}) + + delta_events = [e for e in events if e.type.value == "assistant.message_delta"] + + # No deltas when streaming is off + assert len(delta_events) == 0 + + # But should still have a final assistant.message + assistant_events = [e for e in events if e.type.value == "assistant.message"] + assert len(assistant_events) >= 1 + + await session.destroy() + + async def test_should_produce_deltas_after_session_resume(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + {"streaming": False, "on_permission_request": PermissionHandler.approve_all} + ) + await session.send_and_wait({"prompt": "What is 3 + 6?"}) + await session.destroy() + + # Resume using a new client + github_token = ( + "fake-token-for-e2e-tests" if os.environ.get("GITHUB_ACTIONS") == "true" else None + ) + new_client = CopilotClient( + { + "cli_path": ctx.cli_path, + "cwd": ctx.work_dir, + "env": ctx.get_env(), + "github_token": github_token, + } + ) + + try: + session2 = await new_client.resume_session( + session.session_id, + {"streaming": True, "on_permission_request": PermissionHandler.approve_all}, + ) + events = [] + session2.on(lambda event: events.append(event)) + + answer = await session2.send_and_wait( + {"prompt": "Now if you double that, what do you get?"} + ) + assert answer is not None + assert "18" in answer.data.content + + # Should have streaming deltas before the final message + delta_events = [e 
for e in events if e.type.value == "assistant.message_delta"] + assert len(delta_events) >= 1 + + # Deltas should have content + for delta in delta_events: + delta_content = getattr(delta.data, "delta_content", None) + assert delta_content is not None + assert isinstance(delta_content, str) + + await session2.destroy() + finally: + await new_client.force_stop() diff --git a/test/snapshots/session/should_pass_streaming_option_to_session_creation.yaml b/test/snapshots/session/should_pass_streaming_option_to_session_creation.yaml deleted file mode 100644 index 250402101..000000000 --- a/test/snapshots/session/should_pass_streaming_option_to_session_creation.yaml +++ /dev/null @@ -1,10 +0,0 @@ -models: - - claude-sonnet-4.5 -conversations: - - messages: - - role: system - content: ${system} - - role: user - content: What is 1+1? - - role: assistant - content: 1 + 1 = 2 diff --git a/test/snapshots/session/should_receive_streaming_delta_events_when_streaming_is_enabled.yaml b/test/snapshots/session/should_receive_streaming_delta_events_when_streaming_is_enabled.yaml deleted file mode 100644 index 9fe2fcd07..000000000 --- a/test/snapshots/session/should_receive_streaming_delta_events_when_streaming_is_enabled.yaml +++ /dev/null @@ -1,10 +0,0 @@ -models: - - claude-sonnet-4.5 -conversations: - - messages: - - role: system - content: ${system} - - role: user - content: What is 2+2? - - role: assistant - content: 2 + 2 = 4 diff --git a/test/snapshots/session/should_resume_a_session_using_a_new_client.yaml b/test/snapshots/session/should_resume_a_session_using_a_new_client.yaml index 250402101..bd0285837 100644 --- a/test/snapshots/session/should_resume_a_session_using_a_new_client.yaml +++ b/test/snapshots/session/should_resume_a_session_using_a_new_client.yaml @@ -8,3 +8,7 @@ conversations: content: What is 1+1? - role: assistant content: 1 + 1 = 2 + - role: user + content: Now if you double that, what do you get? + - role: assistant + content: 2 doubled is 4. 
diff --git a/test/snapshots/session/should_resume_a_session_using_the_same_client.yaml b/test/snapshots/session/should_resume_a_session_using_the_same_client.yaml index 250402101..b012e26ea 100644 --- a/test/snapshots/session/should_resume_a_session_using_the_same_client.yaml +++ b/test/snapshots/session/should_resume_a_session_using_the_same_client.yaml @@ -8,3 +8,7 @@ conversations: content: What is 1+1? - role: assistant content: 1 + 1 = 2 + - role: user + content: Now if you double that, what do you get? + - role: assistant + content: If you double 2, you get 4. diff --git a/test/snapshots/streaming_fidelity/should_produce_deltas_after_session_resume.yaml b/test/snapshots/streaming_fidelity/should_produce_deltas_after_session_resume.yaml new file mode 100644 index 000000000..25e10c4b1 --- /dev/null +++ b/test/snapshots/streaming_fidelity/should_produce_deltas_after_session_resume.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 3 + 6? + - role: assistant + content: 3 + 6 = 9 + - role: user + content: Now if you double that, what do you get? + - role: assistant + content: 9 × 2 = 18