From e2b4b1b9621e2913b065f1c1bcf6c49a95668058 Mon Sep 17 00:00:00 2001 From: Gale W Date: Sun, 3 May 2026 14:10:21 -0400 Subject: [PATCH] tests: cover app connector mcp elicitation --- CONTRIBUTING.md | 16 +- ROADMAP.md | 17 +- .../CodexAppServerLiveIntegrationTests.swift | 742 +++++++++++++++++- .../run-live-codex-server-request-probes.sh | 4 +- 4 files changed, 757 insertions(+), 22 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 81571cc..d268691 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -168,14 +168,16 @@ scripts/run-live-codex-server-request-probes.sh ``` That wrapper runs the deterministic command approval, permissions approval, -tool-user-input, and regular MCP elicitation fixture probes against the real -app-server, then writes +tool-user-input, regular MCP, and app-connector MCP elicitation fixture probes +against the real app-server, then writes `tmp/live-codex-reports/live-server-request-family-coverage.json` with the -current status of answerable server-request family coverage. The regular MCP -fixture proves the model-to-MCP tool path is deterministic, but that plain -stdio MCP path does not deterministically surface `mcpServer/elicitation/request` -through the app-server; the remaining MCP live gap is an app-connector MCP -elicitation fixture. +current status of answerable server-request family coverage. The app-connector +MCP fixture is the deterministic live elicitation path: it drives +`mcpServer/elicitation/request` through the real app-server, answers it through +SwiftASB, observes `serverRequest/resolved`, and waits for terminal turn +completion. The regular stdio MCP fixture remains in the runner as +model-to-MCP tool-path evidence, but app-connector MCP is the coverage source +for live MCP elicitation. 
That wrapper runs smoke probes, deterministic approval/server-request probes, the multi-turn create/edit/delete file scenario, and the disposable rollback diff --git a/ROADMAP.md b/ROADMAP.md index 2fe4afa..f15ac4b 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -756,14 +756,15 @@ runtime can be driven with a mock Responses provider. collaboration mode. The probe asserts `item/tool/requestUserInput` delivery, SwiftASB's JSON-RPC response, `serverRequest/resolved`, and terminal turn completion. -- [ ] MCP server elicitation. - Status: deterministic fake-transport coverage proves public routing and - response behavior, and the opt-in live server-request runner now drives a - regular stdio MCP fixture through the real app-server. That fixture proves the - model-to-MCP tool path is deterministic, but the plain stdio MCP path does - not deterministically surface `mcpServer/elicitation/request`; the remaining - live gap is an app-connector MCP elicitation fixture matching upstream Codex - app-server coverage. +- [x] MCP server elicitation. + Decision: deterministic fake-transport coverage proves public routing and + response behavior, and the opt-in live server-request runner now drives an + app-connector MCP fixture through the real app-server. The probe asserts MCP + tool-call delivery, `mcpServer/elicitation/request` delivery, SwiftASB's + JSON-RPC response, `serverRequest/resolved`, and terminal turn completion. + The regular stdio MCP fixture remains in the runner as model-to-MCP tool-path + evidence, while app-connector MCP is the deterministic live elicitation + coverage source. - [ ] Guardian denied-action approval after SwiftASB owns a stable public model. - [ ] Future promoted surfaces such as `hooks/list` and model-provider capabilities when they become public or observable contracts. 
diff --git a/Tests/SwiftASBTests/Public/CodexAppServerLiveIntegrationTests.swift b/Tests/SwiftASBTests/Public/CodexAppServerLiveIntegrationTests.swift
index a0dcc8c..1970b41 100644
--- a/Tests/SwiftASBTests/Public/CodexAppServerLiveIntegrationTests.swift
+++ b/Tests/SwiftASBTests/Public/CodexAppServerLiveIntegrationTests.swift
@@ -1696,6 +1696,256 @@ struct CodexAppServerLiveIntegrationTests {
         }
     }
 
+    // Live probe for MCP elicitation through an app connector: a mock Responses
+    // provider scripts the model output, a local Python fixture plays the connector
+    // backend, and the raw transport is driven until the elicitation turn completes.
+    @Test(
+        "completes deterministic app connector MCP elicitation through the raw real app-server",
+        .enabled(
+            if: ProcessInfo.processInfo.environment["SWIFTASB_ENABLE_LIVE_CODEX_TESTS"] == "1"
+                || ProcessInfo.processInfo.environment["SWIFTASB_ENABLE_LIVE_CODEX_SERVER_REQUEST_TESTS"] == "1",
+            "Requires explicit opt-in because this test launches the local Codex CLI and a temporary app connector MCP server fixture."
+        ),
+        .timeLimit(.minutes(2))
+    )
+    func completesDeterministicAppConnectorMcpElicitationThroughRawRealAppServer() async throws {
+        let mockResponses = try await MockResponsesServer(
+            responses: [
+                .assistantMessage("APP_CONNECTOR_WARMUP_DONE"),
+                .appConnectorMcpElicitationToolCall(callID: "app-connector-mcp-elicitation-call"),
+                .assistantMessage("APP_CONNECTOR_MCP_ELICITATION_DONE"),
+            ]
+        )
+        defer { mockResponses.stop() }
+
+        let appsServer = try await MockAppConnectorMcpServer()
+        defer { appsServer.stop() }
+
+        let harness = try LiveCodexHarness(
+            configMode: .mockResponsesWithAppConnectorMcpElicitation(
+                baseURL: mockResponses.baseURL.absoluteString,
+                appsBaseURL: appsServer.baseURL.absoluteString
+            )
+        )
+        defer { harness.cleanup() }
+
+        let transport = CodexAppServerTransport(
+            configuration: .init(
+                codexExecutableURL: harness.codexExecutableURL,
+                currentDirectoryURL: harness.rootDirectoryURL,
+                environment: harness.configuration.environment
+            )
+        )
+        let protocolLayer = CodexAppServerProtocol()
+        let serverEvents = await transport.serverEvents()
+        var eventIterator = serverEvents.makeAsyncIterator()
+
+        do {
+            try await transport.start()
+
+            let initializeRequestID = CodexRPCRequestID.string("deterministic-app-connector-mcp-initialize")
+            let initializePayload = try protocolLayer.makeInitializeRequest(
+                id: initializeRequestID,
+                params: CodexWireInitializeParams(
+                    capabilities: CodexWireInitializeCapabilities(
+                        experimentalAPI: nil,
+                        optOutNotificationMethods: [
+                            "account/rateLimits/updated",
+                            "hook/completed",
+                            "hook/started",
+                            "mcpServer/startupStatus/updated",
+                        ]
+                    ),
+                    clientInfo: .init(
+                        name: "SwiftASBDeterministicAppConnectorMcpTests",
+                        title: "SwiftASB Deterministic App Connector MCP Tests",
+                        version: "0.1.0"
+                    )
+                )
+            )
+            let initializeResponsePayload = try await withTimeout(
+                seconds: 15,
+                operation: "waiting for deterministic app-connector MCP initialize response"
+            ) {
+                try await transport.send(initializePayload, id: initializeRequestID)
+            }
+            _ = try protocolLayer.decodeInitializeResponse(
+                initializeResponsePayload,
+                expectedID: initializeRequestID
+            )
+
+            try await transport.sendNotification(
+                try protocolLayer.makeInitializedNotification(),
+                method: "initialized"
+            )
+
+            let threadRequestID = CodexRPCRequestID.string("deterministic-app-connector-mcp-thread")
+            let threadStartPayload = try protocolLayer.makeThreadStartRequest(
+                id: threadRequestID,
+                params: CodexWireThreadStartParams(
+                    approvalPolicy: .enumeration(.onRequest),
+                    approvalsReviewer: .user,
+                    baseInstructions: nil,
+                    config: nil,
+                    cwd: harness.approvalProbeWorkspace.path,
+                    developerInstructions: "Use the model-provided app connector MCP tool call exactly as emitted.",
+                    dynamicTools: nil,
+                    environments: nil,
+                    ephemeral: true,
+                    experimentalRawEvents: nil,
+                    mockExperimentalField: nil,
+                    model: "mock-model",
+                    modelProvider: nil,
+                    permissions: nil,
+                    persistExtendedHistory: nil,
+                    personality: nil,
+                    sandbox: .readOnly,
+                    serviceName: nil,
+                    serviceTier: nil,
+                    sessionStartSource: nil
+                )
+            )
+            let threadResponsePayload = try await withTimeout(
+                seconds: 15,
+                operation: "waiting for deterministic app-connector MCP thread/start response"
+            ) {
+                try await transport.send(threadStartPayload, id: threadRequestID)
+            }
+            let threadResponse = try protocolLayer.decodeThreadStartResponse(
+                threadResponsePayload,
+                expectedID: threadRequestID
+            )
+
+            // The first turn is answered by the scripted "APP_CONNECTOR_WARMUP_DONE" message;
+            // presumably it lets the app-server load the connector directory and tool list
+            // before the turn that matters — confirm against app-server behavior.
+            let warmupRequestID = CodexRPCRequestID.string("deterministic-app-connector-mcp-warmup-turn")
+            let warmupPayload = try protocolLayer.makeTurnStartRequest(
+                id: warmupRequestID,
+                params: CodexWireTurnStartParams(
+                    approvalPolicy: .enumeration(.onRequest),
+                    approvalsReviewer: .user,
+                    collaborationMode: nil,
+                    cwd: nil,
+                    effort: nil,
+                    environments: nil,
+                    input: [
+                        CodexWireUserInput(
+                            text: "Warm up connectors.",
+                            textElements: nil,
+                            type: .text,
+                            url: nil,
+                            path: nil,
+                            name: nil
+                        )
+                    ],
+                    model: "mock-model",
+                    outputSchema: nil,
+                    permissions: nil,
+                    personality: nil,
+                    responsesapiClientMetadata: nil,
+                    sandboxPolicy: nil,
+                    serviceTier: nil,
+                    summary: CodexWireReasoningSummary.none,
+                    threadID: threadResponse.thread.id
+                )
+            )
+            let warmupResponsePayload = try await withTimeout(
+                seconds: 15,
+                operation: "waiting for deterministic app-connector MCP warmup turn/start response"
+            ) {
+                try await transport.send(warmupPayload, id: warmupRequestID)
+            }
+            let warmupResponse = try protocolLayer.decodeTurnStartResponse(
+                warmupResponsePayload,
+                expectedID: warmupRequestID
+            )
+            let warmupCompletion = try await awaitRawTurnCompletion(
+                eventIterator: &eventIterator,
+                protocolLayer: protocolLayer,
+                threadID: threadResponse.thread.id,
+                turnID: warmupResponse.turn.id,
+                operation: "waiting for deterministic app-connector MCP warmup completion"
+            )
+            #expect(warmupCompletion.turn.status == .completed)
+
+            let turnRequestID = CodexRPCRequestID.string("deterministic-app-connector-mcp-turn")
+            let turnStartPayload = try protocolLayer.makeTurnStartRequest(
+                id: turnRequestID,
+                params: CodexWireTurnStartParams(
+                    approvalPolicy: .enumeration(.onRequest),
+                    approvalsReviewer: .user,
+                    collaborationMode: nil,
+                    cwd: nil,
+                    effort: nil,
+                    environments: nil,
+                    input: [
+                        CodexWireUserInput(
+                            text: "Use [$calendar](app://calendar) to run the calendar tool.",
+                            textElements: nil,
+                            type: .text,
+                            url: nil,
+                            path: nil,
+                            name: nil
+                        )
+                    ],
+                    model: "mock-model",
+                    outputSchema: nil,
+                    permissions: nil,
+                    personality: nil,
+                    responsesapiClientMetadata: nil,
+                    sandboxPolicy: nil,
+                    serviceTier: nil,
+                    summary: CodexWireReasoningSummary.none,
+                    threadID: threadResponse.thread.id
+                )
+            )
+            let turnResponsePayload = try await withTimeout(
+                seconds: 15,
+                operation: "waiting for deterministic app-connector MCP turn/start response"
+            ) {
+                try await transport.send(turnStartPayload, id: turnRequestID)
+            }
+            let turnResponse = try protocolLayer.decodeTurnStartResponse(
+                turnResponsePayload,
+                expectedID: turnRequestID
+            )
+
+            let elicitationResult = try await awaitRawMcpElicitationCompletion(
+                eventIterator: &eventIterator,
+                protocolLayer: protocolLayer,
+                transport: transport,
+                threadID: threadResponse.thread.id,
+                turnID: turnResponse.turn.id,
+                operation: "waiting for deterministic app-connector MCP elicitation completion"
+            )
+            #expect(elicitationResult.threadID == threadResponse.thread.id)
+            #expect(elicitationResult.turnID == turnResponse.turn.id)
+            #expect(elicitationResult.serverName == "codex_apps")
+            #expect(elicitationResult.sawMcpToolCall)
+            if !elicitationResult.sawElicitationRequest {
+                // Report the tool-call item details captured by the helper next to the
+                // fixture log, so a call that failed early is distinguishable from a
+                // call that ran but never elicited.
+                let toolCallSummary = "tool=\(elicitationResult.toolName ?? "nil") "
+                    + "status=\(elicitationResult.itemStatus ?? "nil") "
+                    + "error=\(elicitationResult.itemErrorDescription ?? "nil")"
+                Issue.record("app connector MCP tool call: \(toolCallSummary)\napp connector debug log:\n\(appsServer.debugLog)")
+            }
+            #expect(elicitationResult.sawElicitationRequest)
+            #expect(elicitationResult.sawServerRequestResolved)
+            #expect(elicitationResult.completion.turn.status == .completed)
+            #expect(mockResponses.requestCount >= 3)
+            #expect(appsServer.directoryRequestCount >= 1)
+            #expect(appsServer.toolCallRequestCount >= 1)
+
+            await transport.stop()
+        } catch {
+            await transport.stop()
+            throw error
+        }
+    }
+
     @Test(
         "records live approval, sandbox, history, and diagnostics behavior matrix",
         .enabled(
@@ -1862,10 +2100,10 @@ struct CodexAppServerLiveIntegrationTests { family: "mcpServerElicitation", publicSurface: "CodexThread.respond(to:with:) when turnId is null; CodexTurnHandle.respond(to:with:) when turn-routed", deterministicFakeTransportCoverage: true, - liveProbeCoverage: false, + liveProbeCoverage: true, liveProbeScript: "scripts/run-live-codex-server-request-probes.sh", - status: "blocked", - notes: "The public fake-transport suite proves routing and response behavior. A regular stdio MCP fixture now proves the model-to-MCP tool path is deterministic, but this path does not deterministically surface mcpServer/elicitation/request through the app-server; the remaining live gap is an app-connector MCP elicitation fixture matching upstream Codex app-server coverage." + status: "covered", + notes: "The focused server-request probe drives an app-connector MCP fixture through the real app-server and asserts MCP tool-call delivery, mcpServer/elicitation/request delivery, SwiftASB's JSON-RPC response, serverRequest/resolved, and terminal turn completion. The regular stdio fixture remains covered separately as model-to-MCP tool-path evidence, but app-connector MCP is the deterministic elicitation path." 
), ], sourceNotes: [ @@ -1880,6 +2118,7 @@ struct CodexAppServerLiveIntegrationTests { "commandExecutionApproval", "permissionsApproval", "toolUserInput", + "mcpServerElicitation", ]) } @@ -2104,6 +2343,7 @@ private final class LiveCodexHarness { case approvalProbe case mockResponses(baseURL: String, requestPermissionsTool: Bool = false) case mockResponsesWithMcpElicitation(baseURL: String) + case mockResponsesWithAppConnectorMcpElicitation(baseURL: String, appsBaseURL: String) } init(configMode: ConfigMode = .standard, fileManager: FileManager = .default) throws { @@ -2357,6 +2597,33 @@ private final class LiveCodexHarness { [projects.\(tomlQuotedString(projectRootURL.path))] trust_level = "trusted" """ + case let .mockResponsesWithAppConnectorMcpElicitation(baseURL, appsBaseURL): + try writeFakeChatGPTAuth(to: codexHomeURL) + isolatedConfig = """ + model = "mock-model" + approval_policy = "on-request" + approvals_reviewer = "user" + sandbox_mode = "read-only" + model_provider = "mock_provider" + chatgpt_base_url = "\(appsBaseURL)" + mcp_oauth_credentials_store = "file" + suppress_unstable_features_warning = true + + [features] + apps = true + exec_permission_approvals = true + + [model_providers.mock_provider] + name = "SwiftASB Mock Responses Provider" + base_url = "\(baseURL)/v1" + wire_api = "responses" + request_max_retries = 0 + stream_max_retries = 0 + supports_websockets = false + + [projects.\(tomlQuotedString(projectRootURL.path))] + trust_level = "untrusted" + """ } try Data(isolatedConfig.utf8).write(to: configURL) } @@ -2366,7 +2633,7 @@ private final class LiveCodexHarness { projectRootURL: URL ) -> LiveApprovalProbeReport.CodexConfig? 
{
         switch configMode {
-        case .standard, .mockResponses, .mockResponsesWithMcpElicitation:
+        case .standard, .mockResponses, .mockResponsesWithMcpElicitation, .mockResponsesWithAppConnectorMcpElicitation:
             nil
         case .approvalProbe:
             .init(
@@ -2392,6 +2659,58 @@ private final class LiveCodexHarness {
             .replacingOccurrences(of: "\"", with: "\\\"")
     }
 
+    /// Writes a fake ChatGPT-mode `auth.json` into `codexHomeURL` so the isolated Codex
+    /// home carries the bearer token and account id the app-connector fixture checks for.
+    private static func writeFakeChatGPTAuth(to codexHomeURL: URL) throws {
+        let idToken = try fakeChatGPTIDToken(
+            accountID: "account-123",
+            userID: "user-123"
+        )
+        // Stamp the credentials with the current time rather than a fixed calendar date,
+        // which would make the fixture look older on every later run.
+        // NOTE(review): Codex is understood to refresh ChatGPT tokens once `last_refresh`
+        // is several days old, which this offline fixture cannot satisfy — confirm.
+        let lastRefresh = ISO8601DateFormatter().string(from: Date())
+        let authObject: [String: Any] = [
+            "auth_mode": "chatgpt",
+            "tokens": [
+                "id_token": idToken,
+                "access_token": "chatgpt-token",
+                "refresh_token": "refresh-token",
+                "account_id": "account-123",
+            ],
+            "last_refresh": lastRefresh,
+        ]
+        let authData = try JSONSerialization.data(
+            withJSONObject: authObject,
+            options: [.prettyPrinted, .sortedKeys]
+        )
+        try authData.write(to: codexHomeURL.appendingPathComponent("auth.json"))
+    }
+
+    /// Builds a JWT-shaped token ("alg": "none", placeholder signature) whose payload carries
+    /// the given ChatGPT account and user ids under the `https://api.openai.com/auth` claim.
+    private static func fakeChatGPTIDToken(accountID: String, userID: String) throws -> String {
+        let header = try jsonBase64URLString([
+            "alg": "none",
+            "typ": "JWT",
+        ])
+        let payload = try jsonBase64URLString([
+            "https://api.openai.com/auth": [
+                "chatgpt_account_id": accountID,
+                "chatgpt_user_id": userID,
+            ],
+        ])
+        let signature = Data("signature".utf8).base64URLEncodedString()
+        return "\(header).\(payload).\(signature)"
+    }
+
+    /// Serializes `object` as JSON with sorted keys and returns it base64url-encoded.
+    private static func jsonBase64URLString(_ object: [String: Any]) throws -> String {
+        let data = try JSONSerialization.data(withJSONObject: object, options: [.sortedKeys])
+        return data.base64URLEncodedString()
+    }
+
     private static let mcpElicitationServerPythonScript = """
     import json
     import sys
@@ -2780,7 +3089,7 @@ private final class MockResponsesServer: @unchecked Sendable {
         try?
FileManager.default.removeItem(at: rootDirectoryURL)
     }
 
-    private static func waitForPortFile(_ portFileURL: URL) async throws -> Int {
+    fileprivate static func waitForPortFile(_ portFileURL: URL) async throws -> Int {
         let deadline = ContinuousClock.now + .seconds(5)
         while ContinuousClock.now < deadline {
             if let text = try? String(contentsOf: portFileURL, encoding: .utf8),
@@ -2854,6 +3163,407 @@ private final class MockResponsesServer: @unchecked Sendable {
     """
 }
 
+/// Local stand-in for the ChatGPT app-connector backend used by the live elicitation probe.
+///
+/// Launches a Python HTTP fixture on a loopback port and exposes the counters and
+/// debug log that the fixture writes into a private temporary directory.
+private final class MockAppConnectorMcpServer: @unchecked Sendable {
+    private let process: Process
+    private let rootDirectoryURL: URL
+    private let directoryRequestCountFileURL: URL
+    private let toolCallRequestCountFileURL: URL
+    private let debugLogFileURL: URL
+
+    /// Loopback base URL the fixture listens on.
+    let baseURL: URL
+
+    var directoryRequestCount: Int {
+        Self.readCount(from: directoryRequestCountFileURL)
+    }
+
+    var toolCallRequestCount: Int {
+        Self.readCount(from: toolCallRequestCountFileURL)
+    }
+
+    var debugLog: String {
+        (try? String(contentsOf: debugLogFileURL, encoding: .utf8)) ?? ""
+    }
+
+    /// Starts the fixture and waits for it to publish its listening port.
+    ///
+    /// - Throws: Any file-system or launch error, or the error from
+    ///   `MockResponsesServer.waitForPortFile` when the port file does not appear.
+    init() async throws {
+        let fileManager = FileManager.default
+        self.rootDirectoryURL = fileManager.temporaryDirectory
+            .appendingPathComponent("SwiftASB-MockAppConnectorMCP-\(UUID().uuidString)", isDirectory: true)
+        try fileManager.createDirectory(at: rootDirectoryURL, withIntermediateDirectories: true)
+
+        let portFileURL = rootDirectoryURL.appendingPathComponent("port.txt")
+        self.directoryRequestCountFileURL = rootDirectoryURL.appendingPathComponent("directory-request-count.txt")
+        self.toolCallRequestCountFileURL = rootDirectoryURL.appendingPathComponent("tool-call-request-count.txt")
+        self.debugLogFileURL = rootDirectoryURL.appendingPathComponent("debug.log")
+        let scriptURL = rootDirectoryURL.appendingPathComponent("mock_app_connector_mcp_server.py")
+
+        try Data("0\n".utf8).write(to: directoryRequestCountFileURL)
+        try Data("0\n".utf8).write(to: toolCallRequestCountFileURL)
+        try Data().write(to: debugLogFileURL)
+        try Data(Self.pythonScript.utf8).write(to: scriptURL)
+
+        let process = Process()
+        process.executableURL = URL(fileURLWithPath: "/usr/bin/env")
+        process.arguments = [
+            "python3",
+            scriptURL.path,
+            portFileURL.path,
+            directoryRequestCountFileURL.path,
+            toolCallRequestCountFileURL.path,
+            debugLogFileURL.path,
+        ]
+        self.process = process
+        try process.run()
+
+        // A failing init never reaches the caller's `defer { stop() }`, so when the port
+        // file does not show up (for example because python3 is unavailable) the child
+        // process and the temporary directory have to be torn down here.
+        let port: Int
+        do {
+            port = try await MockResponsesServer.waitForPortFile(portFileURL)
+        } catch {
+            if process.isRunning {
+                process.terminate()
+                process.waitUntilExit()
+            }
+            try? FileManager.default.removeItem(at: rootDirectoryURL)
+            throw error
+        }
+        self.baseURL = URL(string: "http://127.0.0.1:\(port)")!
+    }
+
+    /// Terminates the fixture process if it is still running and deletes its temporary directory.
+    func stop() {
+        if process.isRunning {
+            process.terminate()
+            process.waitUntilExit()
+        }
+        try? FileManager.default.removeItem(at: rootDirectoryURL)
+    }
+
+    /// Reads an integer counter file, returning 0 when it is missing or unparsable.
+    private static func readCount(from url: URL) -> Int {
+        guard let text = try? String(contentsOf: url, encoding: .utf8) else {
+            return 0
+        }
+        return Int(text.trimmingCharacters(in: .whitespacesAndNewlines)) ?? 0
+    }
+
+    /// Source of the Python fixture; `init()` writes it to disk and runs it with `python3`.
+    private static let pythonScript = """
+    import json
+    import sys
+    import threading
+    import time
+    from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+    port_path, directory_count_path, tool_call_count_path, debug_log_path = sys.argv[1:5]
+    pending_elicitation_response = None
+    pending_condition = threading.Condition()
+    event_stream = None
+    event_stream_lock = threading.Lock()
+    # Serializes counter-file updates across the server's handler threads.
+    count_lock = threading.Lock()
+
+    def log(message):
+        with open(debug_log_path, "a", encoding="utf-8") as handle:
+            handle.write(message + "\\n")
+
+    def read_count(path):
+        try:
+            with open(path, "r", encoding="utf-8") as handle:
+                return int((handle.read().strip() or "0"))
+        except FileNotFoundError:
+            return 0
+
+    def increment_count(path):
+        # Mode "w" truncates on open, so the old value must be read first;
+        # reading inside the write block always saw an empty file and stored 1.
+        with count_lock:
+            next_count = read_count(path) + 1
+            with open(path, "w", encoding="utf-8") as handle:
+                handle.write(f"{next_count}\\n")
+
+    def json_bytes(value):
+        return json.dumps(value, separators=(",", ":")).encode("utf-8")
+
+    def sse_message(value):
+        return b"event: message\\n" + b"data: " + json_bytes(value) + b"\\n\\n"
+
+    class Handler(BaseHTTPRequestHandler):
+        protocol_version = "HTTP/1.1"
+
+        def log_message(self, format, *args):
+            return
+
+        def do_GET(self):
+            log(f"GET {self.path}")
+            # "/connectors/directory/list" is also a prefix of the ".../list_workspace"
+            # endpoint, so this single check routes both to the same handler.
+            if self.path.startswith("/connectors/directory/list"):
+                self.handle_directory_list()
+            elif self.path.startswith("/api/codex/apps"):
+                self.handle_event_stream()
+            else:
+                self.send_response(404)
+                self.send_header("content-length", "0")
+                self.end_headers()
+
+        def do_POST(self):
+            log(f"POST {self.path}")
+            if not self.path.startswith("/api/codex/apps"):
+                self.send_response(404)
+                self.send_header("content-length", "0")
+                self.end_headers()
+                return
+
+            length = int(self.headers.get("content-length", "0"))
+            body = self.rfile.read(length) if length else b"{}"
+            try:
+                message = json.loads(body.decode("utf-8"))
+                log(f"POST body {message}")
+            except Exception as error:
+                self.send_json({
+                    "jsonrpc": "2.0",
+                    "id": None,
+                    "error": {"code": -32700, "message": f"Invalid JSON: {error}"},
+                }, status=400)
+                return
+
+            if isinstance(message, list):
+                self.send_json([self.response_for_request(item) for item in message if "id" in item])
+                return
+
+            # 3 is the id this fixture assigns to its own elicitation/create request below.
+            if "method" not in message and message.get("id") == 3:
+                self.handle_elicitation_response(message)
+                return
+
+            method = message.get("method")
+            if method == "initialize":
+                self.send_json({
+                    "jsonrpc": "2.0",
+                    "id": message.get("id"),
+                    "result": {
+                        "protocolVersion": "2025-06-18",
+                        "capabilities": {"tools": {}},
+                        "serverInfo": {"name": "codex-apps", "version": "0.1.0"},
+                    },
+                }, session_id="swiftasb-app-connector-session")
+            elif method == "notifications/initialized":
+                self.send_accepted()
+            elif method == "tools/list":
+                self.send_json(self.tools_list_response(message.get("id")))
+            elif method == "tools/call":
+                self.handle_tool_call(message)
+            elif "id" in message:
+                self.send_json({
+                    "jsonrpc": "2.0",
+                    "id": message.get("id"),
+                    "error": {"code": -32601, "message": f"Unsupported method: {method}"},
+                })
+            else:
+                self.send_accepted()
+
+        def handle_directory_list(self):
+            increment_count(directory_count_path)
+            bearer_ok = self.headers.get("authorization") == "Bearer chatgpt-token"
+            account_ok = self.headers.get("chatgpt-account-id") == "account-123"
+            external_logos_ok = "external_logos=true" in self.path
+            if not bearer_ok or not account_ok:
+                self.send_response(401)
+                self.send_header("content-length", "0")
+                self.end_headers()
+                return
+            if not external_logos_ok:
+                self.send_response(400)
+                self.send_header("content-length", "0")
+                self.end_headers()
+                return
+            self.send_json({
+                "apps": [{
+                    "id": "calendar",
+                    "name": "Calendar",
+                    "description": "Calendar connector",
+                    "logo_url": None,
+                    "logo_url_dark": None,
+                    "distribution_channel": None,
+                    "branding": None,
+                    "app_metadata": None,
+                    "labels": None,
+                    "install_url": None,
+                    "is_accessible": False,
+                    "is_enabled": True,
+                }],
+                "next_token": None,
+            })
+
+        def response_for_request(self, message):
+            method = message.get("method")
+            if method == "tools/list":
+                return self.tools_list_response(message.get("id"))
+            return {
+                "jsonrpc": "2.0",
+                "id": message.get("id"),
+                "error": {"code": -32601, "message": f"Unsupported batch method: {method}"},
+            }
+
+        def tools_list_response(self, request_id):
+            return {
+                "jsonrpc": "2.0",
+                "id": request_id,
+                "result": {
+                    "tools": [{
+                        "name": "calendar_confirm_action",
+                        "description": "Confirm a calendar action.",
+                        "inputSchema": {
+                            "type": "object",
+                            "additionalProperties": False,
+                        },
+                        "annotations": {
+                            "readOnlyHint": True,
+                        },
+                        "_meta": {
+                            "connector_id": "calendar",
+                            "connector_name": "Calendar",
+                        },
+                    }],
+                    "nextCursor": None,
+                    "_meta": None,
+                },
+            }
+
+        def handle_elicitation_response(self, message):
+            global pending_elicitation_response
+            log(f"elicitation response {message}")
+            with pending_condition:
+                pending_elicitation_response = message
+                pending_condition.notify_all()
+            self.send_accepted()
+
+        def handle_event_stream(self):
+            global event_stream
+            log("opening app connector MCP event stream")
+            self.send_response(200)
+            self.send_header("content-type", "text/event-stream")
+            self.send_header("cache-control", "no-cache")
+            self.send_header("connection", "keep-alive")
+            self.end_headers()
+            with event_stream_lock:
+                event_stream = self
+            try:
+                while True:
+                    time.sleep(0.25)
+            except Exception as error:
+                log(f"event stream closed {error}")
+            finally:
+                with event_stream_lock:
+                    if event_stream is self:
+                        event_stream = None
+
+        def handle_tool_call(self, message):
+            global pending_elicitation_response
+            increment_count(tool_call_count_path)
+            log(f"tool call {message}")
+            request_id = message.get("id")
+            # Drop any answer left over from an earlier call so this call waits for its own.
+            with pending_condition:
+                pending_elicitation_response = None
+            self.send_response(200)
+            self.send_header("content-type", "text/event-stream")
+            self.send_header("cache-control", "no-cache")
+            self.send_header("connection", "close")
+            self.end_headers()
+
+            elicitation_request = {
+                "jsonrpc": "2.0",
+                "id": 3,
+                "method": "elicitation/create",
+                "params": {
+                    "message": "Allow this request?",
+                    "requestedSchema": {
+                        "type": "object",
+                        "properties": {
+                            "confirmed": {
+                                "type": "boolean",
+                                "title": "Confirmed",
+                            },
+                        },
+                        "required": ["confirmed"],
+                    },
+                },
+            }
+            self.wfile.write(sse_message(elicitation_request))
+            self.wfile.flush()
+            log(f"sent elicitation request {elicitation_request}")
+
+            deadline = time.time() + 10
+            with pending_condition:
+                while pending_elicitation_response is None and time.time() < deadline:
+                    pending_condition.wait(timeout=0.1)
+                response = pending_elicitation_response
+            log(f"elicitation wait result {response}")
+
+            # No reply before the deadline is reported as such instead of as an accept.
+            output_text = "timed_out"
+            if isinstance(response, dict):
+                output_text = "accepted"
+                # A JSON-RPC error reply carries no "result"; treat it as an empty one.
+                result = response.get("result") or {}
+                action = result.get("action")
+                if action == "decline":
+                    output_text = "declined"
+                elif action == "cancel":
+                    output_text = "cancelled"
+
+            tool_response = {
+                "jsonrpc": "2.0",
+                "id": request_id,
+                "result": {
+                    "content": [{
+                        "type": "text",
+                        "text": output_text,
+                    }],
+                },
+            }
+            self.wfile.write(sse_message(tool_response))
+            self.wfile.flush()
+            log(f"sent tool response {tool_response}")
+
+        def send_json(self, value, status=200, session_id=None):
+            body = json_bytes(value)
+            self.send_response(status)
+            self.send_header("content-type", "application/json")
+            self.send_header("content-length", str(len(body)))
+            if session_id is not None:
+                self.send_header("mcp-session-id", session_id)
+            self.end_headers()
+            self.wfile.write(body)
+
+        def send_accepted(self):
+            self.send_response(202)
+            self.send_header("content-length", "0")
+            self.end_headers()
+
+    class QuietThreadingHTTPServer(ThreadingHTTPServer):
+        def handle_error(self, request, client_address):
+            log(f"suppressed request handling error from {client_address}")
+
+    server = QuietThreadingHTTPServer(("127.0.0.1", 0), Handler)
+    with open(port_path, "w", encoding="utf-8") as handle:
+        handle.write(f"{server.server_address[1]}\\n")
+    server.serve_forever()
+    """
+}
+
private struct MockResponsesEventStream: Encodable, Equatable { let body: String @@ -2930,6 +3601,19 @@ private struct MockResponsesEventStream: Encodable, Equatable { ]) } + static func appConnectorMcpElicitationToolCall(callID: String) throws -> Self { + try .init(events: [ + responseCreated(id: "resp-app-connector-mcp-elicitation"), + functionCall( + callID: callID, + name: "_confirm_action", + namespace: "mcp__codex_apps__calendar", + arguments: "{}" + ), + responseCompleted(id: "resp-app-connector-mcp-elicitation"), + ]) + } + static func assistantMessage(_ message: String) throws -> Self { try .init(events: [ responseCreated(id: "resp-final"), @@ -3476,6 +4160,9 @@ private struct RawMcpElicitationResult: Equatable, Sendable { let threadID: String let turnID: String let serverName: String? + let toolName: String? + let itemStatus: String? + let itemErrorDescription: String? let sawMcpToolCall: Bool let sawElicitationRequest: Bool let sawServerRequestResolved: Bool @@ -3670,6 +4357,9 @@ private func awaitRawMcpElicitationCompletion( operation: String ) async throws -> RawMcpElicitationResult { var serverName: String? + var toolName: String? + var itemStatus: String? + var itemErrorDescription: String? 
var sawMcpToolCall = false var sawElicitationRequest = false var sawServerRequestResolved = false @@ -3688,12 +4378,17 @@ private func awaitRawMcpElicitationCompletion( && started.item.type == .mcpToolCall: sawMcpToolCall = true serverName = started.item.server + toolName = started.item.tool + itemStatus = started.item.status case let .itemCompleted(completed) where completed.threadID == threadID && completed.turnID == turnID && completed.item.type == .mcpToolCall: sawMcpToolCall = true serverName = completed.item.server + toolName = completed.item.tool + itemStatus = completed.item.status + itemErrorDescription = completed.item.error.map { String(describing: $0) } case let .mcpServerElicitationRequested(request) where request.threadID == threadID && (request.turnID == nil || request.turnID == turnID): sawElicitationRequest = true @@ -3717,6 +4412,9 @@ private func awaitRawMcpElicitationCompletion( threadID: threadID, turnID: turnID, serverName: serverName, + toolName: toolName, + itemStatus: itemStatus, + itemErrorDescription: itemErrorDescription, sawMcpToolCall: sawMcpToolCall, sawElicitationRequest: sawElicitationRequest, sawServerRequestResolved: sawServerRequestResolved @@ -3729,6 +4427,31 @@ private func awaitRawMcpElicitationCompletion( throw LiveIntegrationError.eventStreamEnded(operation: "\(operation): observedEvents=\(observedEvents)") } +private func awaitRawTurnCompletion( + eventIterator: inout AsyncStream.Iterator, + protocolLayer: CodexAppServerProtocol, + threadID: String, + turnID: String, + operation: String +) async throws -> CodexWireTurnCompletedNotification { + var observedEvents: [String] = [] + + while let serverEvent = await eventIterator.next() { + guard let decodedEvent = try protocolLayer.decodeServerEvent(serverEvent) else { + continue + } + observedEvents.append(String(describing: decodedEvent)) + + if case let .turnCompleted(completed) = decodedEvent, + completed.threadID == threadID, + completed.turn.id == turnID { + return 
completed + } + } + + throw LiveIntegrationError.eventStreamEnded(operation: "\(operation): observedEvents=\(observedEvents)") +} + private struct RawCommandExecutionApprovalResponse: Encodable { let decision: String } @@ -3818,6 +4541,15 @@ private func prompt(label: String) -> String { """ } +private extension Data { + func base64URLEncodedString() -> String { + base64EncodedString() + .replacingOccurrences(of: "+", with: "-") + .replacingOccurrences(of: "/", with: "_") + .replacingOccurrences(of: "=", with: "") + } +} + private extension CodexAppServer.TurnStatus { var isTerminal: Bool { switch self { diff --git a/scripts/run-live-codex-server-request-probes.sh b/scripts/run-live-codex-server-request-probes.sh index 77bee16..f980714 100755 --- a/scripts/run-live-codex-server-request-probes.sh +++ b/scripts/run-live-codex-server-request-probes.sh @@ -13,10 +13,10 @@ printf '%s\n' 'Running SwiftASB live Codex server-request probes.' printf '%s\n' 'Step 1/3: deterministic command and permissions approval probes' sh "$REPO_ROOT/scripts/run-live-codex-approval-probe.sh" -printf '%s\n' 'Step 2/3: deterministic tool-user-input and regular MCP elicitation probes' +printf '%s\n' 'Step 2/3: deterministic tool-user-input, regular MCP, and app-connector MCP elicitation probes' env SWIFTASB_ENABLE_LIVE_CODEX_SERVER_REQUEST_TESTS=1 \ SWIFTASB_LIVE_CODEX_REPORT_DIR="$SWIFTASB_LIVE_CODEX_REPORT_DIR" \ - swift test --filter 'CodexAppServerLiveIntegrationTests/(completesDeterministicToolUserInputThroughRawRealAppServer|recordsDeterministicRegularMcpElicitationFixtureBehaviorThroughRawRealAppServer)' + swift test --filter 'CodexAppServerLiveIntegrationTests/(completesDeterministicToolUserInputThroughRawRealAppServer|recordsDeterministicRegularMcpElicitationFixtureBehaviorThroughRawRealAppServer|completesDeterministicAppConnectorMcpElicitationThroughRawRealAppServer)' printf '%s\n' 'Step 3/3: server-request family coverage report' env 
SWIFTASB_ENABLE_LIVE_CODEX_SERVER_REQUEST_TESTS=1 \