diff --git a/CHANGELOG.md b/CHANGELOG.md index dce1000..673a8e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,3 +13,4 @@ The format is based on Keep a Changelog and this project uses Semantic Versionin - Tahoe-first glass-surface UI with fallback styling and floating desktop window behavior. - Microphone permission-gated audio startup coordinator for wake-word and STT flow. - Model runtime bootstrap with first-run downloader, hash verification, and persona prompt builder. +- Agent loop timeout and cancellation controls with deterministic recovery to `idle`. diff --git a/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift index 863ef41..8b2b06e 100644 --- a/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift +++ b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift @@ -3,6 +3,7 @@ import Foundation public enum AgentError: Error, Equatable { case invalidTransition(from: BMOState, to: BMOState) case unavailable(String) + case timeout(String) } public actor BMOAgent { @@ -17,6 +18,7 @@ public actor BMOAgent { private let visionService: VisionService? private var wakeTask: Task? + private var turnTask: Task? private let stream: AsyncStream private let continuation: AsyncStream.Continuation @@ -44,6 +46,7 @@ public actor BMOAgent { deinit { wakeTask?.cancel() + turnTask?.cancel() continuation.finish() } @@ -73,29 +76,24 @@ public actor BMOAgent { public func stop() async { wakeTask?.cancel() wakeTask = nil + turnTask?.cancel() + turnTask = nil await wakeWordService.stop() await ttsService.stop() - do { - try transition(to: .idle) - } catch { - emitError("Stop failed: \(error.localizedDescription)") - } + forceState(.idle) } public func handleWakeWordEvent() async { - do { - try transition(to: .listening) - let utterance = try await sttService.transcribeNextUtterance(timeout: config.sttTimeoutSeconds) - continuation.yield(.heardUtterance(utterance)) - await handleUserUtterance(utterance) - } catch { - emitError("Transcription failed: \(error.localizedDescription)") - do { - try transition(to: .idle) - } catch { - emitError("State reset failed: \(error.localizedDescription)") - } + guard turnTask == nil else { + return } + + turnTask = Task { + await self.processTurn() + } + + await turnTask?.value + turnTask = nil } public func handleUserUtterance(_ utterance: String) async { @@ -114,21 +112,22 @@ public actor BMOAgent { visionContext = nil } - let response = try await llmService.generateResponse( - prompt: utterance, - systemPrompt: "You are BMO, an upbeat and helpful desktop companion.", - context: visionContext - ) + let response = try await withTimeout( + seconds: config.llmTimeoutSeconds, + label: "LLM generation" + ) { [llmService] in + try await llmService.generateResponse( + prompt: utterance, + systemPrompt: "You are BMO, an upbeat and helpful desktop companion.", + context: visionContext + ) + } continuation.yield(.generatedResponse(response)) await speak(response) } catch { emitError("Agent processing failed: \(error.localizedDescription)") - do { - try transition(to: .idle) - } catch { - emitError("State reset failed: \(error.localizedDescription)") - } + recoverToIdle() } } @@ -144,11 +143,7 @@ public actor BMOAgent { try transition(to: .idle) } catch { emitError("TTS failed: \(error.localizedDescription)") - do { - try transition(to: .idle) - } catch { - emitError("State reset failed: \(error.localizedDescription)") - } + recoverToIdle() } } @@ -192,4 +187,56 @@ public actor BMOAgent { continuation.yield(.stateChanged(.error)) continuation.yield(.error(message)) } + + private func processTurn() async { + do { + try transition(to: .listening) + let utterance = try await withTimeout( + seconds: config.sttTimeoutSeconds, + label: "STT transcription" + ) { [sttService, config] in + try await sttService.transcribeNextUtterance(timeout: config.sttTimeoutSeconds) + } + continuation.yield(.heardUtterance(utterance)) + await handleUserUtterance(utterance) + } catch { + if error is CancellationError { + recoverToIdle() + return + } + emitError("Transcription failed: \(error.localizedDescription)") + recoverToIdle() + } + } + + private func recoverToIdle() { + forceState(.idle) + } + + private func forceState(_ next: BMOState) { + state = next + continuation.yield(.stateChanged(next)) + } + + private func withTimeout( + seconds: Double, + label: String, + operation: @escaping @Sendable () async throws -> T + ) async throws -> T { + try await withThrowingTaskGroup(of: T.self) { group in + group.addTask { + try await operation() + } + group.addTask { + try await Task.sleep(nanoseconds: UInt64(seconds * 1_000_000_000)) + throw AgentError.timeout("\(label) exceeded \(seconds)s") + } + + guard let first = try await group.next() else { + throw AgentError.timeout("\(label) did not return a result") + } + group.cancelAll() + return first + } + } } diff --git a/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift index 1534795..eb55095 100644 --- a/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift +++ b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift @@ -21,7 +21,18 @@ private actor MockWakeWordService: WakeWordService { private actor MockSTTService: SpeechToTextService { var value: String = "hello" - func transcribeNextUtterance(timeout: TimeInterval) async throws -> String { value } + var delayNanoseconds: UInt64 = 0 + + func setDelayNanoseconds(_ value: UInt64) { + delayNanoseconds = value + } + + func transcribeNextUtterance(timeout: TimeInterval) async throws -> String { + if delayNanoseconds > 0 { + try await Task.sleep(nanoseconds: delayNanoseconds) + } + return value + } } private actor MockTTSService: TextToSpeechService { @@ -32,7 +43,12 @@ private actor MockTTSService: TextToSpeechService { private actor MockLLMService: LLMService { private(set) var requests: [String] = [] + var delayNanoseconds: UInt64 = 0 + func generateResponse(prompt: String, systemPrompt: String, context: VisionContext?) async throws -> String { + if delayNanoseconds > 0 { + try await Task.sleep(nanoseconds: delayNanoseconds) + } requests.append(prompt) return "Hi from BMO!" } @@ -83,4 +99,56 @@ final class CoreAgentTests: XCTestCase { XCTAssertEqual(spoken.count, 1) XCTAssertEqual(spoken.first, "Hi from BMO!") } + + func testAgentRecoversToIdleAfterSTTTimeout() async { + let wake = MockWakeWordService() + let stt = MockSTTService() + let tts = MockTTSService() + let llm = MockLLMService() + let vision = MockVisionService() + await stt.setDelayNanoseconds(300_000_000) + + let agent = BMOAgent( + config: AgentConfig(sttTimeoutSeconds: 0.05), + wakeWordService: wake, + sttService: stt, + ttsService: tts, + llmService: llm, + visionService: vision + ) + + await agent.handleWakeWordEvent() + try? await Task.sleep(nanoseconds: 120_000_000) + + let state = await agent.state + XCTAssertEqual(state, .idle) + } + + func testStopCancelsInFlightTurn() async { + let wake = MockWakeWordService() + let stt = MockSTTService() + let tts = MockTTSService() + let llm = MockLLMService() + let vision = MockVisionService() + await stt.setDelayNanoseconds(2_000_000_000) + + let agent = BMOAgent( + config: AgentConfig(sttTimeoutSeconds: 5.0), + wakeWordService: wake, + sttService: stt, + ttsService: tts, + llmService: llm, + visionService: vision + ) + + let turn = Task { + await agent.handleWakeWordEvent() + } + try? await Task.sleep(nanoseconds: 150_000_000) + await agent.stop() + _ = await turn.value + + let state = await agent.state + XCTAssertEqual(state, .idle) + } } diff --git a/docs/architecture.md b/docs/architecture.md index 7095706..f69d7fd 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -71,6 +71,10 @@ The view model consumes `AsyncStream` from `BMOAgent`. 5. Agent enters `speaking`, emits face changes, and plays TTS. 6. Agent returns to `idle`. +Timeout and cancellation guards: +- STT and LLM steps run with explicit timeout wrappers. +- In-flight turn tasks are canceled on `stop()` and the agent force-recovers to `idle`. + ## UI and Windowing - `GlassSurface` provides Tahoe-first liquid-style panels with material fallback.