Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ The format is based on Keep a Changelog and this project uses Semantic Versionin
- Tahoe-first glass-surface UI with fallback styling and floating desktop window behavior.
- Microphone permission-gated audio startup coordinator for wake-word and STT flow.
- Model runtime bootstrap with first-run downloader, hash verification, and persona prompt builder.
- Agent loop timeout and cancellation controls with deterministic recovery to `idle`.
111 changes: 79 additions & 32 deletions Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Foundation
public enum AgentError: Error, Equatable {
case invalidTransition(from: BMOState, to: BMOState)
case unavailable(String)
case timeout(String)
}

public actor BMOAgent {
Expand All @@ -17,6 +18,7 @@ public actor BMOAgent {
private let visionService: VisionService?

private var wakeTask: Task<Void, Never>?
private var turnTask: Task<Void, Never>?
private let stream: AsyncStream<AgentEvent>
private let continuation: AsyncStream<AgentEvent>.Continuation

Expand Down Expand Up @@ -44,6 +46,7 @@ public actor BMOAgent {

deinit {
wakeTask?.cancel()
turnTask?.cancel()
continuation.finish()
}

Expand Down Expand Up @@ -73,29 +76,24 @@ public actor BMOAgent {
public func stop() async {
wakeTask?.cancel()
wakeTask = nil
turnTask?.cancel()
turnTask = nil
await wakeWordService.stop()
await ttsService.stop()
do {
try transition(to: .idle)
} catch {
emitError("Stop failed: \(error.localizedDescription)")
}
forceState(.idle)
}

public func handleWakeWordEvent() async {
do {
try transition(to: .listening)
let utterance = try await sttService.transcribeNextUtterance(timeout: config.sttTimeoutSeconds)
continuation.yield(.heardUtterance(utterance))
await handleUserUtterance(utterance)
} catch {
emitError("Transcription failed: \(error.localizedDescription)")
do {
try transition(to: .idle)
} catch {
emitError("State reset failed: \(error.localizedDescription)")
}
guard turnTask == nil else {
return
}

turnTask = Task {
await self.processTurn()
}

await turnTask?.value
turnTask = nil
}

public func handleUserUtterance(_ utterance: String) async {
Expand All @@ -114,21 +112,22 @@ public actor BMOAgent {
visionContext = nil
}

let response = try await llmService.generateResponse(
prompt: utterance,
systemPrompt: "You are BMO, an upbeat and helpful desktop companion.",
context: visionContext
)
let response = try await withTimeout(
seconds: config.llmTimeoutSeconds,
label: "LLM generation"
) { [llmService] in
try await llmService.generateResponse(
prompt: utterance,
systemPrompt: "You are BMO, an upbeat and helpful desktop companion.",
context: visionContext
)
}

continuation.yield(.generatedResponse(response))
await speak(response)
} catch {
emitError("Agent processing failed: \(error.localizedDescription)")
do {
try transition(to: .idle)
} catch {
emitError("State reset failed: \(error.localizedDescription)")
}
recoverToIdle()
}
}

Expand All @@ -144,11 +143,7 @@ public actor BMOAgent {
try transition(to: .idle)
} catch {
emitError("TTS failed: \(error.localizedDescription)")
do {
try transition(to: .idle)
} catch {
emitError("State reset failed: \(error.localizedDescription)")
}
recoverToIdle()
}
}

Expand Down Expand Up @@ -192,4 +187,56 @@ public actor BMOAgent {
continuation.yield(.stateChanged(.error))
continuation.yield(.error(message))
}

private func processTurn() async {
do {
try transition(to: .listening)
let utterance = try await withTimeout(
seconds: config.sttTimeoutSeconds,
label: "STT transcription"
) { [sttService, config] in
try await sttService.transcribeNextUtterance(timeout: config.sttTimeoutSeconds)
}
continuation.yield(.heardUtterance(utterance))
await handleUserUtterance(utterance)
} catch {
if error is CancellationError {
recoverToIdle()
return
}
emitError("Transcription failed: \(error.localizedDescription)")
recoverToIdle()
}
}

private func recoverToIdle() {
forceState(.idle)
}

private func forceState(_ next: BMOState) {
state = next
continuation.yield(.stateChanged(next))
}

private func withTimeout<T: Sendable>(
seconds: Double,
label: String,
operation: @escaping @Sendable () async throws -> T
) async throws -> T {
try await withThrowingTaskGroup(of: T.self) { group in
group.addTask {
try await operation()
}
group.addTask {
try await Task.sleep(nanoseconds: UInt64(seconds * 1_000_000_000))
throw AgentError.timeout("\(label) exceeded \(seconds)s")
}

guard let first = try await group.next() else {
throw AgentError.timeout("\(label) did not return a result")
}
group.cancelAll()
return first
}
}
}
70 changes: 69 additions & 1 deletion Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,18 @@ private actor MockWakeWordService: WakeWordService {

private actor MockSTTService: SpeechToTextService {
var value: String = "hello"
func transcribeNextUtterance(timeout: TimeInterval) async throws -> String { value }
var delayNanoseconds: UInt64 = 0

func setDelayNanoseconds(_ value: UInt64) {
delayNanoseconds = value
}

func transcribeNextUtterance(timeout: TimeInterval) async throws -> String {
if delayNanoseconds > 0 {
try await Task.sleep(nanoseconds: delayNanoseconds)
}
return value
}
}

private actor MockTTSService: TextToSpeechService {
Expand All @@ -32,7 +43,12 @@ private actor MockTTSService: TextToSpeechService {

private actor MockLLMService: LLMService {
private(set) var requests: [String] = []
var delayNanoseconds: UInt64 = 0

func generateResponse(prompt: String, systemPrompt: String, context: VisionContext?) async throws -> String {
if delayNanoseconds > 0 {
try await Task.sleep(nanoseconds: delayNanoseconds)
}
requests.append(prompt)
return "Hi from BMO!"
}
Expand Down Expand Up @@ -83,4 +99,56 @@ final class CoreAgentTests: XCTestCase {
XCTAssertEqual(spoken.count, 1)
XCTAssertEqual(spoken.first, "Hi from BMO!")
}

func testAgentRecoversToIdleAfterSTTTimeout() async {
let wake = MockWakeWordService()
let stt = MockSTTService()
let tts = MockTTSService()
let llm = MockLLMService()
let vision = MockVisionService()
await stt.setDelayNanoseconds(300_000_000)

let agent = BMOAgent(
config: AgentConfig(sttTimeoutSeconds: 0.05),
wakeWordService: wake,
sttService: stt,
ttsService: tts,
llmService: llm,
visionService: vision
)

await agent.handleWakeWordEvent()
try? await Task.sleep(nanoseconds: 120_000_000)

let state = await agent.state
XCTAssertEqual(state, .idle)
}

func testStopCancelsInFlightTurn() async {
let wake = MockWakeWordService()
let stt = MockSTTService()
let tts = MockTTSService()
let llm = MockLLMService()
let vision = MockVisionService()
await stt.setDelayNanoseconds(2_000_000_000)

let agent = BMOAgent(
config: AgentConfig(sttTimeoutSeconds: 5.0),
wakeWordService: wake,
sttService: stt,
ttsService: tts,
llmService: llm,
visionService: vision
)

let turn = Task {
await agent.handleWakeWordEvent()
}
try? await Task.sleep(nanoseconds: 150_000_000)
await agent.stop()
_ = await turn.value

let state = await agent.state
XCTAssertEqual(state, .idle)
}
}
4 changes: 4 additions & 0 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ The view model consumes `AsyncStream<AgentEvent>` from `BMOAgent`.
5. Agent enters `speaking`, emits face changes, and plays TTS.
6. Agent returns to `idle`.

Timeout and cancellation guards:
- STT and LLM steps run with explicit timeout wrappers.
- In-flight turn tasks are canceled on `stop()` and the agent force-recovers to `idle`.

## UI and Windowing

- `GlassSurface` provides Tahoe-first liquid-style panels with material fallback.
Expand Down