diff --git a/CHANGELOG.md b/CHANGELOG.md
index 673a8e7..323e049 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,3 +14,4 @@ The format is based on Keep a Changelog and this project uses Semantic Versionin
 - Microphone permission-gated audio startup coordinator for wake-word and STT flow.
 - Model runtime bootstrap with first-run downloader, hash verification, and persona prompt builder.
 - Agent loop timeout and cancellation controls with deterministic recovery to `idle`.
+- Persona-driven face expression mapping and interruption-safe TTS output behavior.
diff --git a/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift b/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift
index 7782a0a..2927811 100644
--- a/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift
+++ b/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift
@@ -150,28 +150,69 @@ public actor WhisperSpeechToTextService: SpeechToTextService {
     }
 }
 
+/// The subset of `AVSpeechSynthesizer` that `AVSpeechSynthesizerService` relies on.
+///
+/// Abstracting it lets tests inject a double instead of driving real speech output.
+@MainActor
+protocol SpeechSynthesizing: AnyObject {
+    var isSpeaking: Bool { get }
+    func speak(_ utterance: AVSpeechUtterance)
+    func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool
+}
+
+/// Production `SpeechSynthesizing` implementation that forwards to a private `AVSpeechSynthesizer`.
+@MainActor
+final class SystemSpeechSynthesizerAdapter: SpeechSynthesizing {
+    private let synthesizer = AVSpeechSynthesizer()
+
+    var isSpeaking: Bool { synthesizer.isSpeaking }
+
+    func speak(_ utterance: AVSpeechUtterance) {
+        synthesizer.speak(utterance)
+    }
+
+    func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool {
+        synthesizer.stopSpeaking(at: boundary)
+    }
+}
+
 @MainActor
 public final class AVSpeechSynthesizerService: @unchecked Sendable, TextToSpeechService {
-    private let synthesizer: AVSpeechSynthesizer
+    private let synthesizer: SpeechSynthesizing
+    /// Number of in-progress utterances that `speak(_:)` has cut short to start a new one.
+    public private(set) var interruptionCount = 0
 
     public init() {
-        self.synthesizer = AVSpeechSynthesizer()
+        self.synthesizer = SystemSpeechSynthesizerAdapter()
+    }
+
+    /// Creates a service backed by `synthesizer`; internal so tests can inject a double.
+    init(synthesizer: SpeechSynthesizing) {
+        self.synthesizer = synthesizer
     }
 
+    /// Speaks `text`, first stopping any utterance that is still in progress.
+    ///
+    /// - Throws: `CancellationError` if the task is cancelled during the short pause after speech is queued.
     public func speak(_ text: String) async throws {
+        // Record an interruption only when the synthesizer reports that it actually
+        // stopped; the result of `stopSpeaking(at:)` was previously discarded.
+        if synthesizer.isSpeaking, synthesizer.stopSpeaking(at: .immediate) {
+            interruptionCount += 1
+        }
+
         let utterance = AVSpeechUtterance(string: text)
         utterance.rate = 0.42
-        synthesize(utterance)
+        utterance.pitchMultiplier = 1.12
+        utterance.postUtteranceDelay = 0.02
+        synthesizer.speak(utterance)
 
         // Keep this async call cooperative for testability.
         try await Task.sleep(nanoseconds: 120_000_000)
     }
 
+    /// Immediately stops any speech in progress.
     public func stop() async {
-        synthesizer.stopSpeaking(at: .immediate)
-    }
-
-    private func synthesize(_ utterance: AVSpeechUtterance) {
-        synthesizer.speak(utterance)
+        _ = synthesizer.stopSpeaking(at: .immediate)
     }
 }
diff --git a/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift b/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift
index 4565749..00d50c2 100644
--- a/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift
+++ b/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift
@@ -1,4 +1,5 @@
 import Foundation
+import AVFoundation
 import XCTest
 @testable import AudioPipeline
 
@@ -91,4 +92,51 @@ final class AudioPipelineTests: XCTestCase {
             XCTFail("Expected permission flow to succeed: \(error)")
         }
     }
+
+    @MainActor
+    func testTTSSpeakInterruptsActiveUtterance() async {
+        let synth = MockSpeechSynthesizer(initiallySpeaking: true)
+        let service = AVSpeechSynthesizerService(synthesizer: synth)
+
+        do {
+            try await service.speak("First interruption test")
+        } catch {
+            XCTFail("Unexpected TTS error: \(error)")
+        }
+
+        XCTAssertEqual(synth.stopCallCount, 1)
+        XCTAssertEqual(service.interruptionCount, 1)
+        XCTAssertEqual(synth.speakCallCount, 1)
+    }
+
+    @MainActor
+    func testTTSStopCancelsSpeech() async {
+        let synth = MockSpeechSynthesizer(initiallySpeaking: true)
+        let service = AVSpeechSynthesizerService(synthesizer: synth)
+        await service.stop()
+        XCTAssertEqual(synth.stopCallCount, 1)
+        XCTAssertFalse(synth.isSpeaking)
+    }
+}
+
+@MainActor
+private final class MockSpeechSynthesizer: SpeechSynthesizing {
+    var isSpeaking: Bool
+    private(set) var stopCallCount = 0
+    private(set) var speakCallCount = 0
+
+    init(initiallySpeaking: Bool) {
+        self.isSpeaking = initiallySpeaking
+    }
+
+    func speak(_ utterance: AVSpeechUtterance) {
+        speakCallCount += 1
+        isSpeaking = true
+    }
+
+    func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool {
+        stopCallCount += 1
+        isSpeaking = false
+        return true
+    }
 }
diff --git a/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift
index 8b2b06e..d46c97d 100644
--- a/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift
+++ b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift
@@ -173,13 +173,7 @@ public actor BMOAgent {
     }
 
     private func expression(for text: String) -> FaceExpression {
-        if text.contains("!") {
-            return .excited
-        }
-        if text.contains("?") {
-            return .curious
-        }
-        return .speaking
+        PersonaExpressionMapper.expression(for: text)
     }
 
     private func emitError(_ message: String) {
diff --git a/Packages/CoreAgent/Sources/CoreAgent/PersonaExpressionMapper.swift b/Packages/CoreAgent/Sources/CoreAgent/PersonaExpressionMapper.swift
new file mode 100644
index 0000000..b947bee
--- /dev/null
+++ b/Packages/CoreAgent/Sources/CoreAgent/PersonaExpressionMapper.swift
@@ -0,0 +1,27 @@
+import Foundation
+
+/// Maps response text to a `FaceExpression` using simple keyword and punctuation cues.
+public enum PersonaExpressionMapper {
+    /// Returns the expression for `text`, matched case-insensitively.
+    ///
+    /// Cues are checked in order and the first match wins: excited (`!`, "awesome", "great"),
+    /// then curious (`?`, "maybe", "wonder"), then squint ("sorry", "oops"); anything else is
+    /// `.speaking`. Matching is by substring, so "Sorry!" yields `.excited`.
+    public static func expression(for text: String) -> FaceExpression {
+        let normalized = text.lowercased()
+
+        if normalized.contains("!") || normalized.contains("awesome") || normalized.contains("great") {
+            return .excited
+        }
+
+        if normalized.contains("?") || normalized.contains("maybe") || normalized.contains("wonder") {
+            return .curious
+        }
+
+        if normalized.contains("sorry") || normalized.contains("oops") {
+            return .squint
+        }
+
+        return .speaking
+    }
+}
diff --git a/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift
index eb55095..ff6212c 100644
--- a/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift
+++ b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift
@@ -71,6 +71,13 @@ final class CoreAgentTests: XCTestCase {
         XCTAssertEqual(PromptRouter.route(for: "Tell me a joke"), .text)
     }
 
+    func testPersonaExpressionMapping() {
+        XCTAssertEqual(PersonaExpressionMapper.expression(for: "Awesome!"), .excited)
+        XCTAssertEqual(PersonaExpressionMapper.expression(for: "Maybe?"), .curious)
+        XCTAssertEqual(PersonaExpressionMapper.expression(for: "Sorry"), .squint)
+        XCTAssertEqual(PersonaExpressionMapper.expression(for: "Okay"), .speaking)
+    }
+
     func testAgentPipelineWithMocks() async {
         let wake = MockWakeWordService()
         let stt = MockSTTService()
diff --git a/docs/architecture.md b/docs/architecture.md
index f69d7fd..e4706b2 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -82,3 +82,4 @@ Timeout and cancellation guards:
 - `FloatingWindowStyler` configures a borderless, transparent, always-on-top desktop companion window.
 - `AudioStartupCoordinator` enforces microphone permission before activating wake-word listening.
 - `ModelStartupCoordinator` performs first-run model download and hash verification before LLM use.
+- `AVSpeechSynthesizerService` supports interruption-aware speaking and explicit stop behavior.