Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ The format is based on Keep a Changelog and this project uses Semantic Versionin
- Microphone permission-gated audio startup coordinator for wake-word and STT flow.
- Model runtime bootstrap with first-run downloader, hash verification, and persona prompt builder.
- Agent loop timeout and cancellation controls with deterministic recovery to `idle`.
- Persona-driven face expression mapping and interruption-safe TTS output behavior.
46 changes: 38 additions & 8 deletions Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift
Original file line number Diff line number Diff line change
Expand Up @@ -150,28 +150,58 @@ public actor WhisperSpeechToTextService: SpeechToTextService {
}
}

/// Abstraction over the speech-synthesis calls used by the TTS service, so that a
/// test double can stand in for the system synthesizer.
@MainActor
protocol SpeechSynthesizing: AnyObject {
/// Whether speech is currently in progress.
var isSpeaking: Bool { get }
/// Starts speaking `utterance`.
func speak(_ utterance: AVSpeechUtterance)
/// Stops speech at `boundary`.
/// NOTE(review): the meaning of the returned flag is not established here; presumably it
/// mirrors `AVSpeechSynthesizer.stopSpeaking(at:)` — confirm.
func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool
}

/// Production `SpeechSynthesizing` implementation that forwards every call to a
/// privately owned `AVSpeechSynthesizer`.
@MainActor
final class SystemSpeechSynthesizerAdapter: SpeechSynthesizing {
    /// The system synthesizer all requests are forwarded to.
    private let engine = AVSpeechSynthesizer()

    /// Mirrors the wrapped synthesizer's `isSpeaking` flag.
    var isSpeaking: Bool {
        return engine.isSpeaking
    }

    /// Hands `utterance` to the wrapped synthesizer.
    func speak(_ utterance: AVSpeechUtterance) {
        engine.speak(utterance)
    }

    /// Asks the wrapped synthesizer to stop at `boundary` and returns its result unchanged.
    func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool {
        let outcome = engine.stopSpeaking(at: boundary)
        return outcome
    }
}

// NOTE(review): this span appears to show the removed and added lines of a diff together
// (for example the two `synthesizer` declarations and the two assignments in `init()`),
// so it is not compilable as shown — confirm against the real file before editing.
/// Text-to-speech service that drives a `SpeechSynthesizing` backend and counts how many
/// times a new `speak(_:)` call interrupted speech that was already in progress.
@MainActor
public final class AVSpeechSynthesizerService: @unchecked Sendable, TextToSpeechService {
private let synthesizer: AVSpeechSynthesizer
private let synthesizer: SpeechSynthesizing
/// Number of `speak(_:)` calls that found the synthesizer already speaking and stopped it first.
public private(set) var interruptionCount = 0

/// Creates a service with a default backend.
/// NOTE(review): the two assignments below look like the before/after sides of the diff.
public init() {
self.synthesizer = AVSpeechSynthesizer()
self.synthesizer = SystemSpeechSynthesizerAdapter()
}

/// Creates a service backed by the supplied synthesizer (internal; used by the tests).
init(synthesizer: SpeechSynthesizing) {
self.synthesizer = synthesizer
}

/// Speaks `text`, first stopping any speech already in progress.
/// - Parameter text: The string turned into an `AVSpeechUtterance`.
/// - Throws: Whatever `Task.sleep(nanoseconds:)` throws.
public func speak(_ text: String) async throws {
// Interrupt the current utterance immediately and record that it happened.
if synthesizer.isSpeaking {
_ = synthesizer.stopSpeaking(at: .immediate)
interruptionCount += 1
}

let utterance = AVSpeechUtterance(string: text)
utterance.rate = 0.42
// NOTE(review): `synthesize(utterance)` and the direct `synthesizer.speak(utterance)` further
// down both appear here; presumably only one of them survives the change — confirm.
synthesize(utterance)
utterance.pitchMultiplier = 1.12
utterance.postUtteranceDelay = 0.02
synthesizer.speak(utterance)

// Keep this async call cooperative for testability.
// Suspends for 120 ms (120_000_000 ns) before returning.
try await Task.sleep(nanoseconds: 120_000_000)
}

/// Stops speech immediately.
public func stop() async {
synthesizer.stopSpeaking(at: .immediate)
}

// NOTE(review): this helper mixes a `speak` call with an immediate `stopSpeaking`; the second
// line presumably belongs to the updated `stop()` body rather than to this helper — confirm.
private func synthesize(_ utterance: AVSpeechUtterance) {
synthesizer.speak(utterance)
_ = synthesizer.stopSpeaking(at: .immediate)
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import Foundation
import AVFoundation
import XCTest
@testable import AudioPipeline

Expand Down Expand Up @@ -91,4 +92,51 @@ final class AudioPipelineTests: XCTestCase {
XCTFail("Expected permission flow to succeed: \(error)")
}
}

/// Speaking while the synthesizer is already busy must stop the active utterance once,
/// record one interruption, and then start exactly one new utterance.
@MainActor
func testTTSSpeakInterruptsActiveUtterance() async {
    // Arrange: a backend that reports it is already speaking.
    let mockSynthesizer = MockSpeechSynthesizer(initiallySpeaking: true)
    let tts = AVSpeechSynthesizerService(synthesizer: mockSynthesizer)

    // Act
    do {
        try await tts.speak("First interruption test")
    } catch {
        XCTFail("Unexpected TTS error: \(error)")
    }

    // Assert
    XCTAssertEqual(tts.interruptionCount, 1)
    XCTAssertEqual(mockSynthesizer.stopCallCount, 1)
    XCTAssertEqual(mockSynthesizer.speakCallCount, 1)
}

/// `stop()` must forward a single stop request to the backend and leave it not speaking.
@MainActor
func testTTSStopCancelsSpeech() async {
    // Arrange: a backend that reports it is already speaking.
    let mockSynthesizer = MockSpeechSynthesizer(initiallySpeaking: true)
    let tts = AVSpeechSynthesizerService(synthesizer: mockSynthesizer)

    // Act
    await tts.stop()

    // Assert
    XCTAssertFalse(mockSynthesizer.isSpeaking)
    XCTAssertEqual(mockSynthesizer.stopCallCount, 1)
}
}

/// Test double for `SpeechSynthesizing` that records how often it was asked to speak or
/// stop and flips `isSpeaking` accordingly, without producing any audio.
@MainActor
private final class MockSpeechSynthesizer: SpeechSynthesizing {
    /// Simulated speaking state; set by `speak` and cleared by `stopSpeaking`.
    var isSpeaking: Bool
    /// Number of `speak(_:)` calls received.
    private(set) var speakCallCount = 0
    /// Number of `stopSpeaking(at:)` calls received.
    private(set) var stopCallCount = 0

    /// - Parameter initiallySpeaking: Starting value for `isSpeaking`.
    init(initiallySpeaking: Bool) {
        isSpeaking = initiallySpeaking
    }

    /// Marks the mock as speaking and counts the call; the utterance itself is ignored.
    func speak(_ utterance: AVSpeechUtterance) {
        isSpeaking = true
        speakCallCount += 1
    }

    /// Marks the mock as silent, counts the call, and always reports success.
    func stopSpeaking(at boundary: AVSpeechBoundary) -> Bool {
        isSpeaking = false
        stopCallCount += 1
        return true
    }
}
8 changes: 1 addition & 7 deletions Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,7 @@ public actor BMOAgent {
}

/// Chooses the face expression to show for `text`.
// NOTE(review): the punctuation checks and the `PersonaExpressionMapper` call below look like
// the removed and added sides of a diff shown together; as written, the final line sits after
// `return .speaking` and would never run — confirm which body is current.
private func expression(for text: String) -> FaceExpression {
if text.contains("!") {
return .excited
}
if text.contains("?") {
return .curious
}
return .speaking
PersonaExpressionMapper.expression(for: text)
}

private func emitError(_ message: String) {
Expand Down
21 changes: 21 additions & 0 deletions Packages/CoreAgent/Sources/CoreAgent/PersonaExpressionMapper.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import Foundation

/// Namespace for the rules that turn reply text into a `FaceExpression`.
public enum PersonaExpressionMapper {
    /// Picks a face expression from cues found in `text`, ignoring letter case.
    ///
    /// Cues are checked in priority order and the first tier that matches wins:
    /// excitement, then curiosity, then apology. Text with no cue maps to `.speaking`.
    ///
    /// - Parameter text: The text to inspect.
    /// - Returns: The expression for the highest-priority cue present in `text`.
    public static func expression(for text: String) -> FaceExpression {
        let lowered = text.lowercased()

        // Tier 1: exclamation mark or enthusiastic wording.
        let soundsExcited = lowered.contains("!")
            || lowered.contains("awesome")
            || lowered.contains("great")
        if soundsExcited {
            return .excited
        }

        // Tier 2: question mark or tentative wording.
        let soundsCurious = lowered.contains("?")
            || lowered.contains("maybe")
            || lowered.contains("wonder")
        if soundsCurious {
            return .curious
        }

        // Tier 3: apologetic wording.
        let soundsApologetic = lowered.contains("sorry") || lowered.contains("oops")
        if soundsApologetic {
            return .squint
        }

        return .speaking
    }
}
7 changes: 7 additions & 0 deletions Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ final class CoreAgentTests: XCTestCase {
XCTAssertEqual(PromptRouter.route(for: "Tell me a joke"), .text)
}

/// Checks one representative input per expression tier, plus the `.speaking` fallback.
func testPersonaExpressionMapping() {
    let expectations: [(input: String, expected: FaceExpression)] = [
        (input: "Awesome!", expected: .excited),
        (input: "Maybe?", expected: .curious),
        (input: "Sorry", expected: .squint),
        (input: "Okay", expected: .speaking),
    ]

    for (input, expected) in expectations {
        XCTAssertEqual(PersonaExpressionMapper.expression(for: input), expected)
    }
}

func testAgentPipelineWithMocks() async {
let wake = MockWakeWordService()
let stt = MockSTTService()
Expand Down
1 change: 1 addition & 0 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,4 @@ Timeout and cancellation guards:
- `FloatingWindowStyler` configures a borderless, transparent, always-on-top desktop companion window.
- `AudioStartupCoordinator` enforces microphone permission before activating wake-word listening.
- `ModelStartupCoordinator` performs first-run model download and hash verification before LLM use.
- `AVSpeechSynthesizerService` supports interruption-aware speaking and explicit stop behavior.