diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index cbd4847..890c27b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,7 +7,7 @@ - [ ] `./scripts/lint.sh` - [ ] `swift build --package-path KAMIBotApp` -- [ ] `swift test --package-path KAMIBotApp` +- [ ] `./scripts/test.sh` ## Checklist diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01c10d8..9e605f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,8 +42,4 @@ jobs: - uses: actions/checkout@v4 - name: Test run: | - if [ -d KAMIBotApp ]; then - swift test --package-path KAMIBotApp - else - echo "KAMIBotApp package not scaffolded yet" - fi + ./scripts/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b4e650..4e799df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,3 +8,5 @@ The format is based on Keep a Changelog and this project uses Semantic Versionin ### Added - Initial repository governance, licensing, and CI baseline. +- Modular Swift package scaffold for app, agent, audio, model, UI, and vision layers. +- Baseline local test harness via `scripts/test.sh` running package `xcodebuild` tests. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ac57212..5eb51b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,7 +28,7 @@ Why this change was needed and what behavior changed. 
```bash ./scripts/lint.sh swift build --package-path KAMIBotApp -swift test --package-path KAMIBotApp +./scripts/test.sh ``` ## Code of Conduct diff --git a/KAMIBotApp/Package.swift b/KAMIBotApp/Package.swift new file mode 100644 index 0000000..d298ca8 --- /dev/null +++ b/KAMIBotApp/Package.swift @@ -0,0 +1,33 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "KAMIBotApp", + platforms: [.macOS(.v14)], + products: [ + .executable(name: "KAMIBotApp", targets: ["KAMIBotApp"]) + ], + dependencies: [ + .package(path: "../Packages/CoreAgent"), + .package(path: "../Packages/AudioPipeline"), + .package(path: "../Packages/ModelRuntime"), + .package(path: "../Packages/UIComponents"), + .package(path: "../Packages/VisionPipeline") + ], + targets: [ + .executableTarget( + name: "KAMIBotApp", + dependencies: [ + "CoreAgent", + "AudioPipeline", + "ModelRuntime", + "UIComponents", + "VisionPipeline" + ] + ), + .testTarget( + name: "KAMIBotAppTests", + dependencies: ["KAMIBotApp"] + ) + ] +) diff --git a/KAMIBotApp/Sources/KAMIBotApp/AppContainer.swift b/KAMIBotApp/Sources/KAMIBotApp/AppContainer.swift new file mode 100644 index 0000000..5ea3e82 --- /dev/null +++ b/KAMIBotApp/Sources/KAMIBotApp/AppContainer.swift @@ -0,0 +1,31 @@ +import AudioPipeline +import CoreAgent +import Foundation +import ModelRuntime +import VisionPipeline + +@MainActor +struct AppContainer { + let agent: BMOAgent + + init(config: AgentConfig = AgentConfig()) { + let wakeWord = PorcupineWakeWordService(keyword: config.wakeWord) + let stt = WhisperSpeechToTextService() + let tts = AVSpeechSynthesizerService() + + let modelStore = URL(fileURLWithPath: FileManager.default.currentDirectoryPath) + .appendingPathComponent("models", isDirectory: true) + let llm = MLXLLMService(modelID: config.llmModelID, modelStore: modelStore) + + let vision = SnapshotVisionService(enabled: config.visionEnabled) + + self.agent = BMOAgent( + config: config, + wakeWordService: wakeWord, 
+ sttService: stt, + ttsService: tts, + llmService: llm, + visionService: vision + ) + } +} diff --git a/KAMIBotApp/Sources/KAMIBotApp/BMOViewModel.swift b/KAMIBotApp/Sources/KAMIBotApp/BMOViewModel.swift new file mode 100644 index 0000000..0fa14ca --- /dev/null +++ b/KAMIBotApp/Sources/KAMIBotApp/BMOViewModel.swift @@ -0,0 +1,45 @@ +import CoreAgent +import Observation + +@MainActor +@Observable +final class BMOViewModel { + private let agent: BMOAgent + private var streamTask: Task<Void, Never>? + + var state: BMOState = .idle + var expression: FaceExpression = .happy + var transcript: [String] = [] + + init(agent: BMOAgent) { + self.agent = agent + } + + func start() { + streamTask = Task { + await agent.start() + for await event in agent.eventStream() { + switch event { + case .stateChanged(let state): + self.state = state + case .faceChanged(let expression): + self.expression = expression + case .heardUtterance(let utterance): + transcript.append("You: \(utterance)") + case .generatedResponse(let response): + transcript.append("BMO: \(response)") + case .error(let message): + transcript.append("Error: \(message)") + } + } + } + } + + func stop() { + streamTask?.cancel() + streamTask = nil + Task { + await agent.stop() + } + } +} diff --git a/KAMIBotApp/Sources/KAMIBotApp/ContentView.swift b/KAMIBotApp/Sources/KAMIBotApp/ContentView.swift new file mode 100644 index 0000000..3c1cb1f --- /dev/null +++ b/KAMIBotApp/Sources/KAMIBotApp/ContentView.swift @@ -0,0 +1,35 @@ +import CoreAgent +import SwiftUI +import UIComponents + +struct ContentView: View { + @Bindable var viewModel: BMOViewModel + + var body: some View { + VStack(spacing: 14) { + BMOFaceView(expression: viewModel.expression, state: viewModel.state) + + Text("State: \(viewModel.state.rawValue.capitalized)") + .font(.headline) + + VStack(alignment: .leading, spacing: 6) { + ForEach(Array(viewModel.transcript.suffix(4).enumerated()), id: \.offset) { _, line in + Text(line) + .font(.caption) + .lineLimit(2) + } + } + 
.frame(maxWidth: 280, alignment: .leading) + + HStack { + Button("Start") { + viewModel.start() + } + Button("Stop") { + viewModel.stop() + } + } + } + .padding(20) + } +} diff --git a/KAMIBotApp/Sources/KAMIBotApp/KAMIBotApp.swift b/KAMIBotApp/Sources/KAMIBotApp/KAMIBotApp.swift new file mode 100644 index 0000000..73589d9 --- /dev/null +++ b/KAMIBotApp/Sources/KAMIBotApp/KAMIBotApp.swift @@ -0,0 +1,14 @@ +import SwiftUI + +@main +struct KAMIBotApp: App { + @State private var viewModel = BMOViewModel(agent: AppContainer().agent) + + var body: some Scene { + WindowGroup("KAMI BOT") { + ContentView(viewModel: viewModel) + .frame(minWidth: 320, minHeight: 420) + } + .defaultSize(width: 360, height: 460) + } +} diff --git a/KAMIBotApp/Tests/KAMIBotAppTests/KAMIBotAppTests.swift b/KAMIBotApp/Tests/KAMIBotAppTests/KAMIBotAppTests.swift new file mode 100644 index 0000000..e57eabf --- /dev/null +++ b/KAMIBotApp/Tests/KAMIBotAppTests/KAMIBotAppTests.swift @@ -0,0 +1,10 @@ +import XCTest +@testable import KAMIBotApp + +@MainActor +final class KAMIBotAppTests: XCTestCase { + func testContainerBuildsAgent() { + let container = AppContainer() + _ = container.agent + } +} diff --git a/Packages/AudioPipeline/Package.swift b/Packages/AudioPipeline/Package.swift new file mode 100644 index 0000000..53c0d31 --- /dev/null +++ b/Packages/AudioPipeline/Package.swift @@ -0,0 +1,20 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "AudioPipeline", + platforms: [.macOS(.v14)], + products: [ + .library(name: "AudioPipeline", targets: ["AudioPipeline"]) + ], + dependencies: [ + .package(path: "../CoreAgent") + ], + targets: [ + .target(name: "AudioPipeline", dependencies: ["CoreAgent"]), + .testTarget( + name: "AudioPipelineTests", + dependencies: ["AudioPipeline"] + ) + ] +) diff --git a/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift b/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift new file mode 100644 index 
0000000..2c6f9d6 --- /dev/null +++ b/Packages/AudioPipeline/Sources/AudioPipeline/AudioServices.swift @@ -0,0 +1,131 @@ +import AVFoundation +import CoreAgent +import Foundation + +public enum AudioPipelineError: Error, Equatable { + case timeout + case maxRetriesExceeded + case microphoneDenied +} + +public actor PorcupineWakeWordService: WakeWordService { + private let keyword: String + private let debounceSeconds: TimeInterval + private var isRunning = false + private var lastDetection: Date? + private let stream: AsyncStream<WakeWordEvent> + private let continuation: AsyncStream<WakeWordEvent>.Continuation + + public init(keyword: String, debounceSeconds: TimeInterval = 0.8) { + self.keyword = keyword + self.debounceSeconds = debounceSeconds + + var localContinuation: AsyncStream<WakeWordEvent>.Continuation? + self.stream = AsyncStream { continuation in + localContinuation = continuation + } + self.continuation = localContinuation! + } + + public func start() async throws { + isRunning = true + } + + public func stop() async { + isRunning = false + } + + public func events() async -> AsyncStream<WakeWordEvent> { + stream + } + + public func emitDetection(now: Date = Date()) { + guard isRunning else { + return + } + + if let lastDetection, now.timeIntervalSince(lastDetection) < debounceSeconds { + return + } + + lastDetection = now + continuation.yield(WakeWordEvent(keyword: keyword, detectedAt: now)) + } +} + +public final class MicrophonePermissionManager: @unchecked Sendable { + public init() {} + + public func requestPermission() async -> Bool { + await withCheckedContinuation { continuation in + AVCaptureDevice.requestAccess(for: .audio) { granted in + continuation.resume(returning: granted) + } + } + } + + public func hasPermission() -> Bool { + AVCaptureDevice.authorizationStatus(for: .audio) == .authorized + } +} + +public actor WhisperSpeechToTextService: SpeechToTextService { + private var mockQueue: [String] + + public init(initialMockQueue: [String] = []) { + self.mockQueue = initialMockQueue + } + + public 
func enqueueMockTranscription(_ value: String) { + mockQueue.append(value) + } + + public func transcribeNextUtterance(timeout: TimeInterval) async throws -> String { + let deadline = Date().addingTimeInterval(timeout) + while Date() < deadline { + if !mockQueue.isEmpty { + return mockQueue.removeFirst() + } + try await Task.sleep(nanoseconds: 50_000_000) + } + throw AudioPipelineError.timeout + } + + public func transcribeWithRetry(timeout: TimeInterval, retries: Int) async throws -> String { + var attempts = 0 + while attempts <= retries { + do { + return try await transcribeNextUtterance(timeout: timeout) + } catch AudioPipelineError.timeout { + attempts += 1 + } + } + throw AudioPipelineError.maxRetriesExceeded + } +} + +@MainActor +public final class AVSpeechSynthesizerService: @unchecked Sendable, TextToSpeechService { + private let synthesizer: AVSpeechSynthesizer + + public init() { + self.synthesizer = AVSpeechSynthesizer() + } + + public func speak(_ text: String) async throws { + let utterance = AVSpeechUtterance(string: text) + utterance.rate = 0.42 + synthesize(utterance) + + // Keep this async call cooperative for testability. + try await Task.sleep(nanoseconds: 120_000_000) + } + + public func stop() async { + synthesizer.stopSpeaking(at: .immediate) + } + + private func synthesize(_ utterance: AVSpeechUtterance) { + synthesizer.speak(utterance) + } +} diff --git a/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift b/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift new file mode 100644 index 0000000..7534eb2 --- /dev/null +++ b/Packages/AudioPipeline/Tests/AudioPipelineTests/AudioPipelineTests.swift @@ -0,0 +1,51 @@ +import Foundation +import XCTest +@testable import AudioPipeline + +final class AudioPipelineTests: XCTestCase { + func testWakeWordDebounceSuppressesDuplicates() async { + let service = PorcupineWakeWordService(keyword: "BMO", debounceSeconds: 1.0) + try? 
await service.start() + + let stream = await service.events() + let task = Task<[String], Never> { + var output: [String] = [] + for await event in stream { + output.append(event.keyword) + if output.count == 2 { + break + } + } + return output + } + + let t0 = Date() + await service.emitDetection(now: t0) + await service.emitDetection(now: t0.addingTimeInterval(0.2)) + await service.emitDetection(now: t0.addingTimeInterval(1.2)) + + let result = await task.value + XCTAssertEqual(result.count, 2) + } + + func testSTTTimeoutAndRetryPolicy() async { + let service = WhisperSpeechToTextService() + + do { + _ = try await service.transcribeWithRetry(timeout: 0.05, retries: 1) + XCTFail("Expected maxRetriesExceeded") + } catch AudioPipelineError.maxRetriesExceeded { + // expected + } catch { + XCTFail("Unexpected error: \(error)") + } + + await service.enqueueMockTranscription("hello bmo") + do { + let transcription = try await service.transcribeWithRetry(timeout: 0.2, retries: 1) + XCTAssertEqual(transcription, "hello bmo") + } catch { + XCTFail("Unexpected retry failure: \(error)") + } + } +} diff --git a/Packages/CoreAgent/Package.swift b/Packages/CoreAgent/Package.swift new file mode 100644 index 0000000..69ee570 --- /dev/null +++ b/Packages/CoreAgent/Package.swift @@ -0,0 +1,18 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "CoreAgent", + platforms: [.macOS(.v14)], + products: [ + .library(name: "CoreAgent", targets: ["CoreAgent"]) + ], + dependencies: [], + targets: [ + .target(name: "CoreAgent"), + .testTarget( + name: "CoreAgentTests", + dependencies: ["CoreAgent"] + ) + ] +) diff --git a/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift new file mode 100644 index 0000000..863ef41 --- /dev/null +++ b/Packages/CoreAgent/Sources/CoreAgent/BMOAgent.swift @@ -0,0 +1,195 @@ +import Foundation + +public enum AgentError: Error, Equatable { + case invalidTransition(from: 
BMOState, to: BMOState) + case unavailable(String) +} + +public actor BMOAgent { + public private(set) var state: BMOState = .idle + public private(set) var faceExpression: FaceExpression = .happy + + private let config: AgentConfig + private let wakeWordService: WakeWordService + private let sttService: SpeechToTextService + private let ttsService: TextToSpeechService + private let llmService: LLMService + private let visionService: VisionService? + + private var wakeTask: Task<Void, Never>? + private let stream: AsyncStream<AgentEvent> + private let continuation: AsyncStream<AgentEvent>.Continuation + + public init( + config: AgentConfig, + wakeWordService: WakeWordService, + sttService: SpeechToTextService, + ttsService: TextToSpeechService, + llmService: LLMService, + visionService: VisionService? + ) { + self.config = config + self.wakeWordService = wakeWordService + self.sttService = sttService + self.ttsService = ttsService + self.llmService = llmService + self.visionService = visionService + + var localContinuation: AsyncStream<AgentEvent>.Continuation? + self.stream = AsyncStream { continuation in + localContinuation = continuation + } + self.continuation = localContinuation! 
+ } + + deinit { + wakeTask?.cancel() + continuation.finish() + } + + public nonisolated func eventStream() -> AsyncStream<AgentEvent> { + stream + } + + public func start() async { + do { + try await wakeWordService.start() + wakeTask = Task { + let events = await wakeWordService.events() + for await event in events { + if Task.isCancelled { + break + } + if event.keyword.caseInsensitiveCompare(config.wakeWord) == .orderedSame { + await self.handleWakeWordEvent() + } + } + } + } catch { + emitError("Wake word service failed to start: \(error.localizedDescription)") + } + } + + public func stop() async { + wakeTask?.cancel() + wakeTask = nil + await wakeWordService.stop() + await ttsService.stop() + do { + try transition(to: .idle) + } catch { + emitError("Stop failed: \(error.localizedDescription)") + } + } + + public func handleWakeWordEvent() async { + do { + try transition(to: .listening) + let utterance = try await sttService.transcribeNextUtterance(timeout: config.sttTimeoutSeconds) + continuation.yield(.heardUtterance(utterance)) + await handleUserUtterance(utterance) + } catch { + emitError("Transcription failed: \(error.localizedDescription)") + do { + try transition(to: .idle) + } catch { + emitError("State reset failed: \(error.localizedDescription)") + } + } + } + + public func handleUserUtterance(_ utterance: String) async { + do { + try transition(to: .thinking) + + let route = PromptRouter.route(for: utterance) + let visionContext: VisionContext? 
+ + if route == .vision && config.visionEnabled { + guard let visionService else { + throw AgentError.unavailable("Vision requested but service is not configured") + } + visionContext = try await visionService.captureSnapshotDescription() + } else { + visionContext = nil + } + + let response = try await llmService.generateResponse( + prompt: utterance, + systemPrompt: "You are BMO, an upbeat and helpful desktop companion.", + context: visionContext + ) + + continuation.yield(.generatedResponse(response)) + await speak(response) + } catch { + emitError("Agent processing failed: \(error.localizedDescription)") + do { + try transition(to: .idle) + } catch { + emitError("State reset failed: \(error.localizedDescription)") + } + } + } + + public func speak(_ text: String) async { + do { + try transition(to: .speaking) + let expression = expression(for: text) + faceExpression = expression + continuation.yield(.faceChanged(expression)) + try await ttsService.speak(text) + faceExpression = .happy + continuation.yield(.faceChanged(.happy)) + try transition(to: .idle) + } catch { + emitError("TTS failed: \(error.localizedDescription)") + do { + try transition(to: .idle) + } catch { + emitError("State reset failed: \(error.localizedDescription)") + } + } + } + + public static func isValidTransition(from: BMOState, to: BMOState) -> Bool { + switch (from, to) { + case (.idle, .listening), (.idle, .error): + true + case (.listening, .thinking), (.listening, .idle), (.listening, .error): + true + case (.thinking, .speaking), (.thinking, .idle), (.thinking, .error): + true + case (.speaking, .idle), (.speaking, .error): + true + case (.error, .idle): + true + case (let lhs, let rhs): + lhs == rhs + } + } + + private func transition(to next: BMOState) throws { + guard Self.isValidTransition(from: state, to: next) else { + throw AgentError.invalidTransition(from: state, to: next) + } + state = next + continuation.yield(.stateChanged(next)) + } + + private func expression(for text: 
String) -> FaceExpression { + if text.contains("!") { + return .excited + } + if text.contains("?") { + return .curious + } + return .speaking + } + + private func emitError(_ message: String) { + state = .error + continuation.yield(.stateChanged(.error)) + continuation.yield(.error(message)) + } +} diff --git a/Packages/CoreAgent/Sources/CoreAgent/CoreTypes.swift b/Packages/CoreAgent/Sources/CoreAgent/CoreTypes.swift new file mode 100644 index 0000000..a6c0741 --- /dev/null +++ b/Packages/CoreAgent/Sources/CoreAgent/CoreTypes.swift @@ -0,0 +1,87 @@ +import Foundation + +public enum BMOState: String, Codable, Sendable { + case idle + case listening + case thinking + case speaking + case error +} + +public enum FaceExpression: String, Codable, Sendable { + case happy + case neutral + case curious + case excited + case squint + case speaking +} + +public struct AgentConfig: Codable, Sendable { + public var wakeWord: String + public var llmModelID: String + public var visionModelID: String + public var sttTimeoutSeconds: Double + public var llmTimeoutSeconds: Double + public var telemetryEnabled: Bool + public var visionEnabled: Bool + + public init( + wakeWord: String = "BMO", + llmModelID: String = "llama-3.1-8b-4bit", + visionModelID: String = "moondream", + sttTimeoutSeconds: Double = 8.0, + llmTimeoutSeconds: Double = 25.0, + telemetryEnabled: Bool = false, + visionEnabled: Bool = false + ) { + self.wakeWord = wakeWord + self.llmModelID = llmModelID + self.visionModelID = visionModelID + self.sttTimeoutSeconds = sttTimeoutSeconds + self.llmTimeoutSeconds = llmTimeoutSeconds + self.telemetryEnabled = telemetryEnabled + self.visionEnabled = visionEnabled + } +} + +public struct WakeWordEvent: Sendable { + public var keyword: String + public var detectedAt: Date + + public init(keyword: String, detectedAt: Date = Date()) { + self.keyword = keyword + self.detectedAt = detectedAt + } +} + +public struct VisionContext: Sendable { + public var summary: String + public 
var capturedAt: Date + + public init(summary: String, capturedAt: Date = Date()) { + self.summary = summary + self.capturedAt = capturedAt + } +} + +public enum AgentEvent: Sendable { + case stateChanged(BMOState) + case faceChanged(FaceExpression) + case heardUtterance(String) + case generatedResponse(String) + case error(String) +} + +public enum PromptRoute: String, Sendable { + case text + case vision +} + +public enum PromptRouter { + public static func route(for utterance: String) -> PromptRoute { + let lowered = utterance.lowercased() + let visionTokens = ["look", "see", "what do you see", "show", "camera", "snapshot", "vision"] + return visionTokens.contains(where: { lowered.contains($0) }) ? .vision : .text + } +} diff --git a/Packages/CoreAgent/Sources/CoreAgent/Services.swift b/Packages/CoreAgent/Sources/CoreAgent/Services.swift new file mode 100644 index 0000000..4e2d84c --- /dev/null +++ b/Packages/CoreAgent/Sources/CoreAgent/Services.swift @@ -0,0 +1,28 @@ +import Foundation + +public protocol WakeWordService: Sendable { + func start() async throws + func stop() async + func events() async -> AsyncStream<WakeWordEvent> +} + +public protocol SpeechToTextService: Sendable { + func transcribeNextUtterance(timeout: TimeInterval) async throws -> String +} + +public protocol TextToSpeechService: Sendable { + func speak(_ text: String) async throws + func stop() async +} + +public protocol LLMService: Sendable { + func generateResponse( + prompt: String, + systemPrompt: String, + context: VisionContext? 
+ ) async throws -> String +} + +public protocol VisionService: Sendable { + func captureSnapshotDescription() async throws -> VisionContext +} diff --git a/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift new file mode 100644 index 0000000..1534795 --- /dev/null +++ b/Packages/CoreAgent/Tests/CoreAgentTests/CoreAgentTests.swift @@ -0,0 +1,86 @@ +import Foundation +import XCTest +@testable import CoreAgent + +private actor MockWakeWordService: WakeWordService { + private var continuation: AsyncStream<WakeWordEvent>.Continuation? + private lazy var stream: AsyncStream<WakeWordEvent> = { + AsyncStream { continuation in + self.continuation = continuation + } + }() + + func start() async throws {} + func stop() async {} + func events() async -> AsyncStream<WakeWordEvent> { stream } + + func emit(keyword: String) { + continuation?.yield(WakeWordEvent(keyword: keyword)) + } +} + +private actor MockSTTService: SpeechToTextService { + var value: String = "hello" + func transcribeNextUtterance(timeout: TimeInterval) async throws -> String { value } +} + +private actor MockTTSService: TextToSpeechService { + private(set) var spoken: [String] = [] + func speak(_ text: String) async throws { spoken.append(text) } + func stop() async {} +} + +private actor MockLLMService: LLMService { + private(set) var requests: [String] = [] + func generateResponse(prompt: String, systemPrompt: String, context: VisionContext?) async throws -> String { + requests.append(prompt) + return "Hi from BMO!" 
+ } +} + +private actor MockVisionService: VisionService { + func captureSnapshotDescription() async throws -> VisionContext { + VisionContext(summary: "A desk with a keyboard") + } +} + +final class CoreAgentTests: XCTestCase { + func testStateTransitionValidationRejectsInvalidPath() { + XCTAssertFalse(BMOAgent.isValidTransition(from: .idle, to: .thinking)) + XCTAssertTrue(BMOAgent.isValidTransition(from: .idle, to: .listening)) + } + + func testPromptRouterRoutesVisionRequests() { + XCTAssertEqual(PromptRouter.route(for: "What do you see right now?"), .vision) + XCTAssertEqual(PromptRouter.route(for: "Tell me a joke"), .text) + } + + func testAgentPipelineWithMocks() async { + let wake = MockWakeWordService() + let stt = MockSTTService() + let tts = MockTTSService() + let llm = MockLLMService() + let vision = MockVisionService() + + let agent = BMOAgent( + config: AgentConfig(visionEnabled: true), + wakeWordService: wake, + sttService: stt, + ttsService: tts, + llmService: llm, + visionService: vision + ) + + await agent.start() + try? await Task.sleep(nanoseconds: 100_000_000) + await wake.emit(keyword: "BMO") + try? 
await Task.sleep(nanoseconds: 450_000_000) + + let state = await agent.state + XCTAssertEqual(state, .idle) + + let spoken = await tts.spoken + XCTAssertEqual(spoken.count, 1) + XCTAssertEqual(spoken.first, "Hi from BMO!") + } +} diff --git a/Packages/ModelRuntime/Package.swift b/Packages/ModelRuntime/Package.swift new file mode 100644 index 0000000..297894f --- /dev/null +++ b/Packages/ModelRuntime/Package.swift @@ -0,0 +1,20 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "ModelRuntime", + platforms: [.macOS(.v14)], + products: [ + .library(name: "ModelRuntime", targets: ["ModelRuntime"]) + ], + dependencies: [ + .package(path: "../CoreAgent") + ], + targets: [ + .target(name: "ModelRuntime", dependencies: ["CoreAgent"]), + .testTarget( + name: "ModelRuntimeTests", + dependencies: ["ModelRuntime"] + ) + ] +) diff --git a/Packages/ModelRuntime/Sources/ModelRuntime/ModelRuntime.swift b/Packages/ModelRuntime/Sources/ModelRuntime/ModelRuntime.swift new file mode 100644 index 0000000..235a029 --- /dev/null +++ b/Packages/ModelRuntime/Sources/ModelRuntime/ModelRuntime.swift @@ -0,0 +1,90 @@ +import CoreAgent +import CryptoKit +import Foundation + +public struct ModelDescriptor: Sendable, Codable { + public var id: String + public var url: URL + public var sha256: String + public var license: String + + public init(id: String, url: URL, sha256: String, license: String) { + self.id = id + self.url = url + self.sha256 = sha256 + self.license = license + } +} + +public enum ModelRuntimeError: Error, Equatable { + case modelNotFound(String) + case downloadFailed(String) + case hashMismatch(expected: String, got: String) +} + +public actor ModelDownloader { + public let baseDirectory: URL + + public init(baseDirectory: URL) { + self.baseDirectory = baseDirectory + } + + public func ensureModelAvailable(_ descriptor: ModelDescriptor) async throws -> URL { + let destination = baseDirectory.appendingPathComponent(descriptor.id) + + 
if FileManager.default.fileExists(atPath: destination.path()) { + return destination + } + + try FileManager.default.createDirectory(at: baseDirectory, withIntermediateDirectories: true) + + do { + let (data, _) = try await URLSession.shared.data(from: descriptor.url) + let digest = SHA256.hash(data: data).map { String(format: "%02x", $0) }.joined() + guard digest == descriptor.sha256 else { + throw ModelRuntimeError.hashMismatch(expected: descriptor.sha256, got: digest) + } + try data.write(to: destination) + return destination + } catch let error as ModelRuntimeError { + throw error + } catch { + throw ModelRuntimeError.downloadFailed(error.localizedDescription) + } + } +} + +public actor MLXLLMService: LLMService { + private let modelID: String + private let modelStore: URL + private(set) var loadedModelPath: URL? + + public init(modelID: String, modelStore: URL) { + self.modelID = modelID + self.modelStore = modelStore + } + + public func loadIfNeeded() throws { + let candidate = modelStore.appendingPathComponent(modelID) + guard FileManager.default.fileExists(atPath: candidate.path()) else { + throw ModelRuntimeError.modelNotFound(modelID) + } + loadedModelPath = candidate + } + + public func generateResponse( + prompt: String, + systemPrompt: String, + context: VisionContext? + ) async throws -> String { + if loadedModelPath == nil { + try loadIfNeeded() + } + + let prefix = "[BMO]" + if let context { + return "\(prefix) I can see \(context.summary). 
You said: \(prompt)" + } + return "\(prefix) You said: \(prompt)" + } +} diff --git a/Packages/ModelRuntime/Tests/ModelRuntimeTests/ModelRuntimeTests.swift b/Packages/ModelRuntime/Tests/ModelRuntimeTests/ModelRuntimeTests.swift new file mode 100644 index 0000000..e2d697e --- /dev/null +++ b/Packages/ModelRuntime/Tests/ModelRuntimeTests/ModelRuntimeTests.swift @@ -0,0 +1,40 @@ +import Foundation +import XCTest +@testable import ModelRuntime + +final class ModelRuntimeTests: XCTestCase { + func testModelNotFound() async { + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("kami-model-tests-\(UUID().uuidString)") + let service = MLXLLMService(modelID: "missing-model", modelStore: tmp) + + do { + _ = try await service.generateResponse(prompt: "hello", systemPrompt: "sys", context: nil) + XCTFail("Expected modelNotFound") + } catch ModelRuntimeError.modelNotFound(let id) { + XCTAssertEqual(id, "missing-model") + } catch { + XCTFail("Unexpected error: \(error)") + } + } + + func testDownloadFailure() async { + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("kami-download-tests-\(UUID().uuidString)") + let downloader = ModelDownloader(baseDirectory: tmp) + + let descriptor = ModelDescriptor( + id: "llama-3.1-8b-4bit", + url: URL(string: "https://invalid.invalid/not-found.bin")!, + sha256: "deadbeef", + license: "custom" + ) + + do { + _ = try await downloader.ensureModelAvailable(descriptor) + XCTFail("Expected downloadFailed") + } catch ModelRuntimeError.downloadFailed { + // expected + } catch { + XCTFail("Unexpected error: \(error)") + } + } +} diff --git a/Packages/UIComponents/Package.swift b/Packages/UIComponents/Package.swift new file mode 100644 index 0000000..323edeb --- /dev/null +++ b/Packages/UIComponents/Package.swift @@ -0,0 +1,20 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "UIComponents", + platforms: [.macOS(.v14)], + products: [ + .library(name: 
"UIComponents", targets: ["UIComponents"]) + ], + dependencies: [ + .package(path: "../CoreAgent") + ], + targets: [ + .target(name: "UIComponents", dependencies: ["CoreAgent"]), + .testTarget( + name: "UIComponentsTests", + dependencies: ["UIComponents"] + ) + ] +) diff --git a/Packages/UIComponents/Sources/UIComponents/BMOFaceView.swift b/Packages/UIComponents/Sources/UIComponents/BMOFaceView.swift new file mode 100644 index 0000000..d10289b --- /dev/null +++ b/Packages/UIComponents/Sources/UIComponents/BMOFaceView.swift @@ -0,0 +1,76 @@ +import CoreAgent +import SwiftUI + +public struct BMOFaceView: View { + public let expression: FaceExpression + public let state: BMOState + + @Namespace private var faceNamespace + + public init(expression: FaceExpression, state: BMOState) { + self.expression = expression + self.state = state + } + + public var body: some View { + VStack(spacing: 16) { + HStack(spacing: 24) { + eyeView(left: true) + eyeView(left: false) + } + mouthView + } + .padding(28) + .frame(width: 220, height: 220) + .background( + RoundedRectangle(cornerRadius: 28, style: .continuous) + .fill(.ultraThinMaterial) + ) + .overlay( + RoundedRectangle(cornerRadius: 28, style: .continuous) + .strokeBorder(.white.opacity(0.18), lineWidth: 1) + ) + .animation(.spring(response: 0.35, dampingFraction: 0.72), value: expression) + .animation(.easeInOut(duration: 0.2), value: state) + } + + @ViewBuilder + private func eyeView(left: Bool) -> some View { + switch expression { + case .squint: + Capsule() + .fill(.white) + .frame(width: 36, height: 8) + .matchedGeometryEffect(id: left ? "left-eye" : "right-eye", in: faceNamespace) + default: + RoundedRectangle(cornerRadius: 6, style: .continuous) + .fill(.white) + .frame(width: 30, height: state == .listening ? 38 : 30) + .matchedGeometryEffect(id: left ? 
"left-eye" : "right-eye", in: faceNamespace) + } + } + + private var mouthView: some View { + Group { + switch expression { + case .excited: + RoundedRectangle(cornerRadius: 10, style: .continuous) + .fill(.white) + .frame(width: 66, height: 18) + case .speaking: + Capsule() + .fill(.white) + .frame(width: 52, height: 14) + case .curious: + Circle() + .stroke(.white, lineWidth: 4) + .frame(width: 22, height: 22) + default: + Capsule() + .fill(.white.opacity(0.85)) + .frame(width: 56, height: 8) + } + } + .matchedGeometryEffect(id: "mouth", in: faceNamespace) + } +} diff --git a/Packages/UIComponents/Tests/UIComponentsTests/UIComponentsTests.swift b/Packages/UIComponents/Tests/UIComponentsTests/UIComponentsTests.swift new file mode 100644 index 0000000..eb6d532 --- /dev/null +++ b/Packages/UIComponents/Tests/UIComponentsTests/UIComponentsTests.swift @@ -0,0 +1,10 @@ +import XCTest +import CoreAgent +@testable import UIComponents + +final class UIComponentsTests: XCTestCase { + func testFaceViewInit() { + XCTAssertNotNil(BMOFaceView(expression: .happy, state: .idle)) + XCTAssertNotNil(BMOFaceView(expression: .excited, state: .speaking)) + } +} diff --git a/Packages/VisionPipeline/Package.swift b/Packages/VisionPipeline/Package.swift new file mode 100644 index 0000000..2549ceb --- /dev/null +++ b/Packages/VisionPipeline/Package.swift @@ -0,0 +1,20 @@ +// swift-tools-version: 5.10 +import PackageDescription + +let package = Package( + name: "VisionPipeline", + platforms: [.macOS(.v14)], + products: [ + .library(name: "VisionPipeline", targets: ["VisionPipeline"]) + ], + dependencies: [ + .package(path: "../CoreAgent") + ], + targets: [ + .target(name: "VisionPipeline", dependencies: ["CoreAgent"]), + .testTarget( + name: "VisionPipelineTests", + dependencies: ["VisionPipeline"] + ) + ] +) diff --git a/Packages/VisionPipeline/Sources/VisionPipeline/VisionServices.swift b/Packages/VisionPipeline/Sources/VisionPipeline/VisionServices.swift new file mode 100644 index 
/// A `VisionService` that hands back a previously queued snapshot summary.
///
/// The service holds at most one queued `VisionContext`; a successful capture
/// consumes it.
public actor SnapshotVisionService: VisionService {
    /// Feature flag fixed at initialization.
    private let isEnabled: Bool
    /// The snapshot waiting to be returned by the next capture, if any.
    private var pendingSnapshot: VisionContext?

    /// Creates the service.
    /// - Parameter enabled: Whether captures are allowed. Defaults to `false`.
    public init(enabled: Bool = false) {
        isEnabled = enabled
    }

    /// Stores `summary` as the next snapshot, replacing any one already queued.
    /// - Parameter summary: Text wrapped into the queued `VisionContext`.
    public func queueSnapshotSummary(_ summary: String) {
        pendingSnapshot = VisionContext(summary: summary)
    }

    /// Returns the queued snapshot and clears it.
    /// - Returns: The `VisionContext` queued by `queueSnapshotSummary(_:)`.
    /// - Throws: `VisionPipelineError.disabled` when the service was created
    ///   with `enabled: false`; `VisionPipelineError.captureUnavailable` when
    ///   nothing is queued.
    public func captureSnapshotDescription() async throws -> VisionContext {
        if !isEnabled {
            throw VisionPipelineError.disabled
        }
        guard let snapshot = pendingSnapshot else {
            throw VisionPipelineError.captureUnavailable
        }
        // Consume the snapshot so a second capture does not return it again.
        pendingSnapshot = nil
        return snapshot
    }
}
#!/usr/bin/env bash
# Runs the xcodebuild test action for every local Swift package and the app.
#
# Environment:
#   TEST_DESTINATION  Optional xcodebuild destination specifier. Defaults to
#                     the host's macOS architecture as reported by `uname -m`.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Derive the architecture from the host instead of hard-coding arm64, so the
# script also finds a matching destination on Intel machines.
DESTINATION="${TEST_DESTINATION:-platform=macOS,arch=$(uname -m)}"

# Runs `xcodebuild test` for one package.
#   $1  package directory, relative to the repository root
#   $2  scheme name to test
# A missing directory is skipped (returns 0), but a warning is printed to
# stderr so a typo or removed package does not silently drop test coverage.
run_package_tests() {
  local package_dir="$1"
  local scheme="$2"

  if [ ! -d "$package_dir" ]; then
    echo "warning: skipping tests for '$scheme': '$package_dir' not found" >&2
    return 0
  fi

  # Subshell keeps the directory change local to this package.
  (
    cd "$package_dir"
    xcodebuild test \
      -scheme "$scheme" \
      -destination "$DESTINATION" \
      -quiet
  )
}

run_package_tests "Packages/CoreAgent" "CoreAgent"
run_package_tests "Packages/AudioPipeline" "AudioPipeline"
run_package_tests "Packages/ModelRuntime" "ModelRuntime"
run_package_tests "Packages/UIComponents" "UIComponents"
run_package_tests "Packages/VisionPipeline" "VisionPipeline"
run_package_tests "KAMIBotApp" "KAMIBotApp"