Skip to content

Commit 2cbd2bc

Browse files
Align SwiftBuddy settings with local server config
1 parent 08ceed8 commit 2cbd2bc

11 files changed

Lines changed: 1169 additions & 339 deletions

File tree

Sources/MLXInferenceCore/GenerationConfig.swift

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,24 @@ public struct GenerationConfig: Sendable {
1212
public var seed: UInt64?
1313
public var enableThinking: Bool
1414

15+
// ── SwiftLM Engine Parameters ──────────────────────────────────────
16+
/// Enable TurboQuant KV-cache compression (3-bit PolarQuant+QJL).
17+
/// Compresses KV history > 8192 tokens to ~3.5 bits/token.
18+
public var turboKV: Bool
19+
20+
/// Enable SSD expert streaming for MoE models.
21+
public var streamExperts: Bool
22+
23+
/// Chunk size for prefill evaluation.
24+
/// Lower values prevent GPU timeout on large models.
25+
public var prefillSize: Int
26+
27+
/// KV-cache quantization bits (nil = no quantization, 4 or 8 typical).
28+
public var kvBits: Int?
29+
30+
/// KV-cache quantization group size (default 64).
31+
public var kvGroupSize: Int
32+
1533
public init(
1634
maxTokens: Int = 2048,
1735
temperature: Float = 0.6,
@@ -20,7 +38,12 @@ public struct GenerationConfig: Sendable {
2038
minP: Float = 0.0,
2139
repetitionPenalty: Float = 1.05,
2240
seed: UInt64? = nil,
23-
enableThinking: Bool = false
41+
enableThinking: Bool = false,
42+
turboKV: Bool = false,
43+
streamExperts: Bool = false,
44+
prefillSize: Int = 512,
45+
kvBits: Int? = nil,
46+
kvGroupSize: Int = 64
2447
) {
2548
self.maxTokens = maxTokens
2649
self.temperature = temperature
@@ -30,6 +53,11 @@ public struct GenerationConfig: Sendable {
3053
self.repetitionPenalty = repetitionPenalty
3154
self.seed = seed
3255
self.enableThinking = enableThinking
56+
self.turboKV = turboKV
57+
self.streamExperts = streamExperts
58+
self.prefillSize = prefillSize
59+
self.kvBits = kvBits
60+
self.kvGroupSize = kvGroupSize
3361
}
3462

3563
public static let `default` = GenerationConfig()

SwiftBuddy/SwiftBuddy/SwiftBuddyApp.swift

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ struct SwiftBuddyApp: App {
5454
.commands {
5555
CommandGroup(replacing: .newItem) {}
5656
CommandMenu("Model") {
57-
Button("Choose Model") {
58-
NotificationCenter.default.post(name: .showModelPicker, object: nil)
57+
Button("Load Model...") {
58+
NotificationCenter.default.post(name: .showModelManagement, object: nil)
5959
}.keyboardShortcut("m", modifiers: [.command, .shift])
6060
Button("Unload Model") {
6161
engine.unload()
@@ -72,7 +72,6 @@ struct SwiftBuddyApp: App {
7272
}
7373

7474
extension Notification.Name {
75-
static let showModelPicker = Notification.Name("showModelPicker")
7675
static let showTextIngestion = Notification.Name("showTextIngestion")
7776
static let showPersonaDiscovery = Notification.Name("showPersonaDiscovery")
7877
static let showModelManagement = Notification.Name("showModelManagement")
@@ -91,6 +90,7 @@ struct MainContentView: View {
9190
var body: some View {
9291
RootView()
9392
.environmentObject(engine)
93+
.environmentObject(engine.downloadManager)
9494
.environmentObject(appearance)
9595
.environmentObject(server)
9696
.preferredColorScheme(appearance.colorScheme)
@@ -99,7 +99,9 @@ struct MainContentView: View {
9999
.onAppear {
100100
MemoryPalaceService.shared.modelContext = modelContext
101101
GraphPalaceService.shared.modelContext = modelContext
102-
server.start(engine: engine)
102+
if server.startupConfiguration.autoStart {
103+
server.start(engine: engine)
104+
}
103105

104106
// Pre-load the JSON personas so the UI Wings instantly populate!
105107
PersonaLoader.loadDevDefaults()
Lines changed: 165 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,207 @@
11
import Foundation
2+
import HTTPTypes
23
import Hummingbird
34
import NIOCore
45
#if canImport(MLXInferenceCore)
56
import MLXInferenceCore
67
#endif
78

9+
struct ServerStartupConfiguration: Codable, Equatable, Sendable {
10+
var autoStart: Bool = true
11+
var host: String = "127.0.0.1"
12+
var port: Int = 5413
13+
var parallelSlots: Int = 1
14+
var corsOrigin: String = ""
15+
var apiKey: String = ""
16+
17+
private static let storageKey = "swiftlm.server.startupConfiguration"
18+
19+
var normalized: ServerStartupConfiguration {
20+
var copy = self
21+
copy.host = copy.host.trimmingCharacters(in: .whitespacesAndNewlines)
22+
if copy.host.isEmpty { copy.host = "127.0.0.1" }
23+
copy.port = min(max(copy.port, 1), 65_535)
24+
copy.parallelSlots = max(copy.parallelSlots, 1)
25+
copy.corsOrigin = copy.corsOrigin.trimmingCharacters(in: .whitespacesAndNewlines)
26+
copy.apiKey = copy.apiKey.trimmingCharacters(in: .whitespacesAndNewlines)
27+
return copy
28+
}
29+
30+
static func load() -> ServerStartupConfiguration {
31+
guard let data = UserDefaults.standard.data(forKey: storageKey),
32+
let decoded = try? JSONDecoder().decode(ServerStartupConfiguration.self, from: data) else {
33+
return ServerStartupConfiguration()
34+
}
35+
return decoded.normalized
36+
}
37+
38+
func save() {
39+
guard let data = try? JSONEncoder().encode(normalized) else { return }
40+
UserDefaults.standard.set(data, forKey: Self.storageKey)
41+
}
42+
}
43+
44+
/// Header fields declaring an `application/json` body; used by the JSON
/// responses in this file.
private var swiftBuddyJSONHeaders: HTTPFields {
    var fields = HTTPFields()
    fields.append(HTTPField(name: .contentType, value: "application/json"))
    return fields
}
47+
48+
/// Renders `value` as a JSON string literal, including the surrounding quotes
/// and any escaping applied by `JSONEncoder`.
///
/// - Parameter value: Text to embed in a hand-built JSON document.
/// - Returns: The encoded literal, or the empty JSON string `""` if encoding
///   or UTF-8 decoding fails.
private func swiftBuddyJSONString(_ value: String) -> String {
    guard let data = try? JSONEncoder().encode(value),
          let string = String(data: data, encoding: .utf8) else {
        // The fallback must itself be valid JSON. A lone quote character would
        // corrupt any document this result is interpolated into.
        return "\"\""
    }
    return string
}
55+
56+
/// Router middleware that adds CORS response headers for one configured origin.
///
/// `allowedOrigin` is either `"*"` (any origin) or an exact origin string that
/// is compared against the request's `Origin` header.
private struct SwiftBuddyCORSMiddleware<Context: RequestContext>: RouterMiddleware {
    let allowedOrigin: String

    /// Answers `OPTIONS` requests directly with `204 No Content` plus the CORS
    /// headers; for every other method, forwards the request and appends the
    /// CORS headers to the downstream response.
    func handle(_ request: Request, context: Context, next: (Request, Context) async throws -> Response) async throws -> Response {
        if request.method == .options {
            return Response(status: .noContent, headers: corsHeaders(for: request))
        }

        var response = try await next(request, context)
        for field in corsHeaders(for: request) {
            response.headers.append(field)
        }
        return response
    }

    /// Builds the CORS header fields for `request`.
    ///
    /// Uses the typed `HTTPField.Name` constants instead of force-unwrapping
    /// names parsed from string literals.
    private func corsHeaders(for request: Request) -> HTTPFields {
        var fields = HTTPFields()
        if allowedOrigin == "*" {
            fields.append(HTTPField(name: .accessControlAllowOrigin, value: "*"))
        } else {
            // The response differs by request origin, so advertise that on every
            // response (matching or not). Otherwise a shared cache could replay a
            // response without Access-Control-Allow-Origin to the allowed origin.
            fields.append(HTTPField(name: .vary, value: "Origin"))
            let requestOrigin = request.headers[values: .origin].first ?? ""
            if requestOrigin == allowedOrigin {
                fields.append(HTTPField(name: .accessControlAllowOrigin, value: allowedOrigin))
            }
        }
        fields.append(HTTPField(name: .accessControlAllowMethods, value: "GET, POST, OPTIONS"))
        fields.append(HTTPField(name: .accessControlAllowHeaders, value: "Content-Type, Authorization, X-SwiftLM-Prefill-Progress"))
        return fields
    }
}
87+
88+
/// Router middleware that rejects requests lacking the configured API key.
///
/// `/health` and `/metrics` are always let through. Any other path must carry
/// an `Authorization` header equal to either `Bearer <apiKey>` or the bare key.
private struct SwiftBuddyAPIKeyMiddleware<Context: RequestContext>: RouterMiddleware {
    let apiKey: String

    /// Forwards authorized requests to `next`; otherwise returns a
    /// `401 Unauthorized` JSON error without calling `next`.
    func handle(_ request: Request, context: Context, next: (Request, Context) async throws -> Response) async throws -> Response {
        let path = request.uri.path
        if path == "/health" || path == "/metrics" {
            return try await next(request, context)
        }

        // NOTE(review): this is an ordinary (non-constant-time) string comparison.
        let authHeader = request.headers[values: .authorization].first ?? ""
        if authHeader == "Bearer \(apiKey)" || authHeader == apiKey {
            return try await next(request, context)
        }

        // A 401 response must carry a WWW-Authenticate challenge (RFC 9110 §15.5.2)
        // so clients know which authentication scheme to use.
        var headers = swiftBuddyJSONHeaders
        headers.append(HTTPField(name: .wwwAuthenticate, value: "Bearer"))

        return Response(
            status: .unauthorized,
            headers: headers,
            body: .init(byteBuffer: ByteBuffer(string: #"{"error":{"message":"Invalid API key","type":"invalid_request_error","code":"invalid_api_key"}}"#))
        )
    }
}
109+
8110
/// Owns the lifecycle of the embedded local HTTP server and publishes its state.
///
/// `startupConfiguration` is the saved configuration that the next start will
/// use; `runningConfiguration` is the one the current server was started with
/// (`nil` while offline). `restartRequired` is set when the two diverge while
/// the server is online.
@MainActor
final class ServerManager: ObservableObject {
    @Published var isOnline = false
    @Published var host: String = "127.0.0.1"
    @Published var port: Int = 5413
    @Published private(set) var startupConfiguration: ServerStartupConfiguration
    @Published private(set) var runningConfiguration: ServerStartupConfiguration?
    @Published private(set) var restartRequired = false

    // In a real implementation this would hold the Hummingbird App and tie into `engine`
    private var task: Task<Void, Never>?

    /// Incremented by every `start` and `stop`. A server task only touches the
    /// published state while its captured value is still current, so a task
    /// that was stopped or superseded cannot overwrite the state of a newer one.
    private var lifecycleGeneration = 0

    /// Loads the persisted startup configuration and mirrors its host and port.
    init() {
        let configuration = ServerStartupConfiguration.load()
        self.startupConfiguration = configuration
        self.host = configuration.host
        self.port = configuration.port
    }

    /// Starts the server with the normalized startup configuration.
    ///
    /// Does nothing when the server is already online. CORS and API-key
    /// middleware are installed only when the corresponding setting is non-empty.
    ///
    /// - Parameter engine: The inference engine; not used by the current routes.
    func start(engine: InferenceEngine) {
        guard !isOnline else { return }
        let configuration = startupConfiguration.normalized
        lifecycleGeneration += 1
        let generation = lifecycleGeneration

        task = Task {
            // `stop()` may have run before this task body was scheduled.
            guard generation == self.lifecycleGeneration else { return }

            do {
                let router = Router()

                if !configuration.corsOrigin.isEmpty {
                    router.add(middleware: SwiftBuddyCORSMiddleware(allowedOrigin: configuration.corsOrigin))
                }

                if !configuration.apiKey.isEmpty {
                    router.add(middleware: SwiftBuddyAPIKeyMiddleware(apiKey: configuration.apiKey))
                }

                router.get("/health") { _, _ -> Response in
                    let body = """
                    {"status":"ok","message":"SwiftBuddy Local Server","host":\(swiftBuddyJSONString(configuration.host)),"port":\(configuration.port),"parallel":\(configuration.parallelSlots),"cors":\(swiftBuddyJSONString(configuration.corsOrigin.isEmpty ? "disabled" : configuration.corsOrigin)),"auth":"\(configuration.apiKey.isEmpty ? "disabled" : "enabled")"}
                    """
                    let buffer = ByteBuffer(string: body)
                    return Response(status: .ok, headers: swiftBuddyJSONHeaders, body: .init(byteBuffer: buffer))
                }

                // Simple V1 models mock
                router.get("/v1/models") { _, _ -> Response in
                    let buffer = ByteBuffer(string: #"{"object": "list", "data": [{"id": "local", "object": "model"}]}"#)
                    return Response(status: .ok, headers: swiftBuddyJSONHeaders, body: .init(byteBuffer: buffer))
                }

                let app = Application(
                    router: router,
                    configuration: .init(address: .hostname(configuration.host, port: configuration.port))
                )

                self.isOnline = true
                self.host = configuration.host
                self.port = configuration.port
                self.runningConfiguration = configuration
                self.restartRequired = false
                ConsoleLog.shared.info("Server online at http://\(configuration.host):\(configuration.port)")

                try await app.runService()

                // The service returned without throwing, so it is no longer serving.
                self.markOffline(ifCurrent: generation)
            } catch {
                // Errors from a server that was stopped or replaced are expected
                // (e.g. cancellation) and must not disturb the current state.
                guard generation == self.lifecycleGeneration else { return }
                print("Server failed: \(error)")
                ConsoleLog.shared.error("Server failed: \(error.localizedDescription)")
                self.markOffline(ifCurrent: generation)
            }
        }
    }

    /// Stores `configuration` (normalized) as the startup configuration and persists it.
    ///
    /// Also updates `host`/`port` and recomputes `restartRequired` against the
    /// running configuration.
    ///
    /// - Returns: `true` when the normalized value differs from the previous
    ///   startup configuration.
    @discardableResult
    func saveStartupConfiguration(_ configuration: ServerStartupConfiguration) -> Bool {
        let normalized = configuration.normalized
        let changed = normalized != startupConfiguration
        startupConfiguration = normalized
        host = normalized.host
        port = normalized.port
        normalized.save()
        restartRequired = isOnline && runningConfiguration != nil && runningConfiguration != normalized
        if changed {
            ConsoleLog.shared.info("Server startup configuration saved")
        }
        return changed
    }

    /// Stops the server and starts it again with the current startup configuration.
    ///
    /// When a server task exists, the new start is deferred until that task has
    /// finished, so the new listener is not created while the old task is still
    /// running. The deferred start is dropped if `start` or `stop` is called
    /// in the meantime.
    func restart(engine: InferenceEngine) {
        let previous = task
        stop()

        guard let previous else {
            start(engine: engine)
            return
        }

        let generation = lifecycleGeneration
        Task {
            await previous.value
            guard generation == self.lifecycleGeneration else { return }
            self.start(engine: engine)
        }
    }

    /// Cancels the server task and resets the published state to offline.
    func stop() {
        // Invalidate the running task's generation before cancelling it so its
        // completion path leaves the state set below untouched.
        lifecycleGeneration += 1
        task?.cancel()
        task = nil
        isOnline = false
        runningConfiguration = nil
        restartRequired = false
    }

    /// Resets the published state to offline, but only when `generation` still
    /// identifies the most recent start.
    private func markOffline(ifCurrent generation: Int) {
        guard generation == lifecycleGeneration else { return }
        isOnline = false
        runningConfiguration = nil
        restartRequired = false
    }
}

0 commit comments

Comments
 (0)