Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,7 @@ jobs:
run: xcodebuild -downloadComponent MetalToolchain
- name: Build
run: xcodebuild build-for-testing -scheme MLXAudio-Package -destination 'platform=macOS' MACOSX_DEPLOYMENT_TARGET=14.0 CODE_SIGNING_ALLOWED=NO
- name: Build consumer graph fixture
run: swift build --package-path Integration/ConsumerGraphFixture
- name: Run tests
run: xcodebuild test-without-building -scheme MLXAudio-Package -destination 'platform=macOS' -skip-testing:'MLXAudioTests/SmokeTests' -parallel-testing-enabled NO CODE_SIGNING_ALLOWED=NO
26 changes: 26 additions & 0 deletions Integration/ConsumerGraphFixture/Package.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// swift-tools-version: 6.2
import PackageDescription

// Manifest for the consumer-graph fixture: one executable target that pulls the
// local package into the same dependency graph as swift-transformers and
// mlx-swift-lm, so graph-level build failures surface when this package is built.
let package = Package(
name: "ConsumerGraphFixture",
platforms: [.macOS(.v14)],
products: [
.executable(name: "ConsumerGraphFixture", targets: ["ConsumerGraphFixture"]),
],
dependencies: [
// The package under test, referenced by relative path (two directories up
// from this manifest).
.package(path: "../.."),
.package(url: "https://github.com/huggingface/swift-transformers.git", from: "1.3.0"),
// Tracks the `main` branch rather than a tagged release, so this build is not
// pinned to a fixed revision. NOTE(review): the README describes this as
// intentional — confirm before pinning.
.package(url: "https://github.com/ml-explore/mlx-swift-lm.git", branch: "main"),
],
targets: [
.executableTarget(
name: "ConsumerGraphFixture",
dependencies: [
// NOTE(review): "mlx-audio-swift" presumably resolves to the path dependency
// above; verify that the identity matches the checkout directory name.
.product(name: "MLXAudioTTS", package: "mlx-audio-swift"),
.product(name: "MLXLMCommon", package: "mlx-swift-lm"),
.product(name: "Hub", package: "swift-transformers"),
.product(name: "Tokenizers", package: "swift-transformers"),
]
),
]
)
19 changes: 19 additions & 0 deletions Integration/ConsumerGraphFixture/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Consumer Graph Fixture

This fixture reproduces package-graph behavior that does not show up when building `mlx-audio-swift` by itself.

It depends on:

- the local package via `.package(path: "../..")`
- `swift-transformers`, declared with `from: "1.3.0"` (version 1.3.0 or newer, below the next major version)
- `mlx-swift-lm` from the `main` branch

It intentionally depends on `MLXAudioTTS`, not just `MLXAudioCodecs`, because the current consumer-graph failure shows up while compiling the higher-level TTS target against that newer shared stack.

Build it locally with:

```sh
swift build --package-path Integration/ConsumerGraphFixture
```

That same command is intended to run in CI as a regression check.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import Hub
import MLXAudioTTS
import MLXLMCommon
import Tokenizers

// The program does no work beyond this message; the imports above are what
// force the four modules to be compiled and linked together.
print("ConsumerGraphFixture built successfully")
1 change: 1 addition & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ let package = Package(
.product(name: "MLXNN", package: "mlx-swift"),
.product(name: "MLXLMCommon", package: "mlx-swift-lm"),
.product(name: "HuggingFace", package: "swift-huggingface"),
.product(name: "Tokenizers", package: "swift-transformers"),
],
path: "Sources/MLXAudioCodecs"
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public final class ChatterboxModel: Module, SpeechGenerationModel, @unchecked Se
// MARK: - State

/// Text tokenizer loaded from tokenizer.json.
public var tokenizer: Tokenizer?
public var tokenizer: TTSModelTokenizer?

/// S3TokenizerV2: converts audio → speech token IDs (loaded separately).
public var s3Tokenizer: S3TokenizerV2?
Expand Down
2 changes: 1 addition & 1 deletion Sources/MLXAudioTTS/Models/Llama/LlamaTTS.swift
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ private class LlamaTTSModelInner: Module {
public class LlamaTTSModel: Module, KVCacheDimensionProvider, SpeechGenerationModel, @unchecked Sendable {
public let vocabularySize: Int
public let kvHeads: [Int]
public var tokenizer: Tokenizer?
public var tokenizer: TTSModelTokenizer?
public var _snacModel: SNAC?

private let model: LlamaTTSModelInner
Expand Down
6 changes: 3 additions & 3 deletions Sources/MLXAudioTTS/Models/Marvis/MarvisTTSModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ public final class MarvisTTSModel: Module {

private let model: CSMModel
private let _promptURLs: [URL]?
private let _textTokenizer: Tokenizer
private let _textTokenizer: TTSModelTokenizer
private let _audio_tokenizer: MimiTokenizer
private let _streamingDecoder: MimiStreamingDecoder

public init(
config: CSMModelArgs,
repoId: String,
promptURLs: [URL]? = nil,
textTokenizer: Tokenizer,
textTokenizer: TTSModelTokenizer,
audioTokenizer: MimiTokenizer
) {
_ = repoId
Expand All @@ -55,7 +55,7 @@ public final class MarvisTTSModel: Module {
promptURLs: [URL]? = nil,
progressHandler: @Sendable @escaping (Progress) -> Void
) async throws {
let textTokenizer = try await loadTokenizer(configuration: ModelConfiguration(id: repoId), hub: hub)
let textTokenizer = try await AutoTokenizer.from(pretrained: repoId, hubApi: hub)
let codec = try await Mimi.fromPretrained(progressHandler: progressHandler)
let audioTokenizer = MimiTokenizer(codec)
self.init(
Expand Down
2 changes: 1 addition & 1 deletion Sources/MLXAudioTTS/Models/Qwen3/Qwen3.swift
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ public class Qwen3Model: Module, KVCacheDimensionProvider, SpeechGenerationModel

public let vocabularySize: Int
public let kvHeads: [Int]
public var tokenizer: Tokenizer?
public var tokenizer: TTSModelTokenizer?
public var _snacModel: SNAC?

private let model: Qwen3ModelInner
Expand Down
2 changes: 1 addition & 1 deletion Sources/MLXAudioTTS/Models/Qwen3TTS/Qwen3TTS.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public final class Qwen3TTSModel: Module, SpeechGenerationModel, @unchecked Send
let talker: Qwen3TTSTalkerForConditionalGeneration
var speakerEncoder: Qwen3TTSSpeakerEncoder?
var speechTokenizer: Qwen3TTSSpeechTokenizer?
var tokenizer: Tokenizer?
var tokenizer: TTSModelTokenizer?

public var sampleRate: Int { config.sampleRate }

Expand Down
2 changes: 1 addition & 1 deletion Sources/MLXAudioTTS/Models/Soprano/Soprano.swift
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ private class SopranoModelInner: Module {
public class SopranoModel: Module, KVCacheDimensionProvider, SpeechGenerationModel, @unchecked Sendable {
public let vocabularySize: Int
public let kvHeads: [Int]
public var tokenizer: Tokenizer?
public var tokenizer: TTSModelTokenizer?

private let model: SopranoModelInner
let configuration: SopranoConfiguration
Expand Down
5 changes: 5 additions & 0 deletions Sources/MLXAudioTTS/TokenizerTypes.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import Tokenizers

/// The tokenizer type used by TTS models: an existential of the
/// swift-transformers `Tokenizers.Tokenizer` protocol.
///
/// Both MLXLMCommon and swift-transformers expose a `Tokenizer` protocol, so the
/// module-qualified name is spelled out here and TTS model loading uses the
/// swift-transformers tokenizer API explicitly.
public typealias TTSModelTokenizer = any Tokenizers.Tokenizer