Skip to content

Commit f4d2ff7

Browse files
author
bealqiu
committed
refactor: 优化 Edge TTS 音频预取机制
1 parent 0122cb1 commit f4d2ff7

2 files changed

Lines changed: 82 additions & 40 deletions

File tree

packages/app/src/lib/tts/edge-tts.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,15 @@ function genMessage(headers: Record<string, string>, content: string): string {
205205

206206
// ── Edge TTS WebSocket Client (using Tauri WebSocket plugin) ──
207207

208+
// Cache the dynamic import so it's only loaded once
209+
let _TauriWebSocket: Awaited<typeof import("@tauri-apps/plugin-websocket")>["default"] | null = null;
210+
async function getTauriWebSocket() {
211+
if (!_TauriWebSocket) {
212+
_TauriWebSocket = (await import("@tauri-apps/plugin-websocket")).default;
213+
}
214+
return _TauriWebSocket;
215+
}
216+
208217
export interface EdgeTTSPayload {
209218
text: string;
210219
voice: string;
@@ -220,7 +229,7 @@ export interface EdgeTTSPayload {
220229
* Returns the accumulated MP3 audio as an ArrayBuffer.
221230
*/
222231
export async function fetchEdgeTTSAudio(payload: EdgeTTSPayload): Promise<ArrayBuffer> {
223-
const TauriWebSocket = (await import("@tauri-apps/plugin-websocket")).default;
232+
const TauriWebSocket = await getTauriWebSocket();
224233

225234
const connectId = randomHex(16);
226235
const secMsGec = await generateSecMsGec();

packages/app/src/lib/tts/tts-service.ts

Lines changed: 72 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -502,8 +502,13 @@ export class EdgeTTSPlayer {
502502
/** Timer to detect when all audio has finished playing */
503503
private checkEndTimer: ReturnType<typeof setInterval> | null = null;
504504
private allChunksDone = false;
505-
/** Prefetched audio data keyed by chunk index */
506-
private prefetchCache = new Map<number, Promise<ArrayBuffer>>();
505+
/** Buffer of pre-fetched audio: producer fills up to BUFFER_SIZE, consumer drains */
506+
private fetchBuffer = new Map<number, Promise<ArrayBuffer>>();
507+
/** Next chunk index the producer should fetch */
508+
private producerIndex = 0;
509+
/** Resolve function to wake up producer when buffer has room */
510+
private producerWake: (() => void) | null = null;
511+
private static readonly BUFFER_SIZE = 3;
507512

508513
onStateChange?: (state: "playing" | "paused" | "stopped") => void;
509514
onChunkChange?: (index: number, total: number) => void;
@@ -513,20 +518,22 @@ export class EdgeTTSPlayer {
513518
get paused() { return this._paused; }
514519

515520
async speak(text: string, config: TTSConfig) {
516-
// Clean up previous playback without firing onStateChange (same pattern as DashScope)
521+
// Clean up previous playback without firing onStateChange
517522
this.aborted = true;
518523
this.cleanupAudio();
519-
this.prefetchCache.clear();
524+
this.fetchBuffer.clear();
525+
this.producerWake?.();
520526
if (this.checkEndTimer) { clearInterval(this.checkEndTimer); this.checkEndTimer = null; }
521527

522-
this.chunks = splitIntoChunks(text);
528+
// Moderate chunk size: small enough for fast first-chunk response,
529+
// large enough to reduce total WebSocket connections
530+
this.chunks = splitIntoChunks(text, 800);
523531
this._playing = true;
524532
this._paused = false;
525533
this.aborted = false;
526534
this.allChunksDone = false;
527535
this.hasAudioData = false;
528536
this.playingNotified = false;
529-
this.prefetchCache.clear();
530537

531538
// Create AudioContext for gapless playback
532539
this.audioCtx = new AudioContext();
@@ -556,57 +563,81 @@ export class EdgeTTSPlayer {
556563

557564
const voice = config.edgeVoice || "zh-CN-XiaoxiaoNeural";
558565
const lang = voice.split("-").slice(0, 2).join("-");
559-
const fetchPayloadBase = { voice, lang, rate: config.rate, pitch: config.pitch };
566+
const base = { voice, lang, rate: config.rate, pitch: config.pitch };
560567

561-
// Kick off prefetch for chunk 0 (and chunk 1 if exists)
562-
this.startPrefetch(0, fetchPayloadBase);
563-
if (this.chunks.length > 1) {
564-
this.startPrefetch(1, fetchPayloadBase);
565-
}
568+
// Producer-consumer: producer sequentially fetches chunks one by one,
569+
// keeping up to BUFFER_SIZE completed results buffered ahead.
570+
this.producerIndex = 0;
571+
this.fetchBuffer.clear();
566572

573+
// Start producer (runs in background)
574+
this.runProducer(base);
575+
576+
// Consumer: consume chunks in order
567577
for (let i = 0; i < this.chunks.length; i++) {
568578
if (!this._playing || this.aborted) return;
569579
this.onChunkChange?.(i, this.chunks.length);
570580

571-
// Prefetch the chunk after next (i+2) while we play chunk i
572-
if (i + 2 < this.chunks.length) {
573-
this.startPrefetch(i + 2, fetchPayloadBase);
574-
}
575-
576581
try {
577-
const audioData = await this.getPrefetchedAudio(i, fetchPayloadBase);
582+
// Wait for the producer to have this chunk ready, then await the fetch result
583+
const audioData = await this.waitForChunk(i);
578584
if (!this._playing || this.aborted) return;
579-
580585
await this.decodeAndSchedule(audioData);
581586
} catch (err) {
582587
console.error("[Edge TTS] chunk error:", err);
583588
}
584589

585-
// Clean up used cache entry
586-
this.prefetchCache.delete(i);
590+
// Release completed entry and wake producer to fill the slot
591+
this.fetchBuffer.delete(i);
592+
this.producerWake?.();
587593
}
588594

589595
this.allChunksDone = true;
590596
}
591597

592-
/** Start prefetching audio for chunk at given index (no-op if already started) */
593-
private startPrefetch(index: number, base: { voice: string; lang: string; rate: number; pitch: number }) {
594-
if (this.prefetchCache.has(index) || index >= this.chunks.length) return;
595-
const promise = fetchEdgeTTSAudio({
596-
text: this.chunks[index],
597-
...base,
598-
}).catch((err) => {
599-
this.prefetchCache.delete(index);
600-
throw err;
601-
});
602-
this.prefetchCache.set(index, promise);
598+
/**
599+
* Producer: sequentially fetches chunks one at a time.
600+
* After each fetch is *initiated* (not awaited), checks if buffer is full.
601+
* If full, pauses until consumer drains a slot.
602+
* This avoids high concurrency while keeping the buffer filled.
603+
*/
604+
private async runProducer(base: { voice: string; lang: string; rate: number; pitch: number }) {
605+
while (this.producerIndex < this.chunks.length) {
606+
if (!this._playing || this.aborted) return;
607+
608+
// Wait until buffer has room
609+
while (this.fetchBuffer.size >= EdgeTTSPlayer.BUFFER_SIZE) {
610+
if (!this._playing || this.aborted) return;
611+
await new Promise<void>((resolve) => { this.producerWake = resolve; });
612+
this.producerWake = null;
613+
}
614+
615+
if (!this._playing || this.aborted) return;
616+
617+
const idx = this.producerIndex++;
618+
// Initiate fetch (non-blocking), store the promise
619+
const promise = fetchEdgeTTSAudio({ text: this.chunks[idx], ...base });
620+
this.fetchBuffer.set(idx, promise);
621+
622+
// Wait for this fetch to complete before starting the next one
623+
// This ensures serial WebSocket connections — no concurrent connections
624+
try {
625+
await promise;
626+
} catch {
627+
// Error will be handled by the consumer
628+
}
629+
}
603630
}
604631

605-
/** Get prefetched audio, or fetch on-demand as fallback */
606-
private async getPrefetchedAudio(index: number, base: { voice: string; lang: string; rate: number; pitch: number }): Promise<ArrayBuffer> {
607-
const cached = this.prefetchCache.get(index);
608-
if (cached) return cached;
609-
return fetchEdgeTTSAudio({ text: this.chunks[index], ...base });
632+
/** Wait until the producer has queued this chunk, then return the fetched audio data */
633+
private async waitForChunk(index: number): Promise<ArrayBuffer> {
634+
while (!this.fetchBuffer.has(index)) {
635+
if (!this._playing || this.aborted) {
636+
throw new Error("aborted");
637+
}
638+
await new Promise<void>((r) => setTimeout(r, 50));
639+
}
640+
return this.fetchBuffer.get(index)!;
610641
}
611642

612643
/**
@@ -643,7 +674,8 @@ export class EdgeTTSPlayer {
643674
}
644675
const onEnd = this.onEnd;
645676
this.cleanupAudio();
646-
this.prefetchCache.clear();
677+
this.fetchBuffer.clear();
678+
this.producerWake?.();
647679
this.chunks = [];
648680
this._playing = false;
649681
this._paused = false;
@@ -672,8 +704,9 @@ export class EdgeTTSPlayer {
672704
this.checkEndTimer = null;
673705
}
674706
this.cleanupAudio();
707+
this.fetchBuffer.clear();
708+
this.producerWake?.();
675709
this.chunks = [];
676-
this.prefetchCache.clear();
677710
this._playing = false;
678711
this._paused = false;
679712
this.onStateChange?.("stopped");

0 commit comments

Comments
 (0)