diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index 97414aef8..b5388c3e6 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -728,7 +728,10 @@ function GenerationPreviewContent() { ttsVoice: settings.ttsVoice, ttsSpeed: settings.ttsSpeed, ttsApiKey: ttsProviderConfig?.apiKey || undefined, - ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined, + ttsBaseUrl: + ttsProviderConfig?.baseUrl || + ttsProviderConfig?.customDefaultBaseUrl || + undefined, }), signal, }); diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 9379d9353..5e9727edf 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -113,7 +113,10 @@ function AgentVoicePill({ ttsVoice: voiceId, ttsSpeed: 1, ttsApiKey: providerConfig?.apiKey, - ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + ttsBaseUrl: + providerConfig?.serverBaseUrl || + providerConfig?.baseUrl || + providerConfig?.customDefaultBaseUrl, }), signal: controller.signal, }); @@ -337,7 +340,10 @@ function TeacherVoicePill({ ttsVoice: voiceId, ttsSpeed: 1, ttsApiKey: providerConfig?.apiKey, - ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + ttsBaseUrl: + providerConfig?.serverBaseUrl || + providerConfig?.baseUrl || + providerConfig?.customDefaultBaseUrl, }), signal: controller.signal, }); diff --git a/components/audio/tts-config-popover.tsx b/components/audio/tts-config-popover.tsx index 79ff0e899..7156d1d35 100644 --- a/components/audio/tts-config-popover.tsx +++ b/components/audio/tts-config-popover.tsx @@ -68,7 +68,7 @@ export function TtsConfigPopover() { voice: ttsVoice, speed: ttsSpeed, apiKey: providerConfig?.apiKey, - baseUrl: providerConfig?.baseUrl, + baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl, }); } catch (error) { const message = diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index a09a32432..a5564eb7c 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -32,10 +32,11 @@ import { useSettingsStore } from '@/lib/store/settings'; import { useTTSPreview } from '@/lib/audio/use-tts-preview'; import { IMAGE_PROVIDERS } from '@/lib/media/image-providers'; import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; -import { TTS_PROVIDERS, getTTSVoices } from '@/lib/audio/constants'; +import { TTS_PROVIDERS, getTTSVoices, CUSTOM_ASR_DEFAULT_LANGUAGES } from '@/lib/audio/constants'; import { ASR_PROVIDERS, getASRSupportedLanguages } from '@/lib/audio/constants'; import type { ImageProviderId, VideoProviderId } from '@/lib/media/types'; import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types'; +import { isCustomASRProvider } from '@/lib/audio/types'; import type { SettingsSection } from '@/lib/types/settings'; interface MediaPopoverProps { @@ -167,8 +168,6 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { needsKey: boolean, ) => !needsKey || !!configs[id]?.apiKey || !!configs[id]?.isServerConfigured; - const ttsSpeedRange = TTS_PROVIDERS[ttsProviderId]?.speedRange; - // ─── Dynamic browser voices ─── const [browserVoices, setBrowserVoices] = useState([]); useEffect(() => { @@ -275,7 +274,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { voice: ttsVoice, speed: ttsSpeed, apiKey: providerConfig?.apiKey, - baseUrl: providerConfig?.baseUrl, + baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl, }); } catch (error) { const message = @@ -293,23 +292,41 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { ttsVoice, ]); - // ASR: only available providers - const asrGroups = useMemo( - () => - Object.values(ASR_PROVIDERS) - .filter((p) => cfgOk(asrProvidersConfig, p.id, p.requiresApiKey)) - .map((p) => ({ - groupId: p.id, - groupName: p.name, - groupIcon: p.icon, - available: true, - items: getASRSupportedLanguages(p.id).map((l) => ({ - id: l, - name: l, - })), + // ASR: built-in + custom providers + const asrGroups = useMemo(() => { + const groups: SelectGroupData[] = []; + + // Built-in providers + for (const p of Object.values(ASR_PROVIDERS)) { + if (!cfgOk(asrProvidersConfig, p.id, p.requiresApiKey)) continue; + groups.push({ + groupId: p.id, + groupName: p.name, + groupIcon: p.icon, + available: true, + items: getASRSupportedLanguages(p.id).map((l) => ({ + id: l, + name: l, })), - [asrProvidersConfig], - ); + }); + } + + // Custom providers — only show if at least one model is configured + for (const [id, cfg] of Object.entries(asrProvidersConfig)) { + if (!isCustomASRProvider(id)) continue; + const customModels = cfg.customModels || []; + if (customModels.length === 0) continue; + const providerName = cfg.customName || id; + groups.push({ + groupId: id, + groupName: providerName, + available: true, + items: CUSTOM_ASR_DEFAULT_LANGUAGES.map((l) => ({ id: l, name: l })), + }); + } + + return groups; + }, [asrProvidersConfig]); // Auto-select first enabled tab on open const handleOpenChange = (isOpen: boolean) => { diff --git a/components/settings/add-audio-provider-dialog.tsx b/components/settings/add-audio-provider-dialog.tsx new file mode 100644 index 000000000..6ece1b731 --- /dev/null +++ b/components/settings/add-audio-provider-dialog.tsx @@ -0,0 +1,141 @@ +'use client'; + +import { useState } from 'react'; +import { Dialog, DialogContent, DialogTitle, DialogDescription } from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { Label } from '@/components/ui/label'; +import { Checkbox } from '@/components/ui/checkbox'; +import { Plus } from 'lucide-react'; +import { useI18n } from '@/lib/hooks/use-i18n'; + +export interface NewAudioProviderData { + name: string; + baseUrl: string; + defaultModel: string; + requiresApiKey: boolean; +} + +interface AddAudioProviderDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + onAdd: (data: NewAudioProviderData) => void; + type: 'tts' | 'asr'; +} + +export function AddAudioProviderDialog({ + open, + onOpenChange, + onAdd, + type, +}: AddAudioProviderDialogProps) { + const { t } = useI18n(); + + const [name, setName] = useState(''); + const [baseUrl, setBaseUrl] = useState(''); + const [defaultModel, setDefaultModel] = useState(''); + const [requiresApiKey, setRequiresApiKey] = useState(false); + + // Reset form when dialog closes + const [prevOpen, setPrevOpen] = useState(open); + if (open !== prevOpen) { + setPrevOpen(open); + if (!open) { + setName(''); + setBaseUrl(''); + setDefaultModel(''); + setRequiresApiKey(false); + } + } + + const handleAdd = () => { + if (!name.trim() || !baseUrl.trim()) return; + onAdd({ + name: name.trim(), + baseUrl: baseUrl.trim(), + defaultModel: defaultModel.trim(), + requiresApiKey, + }); + onOpenChange(false); + }; + + const titleKey = + type === 'tts' ? 'settings.addCustomTTSProvider' : 'settings.addCustomASRProvider'; + + return ( + + + {t(titleKey)} + + {t('settings.addCustomAudioProviderDescription')} + +
+
+

{t(titleKey)}

+

+ {t('settings.addCustomAudioProviderDescription')} +

+
+ +
+ + setName(e.target.value)} + /> +
+ +
+ + setBaseUrl(e.target.value)} + /> +
+ + {/* Default Model — TTS only (ASR models are managed in provider settings) */} + {type === 'tts' && ( +
+ + setDefaultModel(e.target.value)} + /> +

{t('settings.defaultModelHint')}

+
+ )} + +
+ setRequiresApiKey(checked as boolean)} + /> + +
+ +
+ + +
+
+
+
+ ); +} diff --git a/components/settings/asr-settings.tsx b/components/settings/asr-settings.tsx index 4a0cfb021..67f67bfbe 100644 --- a/components/settings/asr-settings.tsx +++ b/components/settings/asr-settings.tsx @@ -4,6 +4,16 @@ import { useState, useRef } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog'; import { Select, SelectContent, @@ -15,8 +25,10 @@ import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { ASR_PROVIDERS } from '@/lib/audio/constants'; import type { ASRProviderId } from '@/lib/audio/types'; -import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; +import { isCustomASRProvider } from '@/lib/audio/types'; +import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff, Plus, Loader2 } from 'lucide-react'; import { cn } from '@/lib/utils'; +import { toast } from 'sonner'; import { createLogger } from '@/lib/logger'; const log = createLogger('ASRSettings'); @@ -31,12 +43,20 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { const asrLanguage = useSettingsStore((state) => state.asrLanguage); const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig); const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig); + const removeCustomASRProvider = useSettingsStore((state) => state.removeCustomASRProvider); - const asrProvider = ASR_PROVIDERS[selectedProviderId] ?? ASR_PROVIDERS['openai-whisper']; - const isServerConfigured = !!asrProvidersConfig[selectedProviderId]?.isServerConfigured; + const asrProvider = ASR_PROVIDERS[selectedProviderId as keyof typeof ASR_PROVIDERS]; + const isCustom = isCustomASRProvider(selectedProviderId); + const providerConfig = asrProvidersConfig[selectedProviderId]; + const isServerConfigured = !!providerConfig?.isServerConfigured; + const requiresApiKey = isCustom + ? !!providerConfig?.requiresApiKey + : !!asrProvider?.requiresApiKey; const [showApiKey, setShowApiKey] = useState(false); + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); const [isRecording, setIsRecording] = useState(false); + const [isProcessing, setIsProcessing] = useState(false); const [asrResult, setASRResult] = useState(''); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); @@ -107,18 +127,22 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { }; mediaRecorder.onstop = async () => { stream.getTracks().forEach((track) => track.stop()); + setIsProcessing(true); const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); const formData = new FormData(); formData.append('audio', audioBlob, 'recording.webm'); formData.append('providerId', selectedProviderId); formData.append( 'modelId', - asrProvidersConfig[selectedProviderId]?.modelId || asrProvider.defaultModelId, + asrProvidersConfig[selectedProviderId]?.modelId || asrProvider?.defaultModelId || '', ); formData.append('language', asrLanguage); const apiKeyValue = asrProvidersConfig[selectedProviderId]?.apiKey; if (apiKeyValue?.trim()) formData.append('apiKey', apiKeyValue); - const baseUrlValue = asrProvidersConfig[selectedProviderId]?.baseUrl; + const baseUrlValue = + asrProvidersConfig[selectedProviderId]?.baseUrl || + providerConfig?.customDefaultBaseUrl || + ''; if (baseUrlValue?.trim()) formData.append('baseUrl', baseUrlValue); try { @@ -128,9 +152,14 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { }); if (response.ok) { const data = await response.json(); - setASRResult(data.text); - setTestStatus('success'); - setTestMessage(t('settings.asrTestSuccess')); + if (data.text?.trim()) { + setASRResult(data.text); + setTestStatus('success'); + setTestMessage(t('settings.asrTestSuccess')); + } else { + setTestStatus('error'); + setTestMessage(data.error || t('settings.asrNoTranscription')); + } } else { setTestStatus('error'); const errorData = await response @@ -141,7 +170,13 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { } catch (error) { log.error('ASR test failed:', error); setTestStatus('error'); - setTestMessage(t('settings.asrTestFailed')); + setTestMessage( + error instanceof Error && error.message + ? `${t('settings.asrTestFailed')}: ${error.message}` + : t('settings.asrTestFailed'), + ); + } finally { + setIsProcessing(false); } }; mediaRecorder.start(); @@ -164,8 +199,15 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { )} + {/* No models warning for custom providers */} + {isCustom && ((providerConfig?.customModels as Array<{ id: string }>) || []).length === 0 && ( +
+ {t('settings.noModelsWarning')} +
+ )} + {/* API Key & Base URL */} - {(asrProvider.requiresApiKey || isServerConfigured) && ( + {(requiresApiKey || isServerConfigured || isCustom) && ( <>
@@ -206,7 +248,11 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { autoCapitalize="none" autoCorrect="off" spellCheck={false} - placeholder={asrProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')} + placeholder={ + isCustom + ? providerConfig?.customDefaultBaseUrl || 'http://localhost:8000/v1' + : asrProvider?.defaultBaseUrl || t('settings.enterCustomBaseUrl') + } value={asrProvidersConfig[selectedProviderId]?.baseUrl || ''} onChange={(e) => setASRProviderConfig(selectedProviderId, { @@ -220,16 +266,22 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { {/* Request URL Preview */} {(() => { const effectiveBaseUrl = - asrProvidersConfig[selectedProviderId]?.baseUrl || asrProvider.defaultBaseUrl || ''; + asrProvidersConfig[selectedProviderId]?.baseUrl || + (isCustom ? providerConfig?.customDefaultBaseUrl : asrProvider?.defaultBaseUrl) || + ''; if (!effectiveBaseUrl) return null; let endpointPath = ''; - switch (selectedProviderId) { - case 'openai-whisper': - endpointPath = '/audio/transcriptions'; - break; - case 'qwen-asr': - endpointPath = '/services/aigc/multimodal-generation/generation'; - break; + if (isCustom) { + endpointPath = '/audio/transcriptions'; + } else { + switch (selectedProviderId) { + case 'openai-whisper': + endpointPath = '/audio/transcriptions'; + break; + case 'qwen-asr': + endpointPath = '/services/aigc/multimodal-generation/generation'; + break; + } } if (!endpointPath) return null; return ( @@ -254,13 +306,19 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
)} - {/* Model Selection */} - {asrProvider.models.length > 0 && ( + {/* Model Selection — built-in providers */} + {!isCustom && asrProvider?.models?.length > 0 && (
- +
)} + + {/* Model Management — custom providers */} + {isCustom && ( +
+ + {(() => { + const customModels = + (providerConfig?.customModels as Array<{ id: string; name: string }>) || []; + const activeModelId = + asrProvidersConfig[selectedProviderId]?.modelId || customModels[0]?.id || ''; + return ( + <> + {customModels.length > 0 ? ( +
+
+ + + ID + + + {t('settings.modelNamePlaceholder')} + + +
+ {customModels.map((model, index) => { + const isActive = model.id === activeModelId; + return ( +
+ setASRProviderConfig(selectedProviderId, { modelId: model.id }) + } + className={cn( + 'grid grid-cols-[20px_1fr_1fr_36px] gap-0 items-center px-3 py-2 group cursor-pointer transition-colors', + isActive ? 'bg-primary/5' : 'hover:bg-muted/20', + index > 0 && 'border-t border-border/30', + )} + > +
+
+ {isActive &&
} +
+
+ + {model.id} + + + {model.name} + + +
+ ); + })} +
+ ) : ( +

+ {t('settings.noModelsAdded')} +

+ )} + m.id)} + onAdd={(modelId, modelName) => { + const models = [...customModels, { id: modelId, name: modelName }]; + setASRProviderConfig(selectedProviderId, { + customModels: models, + modelId: models[0].id, + }); + }} + /> + + ); + })()} +
+ )} + + {/* Delete Custom Provider */} + {isCustom && ( +
+ +
+ )} + + {/* Delete Confirmation Dialog */} + !open && setShowDeleteConfirm(false)} + > + + + {t('settings.deleteProvider')} + {t('settings.deleteProviderConfirm')} + + + {t('settings.cancelEdit')} + { + removeCustomASRProvider(selectedProviderId); + setShowDeleteConfirm(false); + }} + > + {t('settings.deleteProvider')} + + + + +
+ ); +} + +function AddModelRow({ + onAdd, + existingIds, +}: { + onAdd: (id: string, name: string) => void; + existingIds: string[]; +}) { + const { t } = useI18n(); + const [modelId, setModelId] = useState(''); + const [modelName, setModelName] = useState(''); + + const handleAdd = () => { + if (!modelId.trim()) return; + if (existingIds.includes(modelId.trim())) { + toast.error('Duplicate ID'); + return; + } + onAdd(modelId.trim(), modelName.trim() || modelId.trim()); + setModelId(''); + setModelName(''); + }; + + return ( +
+ setModelId(e.target.value)} + onKeyDown={(e) => e.key === 'Enter' && handleAdd()} + className="text-sm font-mono" + placeholder={t('settings.modelIdPlaceholder')} + /> + setModelName(e.target.value)} + onKeyDown={(e) => e.key === 'Enter' && handleAdd()} + className="text-sm" + placeholder={t('settings.modelNamePlaceholder')} + /> +
); } diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index d88590ac0..de5abbb7d 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -21,6 +21,7 @@ import { getASRSupportedLanguages, } from '@/lib/audio/constants'; import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types'; +import { isCustomASRProvider } from '@/lib/audio/types'; import { Volume2, Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; import { cn } from '@/lib/utils'; import azureVoicesData from '@/lib/audio/azure.json'; @@ -89,7 +90,8 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const setTTSEnabled = useSettingsStore((state) => state.setTTSEnabled); const setASREnabled = useSettingsStore((state) => state.setASREnabled); - const ttsProvider = TTS_PROVIDERS[ttsProviderId] ?? TTS_PROVIDERS['openai-tts']; + const ttsProvider = + TTS_PROVIDERS[ttsProviderId as keyof typeof TTS_PROVIDERS] ?? TTS_PROVIDERS['openai-tts']; // Azure voices - load from static JSON const azureVoices = useMemo(() => azureVoicesData.voices, []); @@ -146,7 +148,8 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const ttsTestRequestIdRef = useRef(0); const mediaRecorderRef = useRef(null); - const asrProvider = ASR_PROVIDERS[asrProviderId] ?? ASR_PROVIDERS['openai-whisper']; + const asrProvider = ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]; + const isCustomASR = isCustomASRProvider(asrProviderId); // Reset locale filter when provider changes (derived state pattern) const [prevTTSProviderId, setPrevTTSProviderId] = useState(ttsProviderId); @@ -326,7 +329,10 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { if (apiKeyValue && apiKeyValue.trim()) { formData.append('apiKey', apiKeyValue); } - const baseUrlValue = asrProvidersConfig[asrProviderId]?.baseUrl; + const baseUrlValue = + asrProvidersConfig[asrProviderId]?.baseUrl || + asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl || + ''; if (baseUrlValue && baseUrlValue.trim()) { formData.append('baseUrl', baseUrlValue); } @@ -575,11 +581,19 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
))} + {Object.entries(asrProvidersConfig) + .filter(([id]) => isCustomASRProvider(id)) + .map(([id, cfg]) => ( + +
{cfg.customName || id}
+
+ ))} - {(asrProvider.requiresApiKey || + {(asrProvider?.requiresApiKey || + isCustomASR || asrProvidersConfig[asrProviderId]?.isServerConfigured) && ( <>
@@ -614,7 +628,12 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
handleASRProviderConfigChange(asrProviderId, { @@ -627,20 +646,28 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
{(() => { const effectiveBaseUrl = - asrProvidersConfig[asrProviderId]?.baseUrl || asrProvider.defaultBaseUrl || ''; + asrProvidersConfig[asrProviderId]?.baseUrl || + (isCustomASR + ? asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl + : asrProvider?.defaultBaseUrl) || + ''; if (!effectiveBaseUrl) return null; // Get endpoint path based on provider let endpointPath = ''; - switch (asrProviderId) { - case 'openai-whisper': - endpointPath = '/audio/transcriptions'; - break; - case 'qwen-asr': - endpointPath = '/services/aigc/multimodal-generation/generation'; - break; - default: - endpointPath = ''; + if (isCustomASR) { + endpointPath = '/audio/transcriptions'; + } else { + switch (asrProviderId) { + case 'openai-whisper': + endpointPath = '/audio/transcriptions'; + break; + case 'qwen-asr': + endpointPath = '/services/aigc/multimodal-generation/generation'; + break; + default: + endpointPath = ''; + } } if (!endpointPath) return null; diff --git a/components/settings/index.tsx b/components/settings/index.tsx index 25eb4b526..bbe8e6893 100644 --- a/components/settings/index.tsx +++ b/components/settings/index.tsx @@ -26,6 +26,7 @@ import { Search, Volume2, Mic, + Plus, } from 'lucide-react'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; @@ -57,6 +58,8 @@ import type { WebSearchProviderId } from '@/lib/web-search/types'; import { GeneralSettings } from './general-settings'; import { ModelEditDialog } from './model-edit-dialog'; import { AddProviderDialog, type NewProviderData } from './add-provider-dialog'; +import { AddAudioProviderDialog, type NewAudioProviderData } from './add-audio-provider-dialog'; +import { isCustomTTSProvider, isCustomASRProvider } from '@/lib/audio/types'; import type { SettingsSection, EditingModel } from '@/lib/types/settings'; // ─── Provider List Column (reusable) ─── @@ -67,6 +70,7 @@ function ProviderListColumn({ onSelect, width, t, + onAdd, }: { providers: Array<{ id: T; name: string; icon?: string }>; configs: Record; @@ -74,6 +78,7 @@ function ProviderListColumn({ onSelect: (id: T) => void; width: number; t: (key: string) => string; + onAdd?: () => void; }) { return (
@@ -113,13 +118,25 @@ function ProviderListColumn({ ))}
+ {onAdd && ( +
+ +
+ )}
); } // ─── Helper: get TTS/ASR provider display name ─── function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string { - const names: Record = { + if (isCustomTTSProvider(providerId)) { + const cfg = useSettingsStore.getState().ttsProvidersConfig[providerId]; + return cfg?.customName || providerId; + } + const names: Record = { 'openai-tts': t('settings.providerOpenAITTS'), 'azure-tts': t('settings.providerAzureTTS'), 'glm-tts': t('settings.providerGLMTTS'), @@ -129,16 +146,20 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'minimax-tts': t('settings.providerMiniMaxTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; - return names[providerId]; + return names[providerId] || providerId; } function getASRProviderName(providerId: ASRProviderId, t: (key: string) => string): string { - const names: Record = { + if (isCustomASRProvider(providerId)) { + const cfg = useSettingsStore.getState().asrProvidersConfig[providerId]; + return cfg?.customName || providerId; + } + const names: Record = { 'openai-whisper': t('settings.providerOpenAIWhisper'), 'browser-native': t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), }; - return names[providerId]; + return names[providerId] || providerId; } // ─── Image/Video provider name helpers ─── @@ -236,6 +257,20 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD // Add provider dialog const [showAddProviderDialog, setShowAddProviderDialog] = useState(false); + const [showAddTTSProviderDialog, setShowAddTTSProviderDialog] = useState(false); + const [showAddASRProviderDialog, setShowAddASRProviderDialog] = useState(false); + const addCustomTTSProvider = useSettingsStore((state) => state.addCustomTTSProvider); + const addCustomASRProvider = useSettingsStore((state) => state.addCustomASRProvider); + + const handleAddTTSProvider = (data: NewAudioProviderData) => { + const id = `custom-tts-${Date.now()}` as TTSProviderId; + addCustomTTSProvider(id, data.name, data.baseUrl, data.requiresApiKey, data.defaultModel); + }; + + const handleAddASRProvider = (data: NewAudioProviderData) => { + const id = `custom-asr-${Date.now()}` as ASRProviderId; + addCustomASRProvider(id, data.name, data.baseUrl, data.requiresApiKey); + }; // Save status indicator const [saveStatus, setSaveStatus] = useState<'idle' | 'saved' | 'error'>('idle'); @@ -629,7 +664,7 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD ); } case 'tts': { - const ttsIcon = TTS_PROVIDERS[ttsProviderId]?.icon; + const ttsIcon = TTS_PROVIDERS[ttsProviderId as keyof typeof TTS_PROVIDERS]?.icon; return ( <> {ttsIcon ? ( @@ -649,7 +684,7 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD ); } case 'asr': { - const asrIcon = ASR_PROVIDERS[asrProviderId]?.icon; + const asrIcon = ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.icon; return ( <> {asrIcon ? ( @@ -900,16 +935,26 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD {activeSection === 'tts' && ( <> ({ - id: p.id, - name: getTTSProviderName(p.id, t), - icon: p.icon, - }))} + providers={[ + ...Object.values(TTS_PROVIDERS).map((p) => ({ + id: p.id, + name: getTTSProviderName(p.id, t), + icon: p.icon, + })), + ...Object.entries(ttsProvidersConfig) + .filter(([id]) => isCustomTTSProvider(id)) + .map(([id, cfg]) => ({ + id: id as TTSProviderId, + name: cfg.customName || id, + icon: undefined, + })), + ]} configs={ttsProvidersConfig} selectedId={ttsProviderId} onSelect={setTTSProvider} width={providerListWidth} t={t} + onAdd={() => setShowAddTTSProviderDialog(true)} />
handleResizeStart(e, 'providerList')} @@ -923,16 +968,26 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD {activeSection === 'asr' && ( <> ({ - id: p.id, - name: getASRProviderName(p.id, t), - icon: p.icon, - }))} + providers={[ + ...Object.values(ASR_PROVIDERS).map((p) => ({ + id: p.id, + name: getASRProviderName(p.id, t), + icon: p.icon, + })), + ...Object.entries(asrProvidersConfig) + .filter(([id]) => isCustomASRProvider(id)) + .map(([id, cfg]) => ({ + id: id as ASRProviderId, + name: cfg.customName || id, + icon: undefined, + })), + ]} configs={asrProvidersConfig} selectedId={asrProviderId} onSelect={setASRProvider} width={providerListWidth} t={t} + onAdd={() => setShowAddASRProviderDialog(true)} />
handleResizeStart(e, 'providerList')} @@ -1055,6 +1110,22 @@ export function SettingsDialog({ open, onOpenChange, initialSection }: SettingsD onAdd={handleAddProvider} /> + {/* Add TTS Provider Dialog */} + + + {/* Add ASR Provider Dialog */} + + {/* Delete Provider Confirmation */} state.ttsProvidersConfig); const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig); const activeProviderId = useSettingsStore((state) => state.ttsProviderId); + const setTTSVoice = useSettingsStore((state) => state.setTTSVoice); + const removeCustomTTSProvider = useSettingsStore((state) => state.removeCustomTTSProvider); + + const ttsProvider = TTS_PROVIDERS[selectedProviderId as keyof typeof TTS_PROVIDERS]; + const isCustom = isCustomTTSProvider(selectedProviderId); + const providerConfig = ttsProvidersConfig[selectedProviderId]; + const isServerConfigured = !!providerConfig?.isServerConfigured; + const requiresApiKey = isCustom + ? !!providerConfig?.requiresApiKey + : !!ttsProvider?.requiresApiKey; // When testing a non-active provider, use that provider's default voice // instead of the active provider's voice (which may be incompatible). const effectiveVoice = selectedProviderId === activeProviderId ? ttsVoice - : DEFAULT_TTS_VOICES[selectedProviderId] || 'default'; - - const ttsProvider = TTS_PROVIDERS[selectedProviderId] ?? TTS_PROVIDERS['openai-tts']; - const isServerConfigured = !!ttsProvidersConfig[selectedProviderId]?.isServerConfigured; + : isCustomTTSProvider(selectedProviderId) + ? ((providerConfig?.customVoices as Array<{ id: string }> | undefined) || [])[0]?.id || + 'default' + : DEFAULT_TTS_VOICES[selectedProviderId as keyof typeof DEFAULT_TTS_VOICES] || 'default'; const [showApiKey, setShowApiKey] = useState(false); + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); const [testText, setTestText] = useState(t('settings.ttsTestTextDefault')); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); @@ -84,11 +107,15 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { await startPreview({ text: testText, providerId: selectedProviderId, - modelId: ttsProvidersConfig[selectedProviderId]?.modelId || ttsProvider.defaultModelId, + modelId: + ttsProvidersConfig[selectedProviderId]?.modelId || ttsProvider?.defaultModelId || '', voice: effectiveVoice, speed: ttsSpeed, apiKey: ttsProvidersConfig[selectedProviderId]?.apiKey, - baseUrl: ttsProvidersConfig[selectedProviderId]?.baseUrl, + baseUrl: + ttsProvidersConfig[selectedProviderId]?.baseUrl || + providerConfig?.customDefaultBaseUrl || + '', }); setTestStatus('success'); setTestMessage(t('settings.ttsTestSuccess')); @@ -113,7 +140,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { )} {/* API Key & Base URL */} - {(ttsProvider.requiresApiKey || isServerConfigured) && ( + {(requiresApiKey || isServerConfigured || isCustom) && ( <>
{isDoubao ? ( @@ -217,7 +244,11 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { autoCapitalize="none" autoCorrect="off" spellCheck={false} - placeholder={ttsProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')} + placeholder={ + isCustom + ? providerConfig?.customDefaultBaseUrl || 'http://localhost:8000/v1' + : ttsProvider?.defaultBaseUrl || t('settings.enterCustomBaseUrl') + } value={ttsProvidersConfig[selectedProviderId]?.baseUrl || ''} onChange={(e) => setTTSProviderConfig(selectedProviderId, { @@ -231,26 +262,32 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { {/* Request URL Preview */} {(() => { const effectiveBaseUrl = - ttsProvidersConfig[selectedProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || ''; + ttsProvidersConfig[selectedProviderId]?.baseUrl || + (isCustom ? providerConfig?.customDefaultBaseUrl : ttsProvider?.defaultBaseUrl) || + ''; if (!effectiveBaseUrl) return null; let endpointPath = ''; - switch (selectedProviderId) { - case 'openai-tts': - case 'glm-tts': - endpointPath = '/audio/speech'; - break; - case 'azure-tts': - endpointPath = '/cognitiveservices/v1'; - break; - case 'qwen-tts': - endpointPath = '/services/aigc/multimodal-generation/generation'; - break; - case 'elevenlabs-tts': - endpointPath = '/text-to-speech'; - break; - case 'doubao-tts': - endpointPath = '/unidirectional'; - break; + if (isCustom) { + endpointPath = '/audio/speech'; + } else { + switch (selectedProviderId) { + case 'openai-tts': + case 'glm-tts': + endpointPath = '/audio/speech'; + break; + case 'azure-tts': + endpointPath = '/cognitiveservices/v1'; + break; + case 'qwen-tts': + endpointPath = '/services/aigc/multimodal-generation/generation'; + break; + case 'elevenlabs-tts': + endpointPath = '/text-to-speech'; + break; + case 'doubao-tts': + endpointPath = '/unidirectional'; + break; + } } if (!endpointPath) return null; return ( @@ -277,7 +314,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { disabled={ testingTTS || !testText.trim() || - (ttsProvider.requiresApiKey && + (requiresApiKey && !ttsProvidersConfig[selectedProviderId]?.apiKey?.trim() && !isServerConfigured) } @@ -313,7 +350,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { )} {/* Available Models */} - {ttsProvider.models.length > 0 && ( + {ttsProvider?.models?.length > 0 && (
@@ -332,6 +369,175 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {

)} + + {/* Custom Voice List Management */} + {isCustom && ( +
+ + {(providerConfig?.customVoices as Array<{ id: string; name: string }> | undefined) + ?.length ? ( +
+ {/* Column headers */} +
+ + ID + + + {t('settings.voiceNamePlaceholder')} + + +
+ {/* Voice rows */} + {( + providerConfig?.customVoices as Array<{ + id: string; + name: string; + }> + ).map((voice, index) => ( +
0 && 'border-t border-border/30', + )} + > + + {voice.id} + + {voice.name} + +
+ ))} +
+ ) : ( +

{t('settings.noVoicesAdded')}

+ )} + | undefined) || + [] + ).map((v) => v.id)} + onAdd={(voiceId, voiceName) => { + const voices = [ + ...((providerConfig?.customVoices as + | Array<{ id: string; name: string }> + | undefined) || []), + { id: voiceId, name: voiceName }, + ]; + setTTSProviderConfig(selectedProviderId, { + customVoices: voices, + } as Record); + // Auto-select the first voice if current voice is 'default' + if (ttsVoice === 'default' && selectedProviderId === activeProviderId) { + setTTSVoice(voiceId); + } + }} + /> +
+ )} + + {/* Delete Custom Provider */} + {isCustom && ( +
+ +
+ )} + + {/* Delete Confirmation Dialog */} + !open && setShowDeleteConfirm(false)} + > + + + {t('settings.deleteProvider')} + {t('settings.deleteProviderConfirm')} + + + {t('settings.cancelEdit')} + { + removeCustomTTSProvider(selectedProviderId); + setShowDeleteConfirm(false); + }} + > + {t('settings.deleteProvider')} + + + + +
+ ); +} + +function AddVoiceRow({ + onAdd, + existingIds, +}: { + onAdd: (id: string, name: string) => void; + existingIds: string[]; +}) { + const { t } = useI18n(); + const [voiceId, setVoiceId] = useState(''); + const [voiceName, setVoiceName] = useState(''); + + const handleAdd = () => { + if (!voiceId.trim()) return; + if (existingIds.includes(voiceId.trim())) { + toast.error('Duplicate ID'); + return; + } + onAdd(voiceId.trim(), voiceName.trim() || voiceId.trim()); + setVoiceId(''); + setVoiceName(''); + }; + + return ( +
+ setVoiceId(e.target.value)} + onKeyDown={(e) => e.key === 'Enter' && handleAdd()} + className="text-sm font-mono" + placeholder={t('settings.voiceIdPlaceholder')} + /> + setVoiceName(e.target.value)} + onKeyDown={(e) => e.key === 'Enter' && handleAdd()} + className="text-sm" + placeholder={t('settings.voiceNamePlaceholder')} + /> +
); } diff --git a/lib/audio/asr-providers.ts b/lib/audio/asr-providers.ts index 93365899a..0fec13dc0 100644 --- a/lib/audio/asr-providers.ts +++ b/lib/audio/asr-providers.ts @@ -148,6 +148,7 @@ import { createOpenAI } from '@ai-sdk/openai'; import { experimental_transcribe as transcribe } from 'ai'; import type { ASRModelConfig } from './types'; +import { isCustomASRProvider } from './types'; import { ASR_PROVIDERS } from './constants'; /** @@ -164,13 +165,10 @@ export async function transcribeAudio( config: ASRModelConfig, audioBuffer: Buffer | Blob, ): Promise { - const provider = ASR_PROVIDERS[config.providerId]; - if (!provider) { - throw new Error(`Unknown ASR provider: ${config.providerId}`); - } + const provider = ASR_PROVIDERS[config.providerId as keyof typeof ASR_PROVIDERS]; - // Validate API key if required - if (provider.requiresApiKey && !config.apiKey) { + // Validate API key if required (only for built-in providers with known config) + if (provider?.requiresApiKey && !config.apiKey) { throw new Error(`API key required for ASR provider: ${config.providerId}`); } @@ -185,6 +183,9 @@ export async function transcribeAudio( return await transcribeQwenASR(config, audioBuffer); default: + if (isCustomASRProvider(config.providerId)) { + return await transcribeOpenAIWhisper(config, audioBuffer); + } throw new Error(`Unsupported ASR provider: ${config.providerId}`); } } @@ -343,9 +344,12 @@ export async function getCurrentASRConfig(): Promise { return { providerId: asrProviderId, - modelId: providerConfig?.modelId || ASR_PROVIDERS[asrProviderId]?.defaultModelId || '', + modelId: + providerConfig?.modelId || + ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.defaultModelId || + '', apiKey: providerConfig?.apiKey, - baseUrl: providerConfig?.baseUrl, + baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl, language: asrLanguage, }; } diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts index 423f5b82c..db4a54315 100644 --- a/lib/audio/constants.ts +++ b/lib/audio/constants.ts @@ -28,13 +28,35 @@ */ import type { + BuiltInTTSProviderId, TTSProviderId, TTSProviderConfig, TTSVoiceInfo, + BuiltInASRProviderId, ASRProviderId, ASRProviderConfig, } from './types'; +/** + * Default supported languages for custom OpenAI-compatible ASR providers. + * A practical subset of commonly used languages + auto-detect. + */ +export const CUSTOM_ASR_DEFAULT_LANGUAGES = [ + 'auto', + 'zh', + 'en', + 'ja', + 'ko', + 'es', + 'fr', + 'de', + 'ru', + 'ar', + 'pt', + 'it', + 'hi', +]; + /** * TTS Provider Registry * @@ -50,7 +72,7 @@ export const MINIMAX_TTS_MODELS = [ { id: 'speech-02-turbo', name: 'Speech 02 Turbo' }, ] as const; -export const TTS_PROVIDERS: Record = { +export const TTS_PROVIDERS: Record = { 'openai-tts': { id: 'openai-tts', name: 'OpenAI TTS', @@ -906,7 +928,7 @@ export const TTS_PROVIDERS: Record = { * Central registry for all ASR providers. * Keep in sync with ASRProviderId type definition. */ -export const ASR_PROVIDERS: Record = { +export const ASR_PROVIDERS: Record = { 'openai-whisper': { id: 'openai-whisper', name: 'OpenAI Whisper', @@ -1099,25 +1121,11 @@ export const ASR_PROVIDERS: Record = { }, }; -/** - * Get all available TTS providers - */ -export function getAllTTSProviders(): TTSProviderConfig[] { - return Object.values(TTS_PROVIDERS); -} - -/** - * Get TTS provider by ID - */ -export function getTTSProvider(providerId: TTSProviderId): TTSProviderConfig | undefined { - return TTS_PROVIDERS[providerId]; -} - /** * Default voice for each TTS provider. * Used when switching providers or testing a non-active provider. */ -export const DEFAULT_TTS_VOICES: Record = { +export const DEFAULT_TTS_VOICES: Record = { 'openai-tts': 'alloy', 'azure-tts': 'zh-CN-XiaoxiaoNeural', 'glm-tts': 'tongtong', @@ -1128,7 +1136,7 @@ export const DEFAULT_TTS_VOICES: Record = { 'browser-native-tts': 'default', }; -export const DEFAULT_TTS_MODELS: Record = { +export const DEFAULT_TTS_MODELS: Record = { 'openai-tts': 'gpt-4o-mini-tts', 'azure-tts': '', 'glm-tts': 'glm-tts', @@ -1139,30 +1147,70 @@ export const DEFAULT_TTS_MODELS: Record = { 'browser-native-tts': '', }; +/** + * Get all available TTS providers (built-in + custom) + */ +export function getAllTTSProviders( + customProviders?: Record, +): TTSProviderConfig[] { + const builtIn = Object.values(TTS_PROVIDERS); + const custom = customProviders ? Object.values(customProviders) : []; + return [...builtIn, ...custom]; +} + +/** + * Get TTS provider by ID (checks built-in first, then custom) + */ +export function getTTSProvider( + providerId: TTSProviderId, + customProviders?: Record, +): TTSProviderConfig | undefined { + if (providerId in TTS_PROVIDERS) { + return TTS_PROVIDERS[providerId as BuiltInTTSProviderId]; + } + return customProviders?.[providerId]; +} + /** * Get voices for a specific TTS provider */ -export function getTTSVoices(providerId: TTSProviderId): TTSVoiceInfo[] { - return TTS_PROVIDERS[providerId]?.voices || []; +export function getTTSVoices( + providerId: TTSProviderId, + customProviders?: Record, +): TTSVoiceInfo[] { + return getTTSProvider(providerId, customProviders)?.voices || []; } /** - * Get all available ASR providers + * Get all available ASR providers (built-in + custom) */ -export function getAllASRProviders(): ASRProviderConfig[] { - return Object.values(ASR_PROVIDERS); +export function getAllASRProviders( + customProviders?: Record, +): ASRProviderConfig[] { + const builtIn = Object.values(ASR_PROVIDERS); + const custom = customProviders ? Object.values(customProviders) : []; + return [...builtIn, ...custom]; } /** - * Get ASR provider by ID + * Get ASR provider by ID (checks built-in first, then custom) */ -export function getASRProvider(providerId: ASRProviderId): ASRProviderConfig | undefined { - return ASR_PROVIDERS[providerId]; +export function getASRProvider( + providerId: ASRProviderId, + customProviders?: Record, +): ASRProviderConfig | undefined { + if (providerId in ASR_PROVIDERS) { + return ASR_PROVIDERS[providerId as BuiltInASRProviderId]; + } + return customProviders?.[providerId]; } /** * Get supported languages for a specific ASR provider */ -export function getASRSupportedLanguages(providerId: ASRProviderId): string[] { - return ASR_PROVIDERS[providerId]?.supportedLanguages || []; +export function getASRSupportedLanguages( + providerId: ASRProviderId, + customProviders?: Record, +): string[] { + return getASRProvider(providerId, customProviders)?.supportedLanguages || []; } diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts index 67f0e7cc0..54fd2f801 100644 --- a/lib/audio/tts-providers.ts +++ b/lib/audio/tts-providers.ts @@ -93,6 +93,7 @@ */ import type { TTSModelConfig } from './types'; +import { isCustomTTSProvider } from './types'; import { TTS_PROVIDERS } from './constants'; /** @@ -127,13 +128,10 @@ export async function generateTTS( config: TTSModelConfig, text: string, ): Promise { - const provider = TTS_PROVIDERS[config.providerId]; - if (!provider) { - throw new Error(`Unknown TTS provider: ${config.providerId}`); - } + const provider = TTS_PROVIDERS[config.providerId as keyof typeof TTS_PROVIDERS]; - // Validate API key if required - if (provider.requiresApiKey && !config.apiKey) { + // Validate API key if required (only for built-in providers with known config) + if (provider?.requiresApiKey && !config.apiKey) { throw new Error(`API key required for TTS provider: ${config.providerId}`); } @@ -163,6 +161,9 @@ export async function generateTTS( ); default: + if (isCustomTTSProvider(config.providerId)) { + return await generateOpenAITTS(config, text); + } throw new Error(`Unsupported TTS provider: ${config.providerId}`); } } @@ -475,9 +476,12 @@ export async function getCurrentTTSConfig(): Promise { return { providerId: ttsProviderId, - modelId: providerConfig?.modelId || TTS_PROVIDERS[ttsProviderId]?.defaultModelId || '', + modelId: + providerConfig?.modelId || + TTS_PROVIDERS[ttsProviderId as keyof typeof TTS_PROVIDERS]?.defaultModelId || + '', apiKey: providerConfig?.apiKey, - baseUrl: providerConfig?.baseUrl, + baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl, voice: ttsVoice, speed: ttsSpeed, }; diff --git a/lib/audio/types.ts b/lib/audio/types.ts index 0c3c91792..87636b48d 100644 --- a/lib/audio/types.ts +++ b/lib/audio/types.ts @@ -78,7 +78,7 @@ * Add new TTS providers here as union members. * Keep in sync with TTS_PROVIDERS registry in constants.ts */ -export type TTSProviderId = +export type BuiltInTTSProviderId = | 'openai-tts' | 'azure-tts' | 'glm-tts' @@ -87,10 +87,8 @@ export type TTSProviderId = | 'elevenlabs-tts' | 'minimax-tts' | 'browser-native-tts'; -// Add new TTS providers below (uncomment and modify): -// | 'fish-audio-tts' -// | 'cartesia-tts' -// | 'playht-tts' + +export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`; /** * Voice information for TTS @@ -152,12 +150,9 @@ export interface TTSModelConfig { * Add new ASR providers here as union members. * Keep in sync with ASR_PROVIDERS registry in constants.ts */ -export type ASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr'; -// Add new ASR providers below (uncomment and modify): -// | 'elevenlabs-asr' -// | 'assemblyai-asr' -// | 'deepgram-asr' -// | 'azure-asr' +export type BuiltInASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr'; + +export type ASRProviderId = BuiltInASRProviderId | `custom-asr-${string}`; /** * ASR Provider Configuration @@ -184,3 +179,13 @@ export interface ASRModelConfig { baseUrl?: string; language?: string; } + +/** Returns true if the provider ID is a user-defined custom TTS provider. */ +export function isCustomTTSProvider(id: string): boolean { + return id.startsWith('custom-tts-'); +} + +/** Returns true if the provider ID is a user-defined custom ASR provider. */ +export function isCustomASRProvider(id: string): boolean { + return id.startsWith('custom-asr-'); +} diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts index b4e54eee8..86e80b039 100644 --- a/lib/audio/voice-resolver.ts +++ b/lib/audio/voice-resolver.ts @@ -1,4 +1,5 @@ import type { TTSProviderId } from '@/lib/audio/types'; +import { isCustomTTSProvider } from '@/lib/audio/types'; import type { AgentConfig } from '@/lib/orchestration/registry/types'; import { TTS_PROVIDERS } from '@/lib/audio/constants'; @@ -29,7 +30,12 @@ export function resolveAgentVoice( }; } const list = getServerVoiceList(agent.voiceConfig.providerId); - if (list.includes(agent.voiceConfig.voiceId)) { + // Also check available providers (covers custom providers with dynamic voice lists) + const fromAvailable = availableProviders + .find((p) => p.providerId === agent.voiceConfig!.providerId) + ?.voices.map((v) => v.id); + const allVoiceIds = new Set([...list, ...(fromAvailable || [])]); + if (allVoiceIds.has(agent.voiceConfig.voiceId)) { return { providerId: agent.voiceConfig.providerId, modelId: agent.voiceConfig.modelId, @@ -53,10 +59,20 @@ export function resolveAgentVoice( /** * Get the list of voice IDs for a TTS provider. * For browser-native-tts, returns empty (browser voices are dynamic). + * For custom providers, reads from ttsProvidersConfig.customVoices. */ -export function getServerVoiceList(providerId: TTSProviderId): string[] { +export function getServerVoiceList( + providerId: TTSProviderId, + ttsProvidersConfig?: Record>, +): string[] { if (providerId === 'browser-native-tts') return []; - const provider = TTS_PROVIDERS[providerId]; + if (isCustomTTSProvider(providerId) && ttsProvidersConfig) { + const customVoices = ttsProvidersConfig[providerId]?.customVoices as + | Array<{ id: string }> + | undefined; + return customVoices?.map((v) => v.id) || []; + } + const provider = TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]; if (!provider) return []; return provider.voices.map((v) => v.id); } @@ -77,16 +93,25 @@ export interface ProviderWithVoices { /** * Get all available providers and their voices for the voice picker UI. * A provider is available if it has an API key or is server-configured. + * Custom providers are available if they have voices configured. * Browser-native-tts is excluded (no static voice list). */ export function getAvailableProvidersWithVoices( ttsProvidersConfig: Record< string, - { apiKey?: string; enabled?: boolean; isServerConfigured?: boolean } + { + apiKey?: string; + enabled?: boolean; + isServerConfigured?: boolean; + modelId?: string; + customName?: string; + customVoices?: Array<{ id: string; name: string }>; + } >, ): ProviderWithVoices[] { const result: ProviderWithVoices[] = []; + // Built-in providers for (const [id, config] of Object.entries(TTS_PROVIDERS)) { const providerId = id as TTSProviderId; if (providerId === 'browser-native-tts') continue; @@ -113,7 +138,6 @@ export function getAvailableProvidersWithVoices( }); } } else { - // Provider has no model concept (Azure, Browser Native, Doubao) modelGroups.push({ modelId: '', modelName: config.name, @@ -130,14 +154,43 @@ export function getAvailableProvidersWithVoices( } } + // Custom providers + for (const [id, providerConfig] of Object.entries(ttsProvidersConfig)) { + if (!isCustomTTSProvider(id)) continue; + const customVoices = providerConfig.customVoices || []; + if (customVoices.length === 0) continue; + + const providerId = id as TTSProviderId; + const providerName = providerConfig.customName || id; + const voices = customVoices.map((v) => ({ id: v.id, name: v.name })); + + result.push({ + providerId, + providerName, + voices, + modelGroups: [{ modelId: '', modelName: providerName, voices }], + }); + } + return result; } /** * Find a voice display name across all providers. */ -export function findVoiceDisplayName(providerId: TTSProviderId, voiceId: string): string { - const provider = TTS_PROVIDERS[providerId]; +export function findVoiceDisplayName( + providerId: TTSProviderId, + voiceId: string, + ttsProvidersConfig?: Record>, +): string { + if (isCustomTTSProvider(providerId) && ttsProvidersConfig) { + const customVoices = ttsProvidersConfig[providerId]?.customVoices as + | Array<{ id: string; name: string }> + | undefined; + const voice = customVoices?.find((v) => v.id === voiceId); + return voice?.name ?? voiceId; + } + const provider = TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]; if (!provider) return voiceId; const voice = provider.voices.find((v) => v.id === voiceId); return voice?.name ?? voiceId; diff --git a/lib/hooks/use-audio-recorder.ts b/lib/hooks/use-audio-recorder.ts index 0249e94f1..327985bb2 100644 --- a/lib/hooks/use-audio-recorder.ts +++ b/lib/hooks/use-audio-recorder.ts @@ -53,7 +53,7 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) { formData.append( 'modelId', asrProvidersConfig?.[asrProviderId]?.modelId || - ASR_PROVIDERS[asrProviderId]?.defaultModelId || + ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.defaultModelId || '', ); formData.append('language', asrLanguage); @@ -63,8 +63,10 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) { if (providerConfig?.apiKey?.trim()) { formData.append('apiKey', providerConfig.apiKey); } - if (providerConfig?.baseUrl?.trim()) { - formData.append('baseUrl', providerConfig.baseUrl); + const effectiveBaseUrl = + providerConfig?.baseUrl?.trim() || providerConfig?.customDefaultBaseUrl || ''; + if (effectiveBaseUrl) { + formData.append('baseUrl', effectiveBaseUrl); } } diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index abd9dd364..2d90b8845 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -159,7 +159,10 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus ttsVoice: item.voiceId, ttsSpeed: ttsSpeed, ttsApiKey: providerConfig?.apiKey, - ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + ttsBaseUrl: + providerConfig?.serverBaseUrl || + providerConfig?.baseUrl || + providerConfig?.customDefaultBaseUrl, }), signal: controller.signal, }); diff --git a/lib/hooks/use-scene-generator.ts b/lib/hooks/use-scene-generator.ts index ce52e95bc..19bdd5e83 100644 --- a/lib/hooks/use-scene-generator.ts +++ b/lib/hooks/use-scene-generator.ts @@ -140,7 +140,8 @@ export async function generateAndStoreTTS( ttsVoice: settings.ttsVoice, ttsSpeed: settings.ttsSpeed, ttsApiKey: ttsProviderConfig?.apiKey || undefined, - ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined, + ttsBaseUrl: + ttsProviderConfig?.baseUrl || ttsProviderConfig?.customDefaultBaseUrl || undefined, }), signal, }); diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json index 997857983..a6e7f790b 100644 --- a/lib/i18n/locales/en-US.json +++ b/lib/i18n/locales/en-US.json @@ -456,6 +456,20 @@ "requiresApiKey": "Requires API Key", "deleteProvider": "Delete Provider", "deleteProviderConfirm": "Are you sure you want to delete this provider?", + "addCustomTTSProvider": "Add Custom TTS Provider", + "addCustomASRProvider": "Add Custom ASR Provider", + "addCustomAudioProviderDescription": "Add a custom OpenAI-compatible audio provider", + "customVoices": "Voices", + "voiceIdPlaceholder": "Voice ID (e.g. alloy)", + "voiceNamePlaceholder": "Display Name", + "addVoice": "Add", + "modelNamePlaceholder": "Display Name", + "addModel": "Add", + "defaultModelHint": "Model name sent in API requests (e.g. kokoro, tts-1)", + "noVoicesAdded": "No voices added yet. Add voices below for per-agent selection.", + "noModelsAdded": "No models added yet. Add models below to enable model selection.", + "noModelsWarning": "Please add at least one model below before using this provider.", + "asrNoTranscription": "No transcription generated. Try speaking louder or longer.", "cannotDeleteBuiltIn": "Cannot delete built-in provider", "resetToDefault": "Reset to Default", "resetToDefaultDescription": "Restore model list to default configuration (API key and Base URL will be preserved)", @@ -551,6 +565,7 @@ "ttsTestFailed": "TTS test failed", "asrTestSuccess": "Speech recognition successful", "asrTestFailed": "Speech recognition failed", + "asrProcessing": "Processing...", "asrResult": "Recognition Result", "asrNotSupported": "Browser does not support Speech Recognition API", "browserTTSNotSupported": "Browser does not support Speech Synthesis API", diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json index 6d82ebf07..2929d0fbb 100644 --- a/lib/i18n/locales/ja-JP.json +++ b/lib/i18n/locales/ja-JP.json @@ -456,6 +456,20 @@ "requiresApiKey": "APIキーが必要", "deleteProvider": "プロバイダーを削除", "deleteProviderConfirm": "このプロバイダーを削除してもよろしいですか?", + "addCustomTTSProvider": "カスタムTTSプロバイダーを追加", + "addCustomASRProvider": "カスタムASRプロバイダーを追加", + "addCustomAudioProviderDescription": "OpenAI互換のオーディオプロバイダーを追加", + "customVoices": "音声リスト", + "voiceIdPlaceholder": "音声ID(例: alloy)", + "voiceNamePlaceholder": "表示名", + "addVoice": "追加", + "modelNamePlaceholder": "表示名", + "addModel": "追加", + "defaultModelHint": "APIリクエストで送信されるモデル名(例: kokoro、tts-1)", + "noVoicesAdded": "音声がまだ追加されていません。エージェントごとの音声選択のために下で追加してください。", + "noModelsAdded": "モデルがまだ追加されていません。モデル選択のために下で追加してください。", + "noModelsWarning": "このプロバイダーを使用するには、まず下でモデルを追加してください。", + "asrNoTranscription": "文字起こし結果がありません。もう少し大きな声で、長めに話してみてください。", "cannotDeleteBuiltIn": "組み込みプロバイダーは削除できません", "resetToDefault": "デフォルトに戻す", "resetToDefaultDescription": "モデルリストをデフォルト設定に復元します(APIキーとベースURLは保持されます)", @@ -551,6 +565,7 @@ "ttsTestFailed": "TTSテストに失敗しました", "asrTestSuccess": "音声認識に成功しました", "asrTestFailed": "音声認識に失敗しました", + "asrProcessing": "処理中...", "asrResult": "認識結果", "asrNotSupported": "お使いのブラウザは音声認識APIに対応していません", "browserTTSNotSupported": "お使いのブラウザは音声合成APIに対応していません", diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json index 9afa76e68..d3529d905 100644 --- a/lib/i18n/locales/ru-RU.json +++ b/lib/i18n/locales/ru-RU.json @@ -456,6 +456,20 @@ "requiresApiKey": "Требуется API-ключ", "deleteProvider": "Удалить провайдера", "deleteProviderConfirm": "Вы уверены, что хотите удалить этого провайдера?", + "addCustomTTSProvider": "Добавить TTS-провайдер", + "addCustomASRProvider": "Добавить ASR-провайдер", + "addCustomAudioProviderDescription": "Добавить OpenAI-совместимый аудио-провайдер", + "customVoices": "Голоса", + "voiceIdPlaceholder": "ID голоса (напр. alloy)", + "voiceNamePlaceholder": "Отображаемое имя", + "addVoice": "Добавить", + "modelNamePlaceholder": "Отображаемое имя", + "addModel": "Добавить", + "defaultModelHint": "Имя модели в API-запросах (напр. kokoro, tts-1)", + "noVoicesAdded": "Голоса ещё не добавлены. Добавьте ниже для выбора в агентах.", + "noModelsAdded": "Модели ещё не добавлены. Добавьте ниже для выбора модели.", + "noModelsWarning": "Добавьте хотя бы одну модель ниже перед использованием этого провайдера.", + "asrNoTranscription": "Транскрипция не получена. Попробуйте говорить громче или дольше.", "cannotDeleteBuiltIn": "Нельзя удалить встроенного провайдера", "resetToDefault": "Сбросить на стандартные", "resetToDefaultDescription": "Восстановить список моделей по умолчанию (API-ключ и Base URL будут сохранены)", @@ -551,6 +565,7 @@ "ttsTestFailed": "TTS-тест не пройден", "asrTestSuccess": "Распознавание речи успешно", "asrTestFailed": "Распознавание речи не удалось", + "asrProcessing": "Обработка...", "asrResult": "Результат распознавания", "asrNotSupported": "Браузер не поддерживает Speech Recognition API", "browserTTSNotSupported": "Браузер не поддерживает Speech Synthesis API", diff --git a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json index b2f7a2a79..679ae6fa3 100644 --- a/lib/i18n/locales/zh-CN.json +++ b/lib/i18n/locales/zh-CN.json @@ -456,6 +456,20 @@ "requiresApiKey": "需要 API 密钥", "deleteProvider": "删除提供方", "deleteProviderConfirm": "确定要删除此提供方吗?", + "addCustomTTSProvider": "添加自定义语音合成", + "addCustomASRProvider": "添加自定义语音识别", + "addCustomAudioProviderDescription": "添加兼容 OpenAI 协议的音频服务", + "customVoices": "音色列表", + "voiceIdPlaceholder": "音色 ID(如 alloy)", + "voiceNamePlaceholder": "显示名称", + "addVoice": "添加", + "modelNamePlaceholder": "显示名称", + "addModel": "添加", + "defaultModelHint": "API 请求中的模型名(如 kokoro、tts-1)", + "noVoicesAdded": "暂无音色,请在下方添加以支持 Agent 选择不同音色。", + "noModelsAdded": "暂无模型,请在下方添加以支持模型选择。", + "noModelsWarning": "请先在下方添加至少一个模型,才能使用此服务。", + "asrNoTranscription": "未生成转写结果,请尝试说大声一些或说长一些。", "cannotDeleteBuiltIn": "无法删除内置提供方", "resetToDefault": "重置为默认配置", "resetToDefaultDescription": "将模型列表恢复到默认状态(保留 API 密钥和 Base URL)", @@ -551,6 +565,7 @@ "ttsTestFailed": "TTS 测试失败", "asrTestSuccess": "语音识别成功", "asrTestFailed": "语音识别失败", + "asrProcessing": "处理中...", "asrResult": "识别结果", "asrNotSupported": "浏览器不支持语音识别 API", "browserTTSNotSupported": "浏览器不支持语音合成 API", diff --git a/lib/server/classroom-media-generation.ts b/lib/server/classroom-media-generation.ts index 175787a34..239d3f7c4 100644 --- a/lib/server/classroom-media-generation.ts +++ b/lib/server/classroom-media-generation.ts @@ -224,9 +224,12 @@ export async function generateTTSForClassroom( log.warn(`No API key for TTS provider "${providerId}", skipping TTS generation`); return; } - const ttsBaseUrl = resolveTTSBaseUrl(providerId) || TTS_PROVIDERS[providerId]?.defaultBaseUrl; - const voice = DEFAULT_TTS_VOICES[providerId] || 'default'; - const format = TTS_PROVIDERS[providerId]?.supportedFormats?.[0] || 'mp3'; + const ttsBaseUrl = + resolveTTSBaseUrl(providerId) || + TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.defaultBaseUrl; + const voice = DEFAULT_TTS_VOICES[providerId as keyof typeof DEFAULT_TTS_VOICES] || 'default'; + const format = + TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.supportedFormats?.[0] || 'mp3'; for (const scene of scenes) { if (!scene.actions) continue; @@ -244,7 +247,7 @@ export async function generateTTSForClassroom( const result = await generateTTS( { providerId, - modelId: DEFAULT_TTS_MODELS[providerId] || '', + modelId: DEFAULT_TTS_MODELS[providerId as keyof typeof DEFAULT_TTS_MODELS] || '', apiKey, baseUrl: ttsBaseUrl, voice, diff --git a/lib/store/settings.ts b/lib/store/settings.ts index 4b088bbc6..80fcf031d 100644 --- a/lib/store/settings.ts +++ b/lib/store/settings.ts @@ -8,7 +8,8 @@ import { persist } from 'zustand/middleware'; import type { ProviderId } from '@/lib/ai/providers'; import type { ProvidersConfig } from '@/lib/types/settings'; import { PROVIDERS } from '@/lib/ai/providers'; -import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types'; +import type { TTSProviderId, ASRProviderId, BuiltInTTSProviderId } from '@/lib/audio/types'; +import { isCustomTTSProvider, isCustomASRProvider } from '@/lib/audio/types'; import { ASR_PROVIDERS, DEFAULT_TTS_VOICES, TTS_PROVIDERS } from '@/lib/audio/constants'; import { PDF_PROVIDERS } from '@/lib/pdf/constants'; import type { PDFProviderId } from '@/lib/pdf/types'; @@ -56,6 +57,12 @@ export interface SettingsState { providerOptions?: Record; isServerConfigured?: boolean; serverBaseUrl?: string; + // Custom provider fields + customName?: string; + customDefaultBaseUrl?: string; + customVoices?: Array<{ id: string; name: string }>; + isBuiltIn?: boolean; + requiresApiKey?: boolean; } >; @@ -70,6 +77,11 @@ export interface SettingsState { providerOptions?: Record; isServerConfigured?: boolean; serverBaseUrl?: string; + // Custom provider fields + customName?: string; + customDefaultBaseUrl?: string; + isBuiltIn?: boolean; + requiresApiKey?: boolean; } >; @@ -190,6 +202,7 @@ export interface SettingsState { enabled: boolean; modelId: string; customModels: Array<{ id: string; name: string }>; + customVoices: Array<{ id: string; name: string }>; providerOptions: Record; }>, ) => void; @@ -207,6 +220,23 @@ export interface SettingsState { setTTSEnabled: (enabled: boolean) => void; setASREnabled: (enabled: boolean) => void; + // Custom audio provider actions + addCustomTTSProvider: ( + id: TTSProviderId, + name: string, + baseUrl: string, + requiresApiKey: boolean, + defaultModel?: string, + ) => void; + removeCustomTTSProvider: (id: TTSProviderId) => void; + addCustomASRProvider: ( + id: ASRProviderId, + name: string, + baseUrl: string, + requiresApiKey: boolean, + ) => void; + removeCustomASRProvider: (id: ASRProviderId) => void; + // PDF actions setPDFProvider: (providerId: PDFProviderId) => void; setPDFProviderConfig: ( @@ -381,11 +411,27 @@ function ensureValidProviderSelections(state: Partial): void { state.videoProviderId = defaultVideoConfig.videoProviderId; } - if (!hasProviderId(TTS_PROVIDERS, state.ttsProviderId)) { + if ( + !hasProviderId(TTS_PROVIDERS, state.ttsProviderId) && + !( + state.ttsProviderId && + isCustomTTSProvider(state.ttsProviderId) && + state.ttsProvidersConfig && + state.ttsProviderId in state.ttsProvidersConfig + ) + ) { state.ttsProviderId = defaultAudioConfig.ttsProviderId; } - if (!hasProviderId(ASR_PROVIDERS, state.asrProviderId)) { + if ( + !hasProviderId(ASR_PROVIDERS, state.asrProviderId) && + !( + state.asrProviderId && + isCustomASRProvider(state.asrProviderId) && + state.asrProvidersConfig && + state.asrProviderId in state.asrProvidersConfig + ) + ) { state.asrProviderId = defaultAudioConfig.asrProviderId; } } @@ -625,9 +671,12 @@ export const useSettingsStore = create()( set((state) => { // If switching provider, set default voice for that provider const shouldUpdateVoice = state.ttsProviderId !== providerId; + const defaultVoice = isCustomTTSProvider(providerId) + ? state.ttsProvidersConfig[providerId]?.customVoices?.[0]?.id || 'default' + : DEFAULT_TTS_VOICES[providerId as BuiltInTTSProviderId] || 'default'; return { ttsProviderId: providerId, - ...(shouldUpdateVoice && { ttsVoice: DEFAULT_TTS_VOICES[providerId] }), + ...(shouldUpdateVoice && { ttsVoice: defaultVoice }), }; }), @@ -639,7 +688,13 @@ export const useSettingsStore = create()( // (e.g. browser-native uses BCP-47 "en-US", OpenAI Whisper uses ISO 639-1 "en") setASRProvider: (providerId) => set((state) => { - const supportedLanguages = ASR_PROVIDERS[providerId]?.supportedLanguages || []; + let supportedLanguages: string[]; + if (isCustomASRProvider(providerId)) { + supportedLanguages = ['auto']; + } else { + supportedLanguages = + ASR_PROVIDERS[providerId as keyof typeof ASR_PROVIDERS]?.supportedLanguages || []; + } const isLanguageValid = supportedLanguages.includes(state.asrLanguage); return { asrProviderId: providerId, @@ -735,6 +790,71 @@ export const useSettingsStore = create()( setTTSEnabled: (enabled) => set({ ttsEnabled: enabled }), setASREnabled: (enabled) => set({ asrEnabled: enabled }), + // Custom audio provider actions + addCustomTTSProvider: (id, name, baseUrl, requiresApiKey, defaultModel) => + set((state) => ({ + ttsProvidersConfig: { + ...state.ttsProvidersConfig, + [id]: { + apiKey: '', + baseUrl: '', + enabled: true, + modelId: defaultModel || '', + customName: name, + customDefaultBaseUrl: baseUrl, + customVoices: [], + isBuiltIn: false, + requiresApiKey, + }, + }, + ttsProviderId: id, + })), + + removeCustomTTSProvider: (id) => + set((state) => { + if (!isCustomTTSProvider(id)) return state; + const { [id]: _, ...rest } = state.ttsProvidersConfig; + return { + ttsProvidersConfig: rest as typeof state.ttsProvidersConfig, + ...(state.ttsProviderId === id && { + ttsProviderId: 'browser-native-tts' as TTSProviderId, + ttsVoice: 'default', + }), + }; + }), + + addCustomASRProvider: (id, name, baseUrl, requiresApiKey) => + set((state) => ({ + asrProvidersConfig: { + ...state.asrProvidersConfig, + [id]: { + apiKey: '', + baseUrl: '', + enabled: true, + modelId: '', + customModels: [], + customName: name, + customDefaultBaseUrl: baseUrl, + isBuiltIn: false, + requiresApiKey, + }, + }, + asrProviderId: id, + })), + + removeCustomASRProvider: (id) => + set((state) => { + if (!isCustomASRProvider(id)) return state; + const { [id]: _, ...rest } = state.asrProvidersConfig; + return { + asrProvidersConfig: rest as typeof state.asrProvidersConfig, + ...(state.asrProviderId === id && { + asrProviderId: 'browser-native' as ASRProviderId, + asrLanguage: 'zh', + }), + }; + }), + // Web Search actions setWebSearchProvider: (providerId) => set({ webSearchProviderId: providerId }), setWebSearchProviderConfig: (providerId, config) => @@ -1030,7 +1150,7 @@ export const useSettingsStore = create()( const validTTSVoice = validTTSProvider !== state.ttsProviderId - ? DEFAULT_TTS_VOICES[validTTSProvider as TTSProviderId] || 'default' + ? DEFAULT_TTS_VOICES[validTTSProvider as BuiltInTTSProviderId] || 'default' : state.ttsVoice; // Auto-disable image/video generation when no provider is usable @@ -1062,7 +1182,8 @@ export const useSettingsStore = create()( !newTTSConfig[state.ttsProviderId]?.isServerConfigured ) { autoTtsProvider = serverTtsIds[0]; - autoTtsVoice = DEFAULT_TTS_VOICES[autoTtsProvider] || 'default'; + autoTtsVoice = + DEFAULT_TTS_VOICES[autoTtsProvider as BuiltInTTSProviderId] || 'default'; } // ASR: select first server provider if current is not server-configured diff --git a/tests/store/settings-server-sync.test.ts b/tests/store/settings-server-sync.test.ts index fc49747c1..f4d41ed42 100644 --- a/tests/store/settings-server-sync.test.ts +++ b/tests/store/settings-server-sync.test.ts @@ -100,7 +100,10 @@ vi.mock('@/lib/audio/constants', () => ({ }, })); -vi.mock('@/lib/audio/types', () => ({})); +vi.mock('@/lib/audio/types', () => ({ + isCustomTTSProvider: (id: string) => id.startsWith('custom-tts-'), + isCustomASRProvider: (id: string) => id.startsWith('custom-asr-'), +})); vi.mock('@/lib/pdf/constants', () => ({ PDF_PROVIDERS: {