From 8b87c24d50009ece49674861d274fe1498ab9f7f Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Mon, 9 Jun 2025 17:34:17 +0530 Subject: [PATCH 01/17] init sonix --- template/bridge/rtc/webNg/RtcEngine.ts | 79 +++++++++++++++ template/package.json | 1 + .../src/pages/video-call/VideoCallScreen.tsx | 2 + .../caption/SonixCaptionContainer.tsx | 98 +++++++++++++++++++ 4 files changed, 180 insertions(+) create mode 100644 template/src/subComponents/caption/SonixCaptionContainer.tsx diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 761910582..c720b7040 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -32,6 +32,8 @@ import type { Subscription, } from 'react-native-agora/lib/typescript/src/common/RtcEvents'; +import {RecordTranscribe} from '@soniox/speech-to-text-web'; + import {IRtcEngine} from 'react-native-agora'; import {VideoProfile} from '../quality'; import {ChannelProfileType, ClientRoleType} from '../../../agora-rn-uikit'; @@ -222,6 +224,7 @@ export default class RtcEngine { // public AgoraRTC: any; public client: IAgoraRTCClient; public screenClient: any | IAgoraRTCClient; + public eventsMap = new Map([ ['onUserJoined', () => null], ['onUserOffline', () => null], @@ -232,7 +235,9 @@ export default class RtcEngine { ['onNetworkQuality', () => null], ['onActiveSpeaker', () => null], ['onStreamMessage', () => null], + ['onSonioxTranscriptionResult', () => null], ]); + public localStream: LocalStream = {}; public screenStream: ScreenStream = {}; public remoteStreams = new Map(); @@ -261,6 +266,80 @@ export default class RtcEngine { const {appId} = context; logger.log(LogSource.AgoraSDK, 'Log', 'RTC engine initialized'); this.appId = appId; + this.sonioxTranscribers = new Map(); + this.customEvents = new Map(); + } + addCustomListener(eventName: string, callback: (...args: any[]) => void) { + this.customEvents.set(eventName, callback); + } + + removeCustomListener(eventName: string) { + this.customEvents.delete(eventName); + } + + async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { + let stream: MediaStream; + + if (isLocal) { + if (!this.localStream.audio) return; + stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); + } else { + const remoteAudio = this.remoteStreams.get(uid)?.audio; + if (!remoteAudio) return; + stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); + } + + const transcriber = new RecordTranscribe({apiKey}); + + await transcriber.start({ + model: 'stt-rt-preview', + stream, + // sampleRate: 48000, + // numChannels: 1, + enableSpeakerDiarization: true, + onPartialResult: results => { + const callback = this.customEvents.get('onSonioxTranscriptionResult'); + if (callback) callback(uid, {uid, ...results}); + }, + onError: (status, message, code) => { + console.error( + `Soniox Transcription Error (${uid}):`, + status, + message, + code, + ); + }, + onStarted: () => { + console.log(` Soniox started transcription session for ${uid}`); + }, + onStateChange: ({oldState, newState}) => { + console.log(`Soniox state change (${uid}): ${oldState} → ${newState}`); + }, + onFinished: () => { + console.log(` Soniox transcription session finished for ${uid}`); + }, + }); + + this.sonioxTranscribers.set(uid, transcriber); + + logger.log( + LogSource.AgoraSDK, + 'Soniox', + `Started Soniox transcription for ${uid}`, + ); + } + + stopSonioxTranscription(uid: UID): void { + const transcriber = this.sonioxTranscribers.get(uid); + if (transcriber) { + transcriber.stop(); + this.sonioxTranscribers.delete(uid); + logger.log( + LogSource.AgoraSDK, + 'Soniox', + `Stopped Soniox transcription for remote user ${uid}`, + ); + } } getLocalVideoStats() { try { diff --git a/template/package.json b/template/package.json index 5d5d8136d..1ec548c9f 100644 --- a/template/package.json +++ b/template/package.json @@ -56,6 +56,7 @@ "@react-native-async-storage/async-storage": "1.19.2", "@react-native-community/checkbox": "0.5.16", "@react-native-community/clipboard": "1.5.1", + "@soniox/speech-to-text-web": "^1.1.2", "@splinetool/runtime": "^1.9.69", "@supersami/rn-foreground-service": "^1.1.1", "add": "^2.0.6", diff --git a/template/src/pages/video-call/VideoCallScreen.tsx b/template/src/pages/video-call/VideoCallScreen.tsx index 4093cc44e..74d55119b 100644 --- a/template/src/pages/video-call/VideoCallScreen.tsx +++ b/template/src/pages/video-call/VideoCallScreen.tsx @@ -55,6 +55,7 @@ import {useIsRecordingBot} from '../../subComponents/recording/useIsRecordingBot import {ToolbarPresetProps} from '../../atoms/ToolbarPreset'; import CustomSidePanelView from '../../components/CustomSidePanel'; import {useControlPermissionMatrix} from '../../components/controls/useControlPermissionMatrix'; +import SonixCaptionContainer from '../../subComponents/caption/SonixCaptionContainer'; const VideoCallScreen = () => { useFindActiveSpeaker(); @@ -454,6 +455,7 @@ const VideoCallScreen = () => { /> ) : ( <> + {isCaptionON ? : <>} {isCaptionON ? : <>} { + const {RtcEngineUnsafe} = useRtc(); + const [caption, setCaption] = useState('Listening...'); + const listenerRef = useRef(null); + const [captions, setCaptions] = useState< + Record + >({}); + const {defaultContent, activeUids, customContent} = useContent(); + const localUid = useLocalUid(); + + useEffect(() => { + // Add listener for transcription result + RtcEngineUnsafe.addCustomListener( + 'onSonioxTranscriptionResult', + (uid, transcript) => { + console.log('sonix Captions =>', uid, transcript); + const newFinalTokens: string[] = []; + let newNonFinal = ''; + + for (const token of transcript.tokens || []) { + if (token.is_final) { + newFinalTokens.push(token.text); + } else { + newNonFinal += token.text; + } + } + + setCaptions(prev => { + const prevFinal = prev[uid]?.final || []; + return { + ...prev, + [uid]: { + final: [...prevFinal, ...newFinalTokens], + nonFinal: newNonFinal, + }, + }; + }); + }, + ); + + // Start transcription for the users in the call , later move to start / button + activeUids.map(uid => { + RtcEngineUnsafe.startSonioxTranscription( + uid, + $config.SONIOX_API_KEY, + uid === localUid, + ); + }); + + return () => { + //RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu + }; + }, []); + + return ( + + {Object.entries(captions).map(([uid, {final, nonFinal}]) => ( + + {final.map((word, i) => ( + + {word} + + ))} + {nonFinal} + + ))} + + ); +}; + +export default SonixCaptionContainer; + +const styles = StyleSheet.create({ + container: { + paddingVertical: 4, + paddingHorizontal: 20, + height: CAPTION_CONTAINER_HEIGHT, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: 'grey', + borderRadius: ThemeConfig.BorderRadius.small, + marginTop: $config.ICON_TEXT ? 8 : 0, + }, + captionText: { + fontFamily: ThemeConfig.FontFamily.sansPro, + fontWeight: '400', + color: $config.FONT_COLOR, + fontSize: 24, + textAlign: 'left', + }, +}); From 3a7339f5de2b709f52bc69870b9f5f000b673111 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Mon, 9 Jun 2025 18:48:47 +0530 Subject: [PATCH 02/17] combing audio strems for all users --- template/bridge/rtc/webNg/RtcEngine.ts | 58 ++++++++++++------- .../caption/SonixCaptionContainer.tsx | 10 +--- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index c720b7040..6bb6f4d91 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -277,55 +277,71 @@ export default class RtcEngine { this.customEvents.delete(eventName); } - async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { + async startSonioxTranscription(apiKey: string) { let stream: MediaStream; - if (isLocal) { - if (!this.localStream.audio) return; - stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); - } else { - const remoteAudio = this.remoteStreams.get(uid)?.audio; - if (!remoteAudio) return; - stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); + // combined audio strem for all users + const audioContext = new AudioContext(); + const destination = audioContext.createMediaStreamDestination(); + // Add local stream if available + if (this.localStream?.audio) { + const localSource = audioContext.createMediaStreamSource( + new MediaStream([this.localStream.audio.getMediaStreamTrack()]), + ); + localSource.connect(destination); } + // Add all remote audio tracks + for (const remote of this.remoteStreams.values()) { + if (remote.audio) { + const remoteSource = audioContext.createMediaStreamSource( + new MediaStream([remote.audio.getMediaStreamTrack()]), + ); + remoteSource.connect(destination); + } + } + const combinedStream = destination.stream; + + // if (isLocal) { + // if (!this.localStream.audio) return; + // stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); + // } else { + // const remoteAudio = this.remoteStreams.get(uid)?.audio; + // if (!remoteAudio) return; + // stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); + // } const transcriber = new RecordTranscribe({apiKey}); await transcriber.start({ model: 'stt-rt-preview', - stream, + stream: combinedStream, // sampleRate: 48000, // numChannels: 1, enableSpeakerDiarization: true, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); - if (callback) callback(uid, {uid, ...results}); + if (callback) callback(101, results); }, onError: (status, message, code) => { - console.error( - `Soniox Transcription Error (${uid}):`, - status, - message, - code, - ); + console.error(`Soniox Transcription Error:`, status, message, code); }, onStarted: () => { - console.log(` Soniox started transcription session for ${uid}`); + console.log(` Soniox started transcription`); }, onStateChange: ({oldState, newState}) => { - console.log(`Soniox state change (${uid}): ${oldState} → ${newState}`); + console.log(`Soniox state change : ${oldState} → ${newState}`); }, onFinished: () => { - console.log(` Soniox transcription session finished for ${uid}`); + console.log(` Soniox transcription session finished}`); }, }); - this.sonioxTranscribers.set(uid, transcriber); + this.sonioxTranscribers.set(101, transcriber); logger.log( LogSource.AgoraSDK, 'Soniox', - `Started Soniox transcription for ${uid}`, + `Started Soniox transcription started`, ); } diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index f8f42baba..3617beca2 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -46,13 +46,9 @@ const SonixCaptionContainer = () => { ); // Start transcription for the users in the call , later move to start / button - activeUids.map(uid => { - RtcEngineUnsafe.startSonioxTranscription( - uid, - $config.SONIOX_API_KEY, - uid === localUid, - ); - }); + //activeUids.map(uid => { + RtcEngineUnsafe.startSonioxTranscription($config.SONIOX_API_KEY); + // }); return () => { //RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu From c2b86571192ff252c51537405a4029c88cd0ff82 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Mon, 9 Jun 2025 18:52:21 +0530 Subject: [PATCH 03/17] added missing key --- config.json | 3 ++- template/bridge/rtc/webNg/RtcEngine.ts | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/config.json b/config.json index 5cafb4c44..f1cd80125 100644 --- a/config.json +++ b/config.json @@ -98,5 +98,6 @@ "ENABLE_SPOTLIGHT": false, "AUTO_CONNECT_RTM": false, "ENABLE_CONVERSATIONAL_AI": false, - "CUSTOMIZE_AGENT": false + "CUSTOMIZE_AGENT": false, + "SONIOX_API_KEY": "5d1b0276e3b1e647485d42e7d1439430dea7459b3248a55f98928c8b1074f1d7" } diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 6bb6f4d91..1c3fa7795 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -301,15 +301,6 @@ export default class RtcEngine { } const combinedStream = destination.stream; - // if (isLocal) { - // if (!this.localStream.audio) return; - // stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); - // } else { - // const remoteAudio = this.remoteStreams.get(uid)?.audio; - // if (!remoteAudio) return; - // stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); - // } - const transcriber = new RecordTranscribe({apiKey}); await transcriber.start({ From ce3e47187200db7be34ae00ed2cd73fd9987a436 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Mon, 9 Jun 2025 23:42:55 +0530 Subject: [PATCH 04/17] lifing state of sonix captins up --- .../subComponents/caption/SonixCaptionContainer.tsx | 13 +++++++------ template/src/subComponents/caption/useCaption.tsx | 9 +++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 3617beca2..e3937a601 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -3,17 +3,18 @@ import {StyleSheet, Text, View} from 'react-native'; import React, {useEffect, useRef, useState} from 'react'; import ThemeConfig from '../../theme'; import {CAPTION_CONTAINER_HEIGHT} from '../../components/CommonStyles'; -import {useRtc, useContent, useLocalUid} from 'customization-api'; +import {useRtc, useContent, useLocalUid, useCaption} from 'customization-api'; const SonixCaptionContainer = () => { const {RtcEngineUnsafe} = useRtc(); const [caption, setCaption] = useState('Listening...'); const listenerRef = useRef(null); - const [captions, setCaptions] = useState< - Record - >({}); + // const [captions, setCaptions] = useState< + // Record + // >({}); const {defaultContent, activeUids, customContent} = useContent(); const localUid = useLocalUid(); + const {sonixCaptions, setSonixCaptions} = useCaption(); useEffect(() => { // Add listener for transcription result @@ -32,7 +33,7 @@ const SonixCaptionContainer = () => { } } - setCaptions(prev => { + setSonixCaptions(prev => { const prevFinal = prev[uid]?.final || []; return { ...prev, @@ -57,7 +58,7 @@ const SonixCaptionContainer = () => { return ( - {Object.entries(captions).map(([uid, {final, nonFinal}]) => ( + {Object.entries(sonixCaptions).map(([uid, {final, nonFinal}]) => ( {final.map((word, i) => ( diff --git a/template/src/subComponents/caption/useCaption.tsx b/template/src/subComponents/caption/useCaption.tsx index a924d4d2c..bfd99fb16 100644 --- a/template/src/subComponents/caption/useCaption.tsx +++ b/template/src/subComponents/caption/useCaption.tsx @@ -50,6 +50,8 @@ export const CaptionContext = React.createContext<{ activeSpeakerRef: React.MutableRefObject; prevSpeakerRef: React.MutableRefObject; + sonixCaptions: Object; + setSonixCaptions: React.Dispatch>; }>({ isCaptionON: false, setIsCaptionON: () => {}, @@ -69,6 +71,8 @@ export const CaptionContext = React.createContext<{ setIsSTTListenerAdded: () => {}, activeSpeakerRef: {current: ''}, prevSpeakerRef: {current: ''}, + sonixCaptions: {}, + setSonixCaptions: () => {}, }); interface CaptionProviderProps { @@ -98,6 +102,9 @@ const CaptionProvider: React.FC = ({ const activeSpeakerRef = React.useRef(''); const prevSpeakerRef = React.useRef(''); + const [sonixCaptions, setSonixCaptions] = React.useState< + Record + >({}); return ( = ({ setIsSTTListenerAdded, activeSpeakerRef, prevSpeakerRef, + sonixCaptions, + setSonixCaptions, }}> {children} From 5bf749b7083b30c5b5d947242cdab5f16dc7e24d Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Wed, 11 Jun 2025 11:52:04 +0530 Subject: [PATCH 05/17] updated for mutliple transcriptions --- template/bridge/rtc/webNg/RtcEngine.ts | 136 +++++++++++---- .../caption/SonixCaptionContainer.tsx | 161 ++++++++++++++---- .../src/subComponents/caption/useCaption.tsx | 14 ++ 3 files changed, 242 insertions(+), 69 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 1c3fa7795..7d0f79df3 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -277,77 +277,143 @@ export default class RtcEngine { this.customEvents.delete(eventName); } - async startSonioxTranscription(apiKey: string) { - let stream: MediaStream; - - // combined audio strem for all users - const audioContext = new AudioContext(); - const destination = audioContext.createMediaStreamDestination(); - // Add local stream if available - if (this.localStream?.audio) { - const localSource = audioContext.createMediaStreamSource( - new MediaStream([this.localStream.audio.getMediaStreamTrack()]), - ); - localSource.connect(destination); - } - // Add all remote audio tracks - for (const remote of this.remoteStreams.values()) { - if (remote.audio) { - const remoteSource = audioContext.createMediaStreamSource( - new MediaStream([remote.audio.getMediaStreamTrack()]), - ); - remoteSource.connect(destination); + // async startSonioxTranscriptionCombined(apiKey: string) { + // let stream: MediaStream; + + // // combined audio strem for all users + // const audioContext = new AudioContext(); + // const destination = audioContext.createMediaStreamDestination(); + // // Add local stream if available + // if (this.localStream?.audio) { + // const localSource = audioContext.createMediaStreamSource( + // new MediaStream([this.localStream.audio.getMediaStreamTrack()]), + // ); + // localSource.connect(destination); + // } + // // Add all remote audio tracks + // for (const remote of this.remoteStreams.values()) { + // if (remote.audio) { + // const remoteSource = audioContext.createMediaStreamSource( + // new MediaStream([remote.audio.getMediaStreamTrack()]), + // ); + // remoteSource.connect(destination); + // } + // } + // const combinedStream = destination.stream; + + // const transcriber = new RecordTranscribe({apiKey}); + + // await transcriber.start({ + // model: 'stt-rt-preview', + // stream: combinedStream, + // // sampleRate: 48000, + // // numChannels: 1, + // enableSpeakerDiarization: true, + // onPartialResult: results => { + // const callback = this.customEvents.get('onSonioxTranscriptionResult'); + // if (callback) callback(101, results); + // }, + // onError: (status, message, code) => { + // console.error(`Soniox Transcription Error:`, status, message, code); + // }, + // onStarted: () => { + // console.log(` Soniox started transcription`); + // }, + // onStateChange: ({oldState, newState}) => { + // console.log(`Soniox state change : ${oldState} → ${newState}`); + // }, + // onFinished: () => { + // console.log(` Soniox transcription session finished}`); + // }, + // }); + + // this.sonioxTranscribers.set(101, transcriber); + + // logger.log( + // LogSource.AgoraSDK, + // 'Soniox', + // `Started Soniox transcription started`, + // ); + // } + + async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { + let stream: MediaStream | null = null; + + // Select local or remote stream + if (isLocal) { + if (!this.localStream?.audio) { + console.warn('No local audio stream available'); + return; } + stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); + } else { + const remoteAudio = this.remoteStreams.get(uid)?.audio; + if (!remoteAudio) { + console.warn(`No remote audio stream found for UID ${uid}`); + return; + } + stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); } - const combinedStream = destination.stream; + // Create a new transcriber instance const transcriber = new RecordTranscribe({apiKey}); + // Start transcription for the single stream await transcriber.start({ model: 'stt-rt-preview', - stream: combinedStream, + stream, // sampleRate: 48000, // numChannels: 1, - enableSpeakerDiarization: true, + translation: { + type: 'one_way', + language_a: 'en', + language_b: 'hi', + }, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); - if (callback) callback(101, results); + if (callback) callback(uid, {uid, ...results}); }, onError: (status, message, code) => { - console.error(`Soniox Transcription Error:`, status, message, code); + console.error( + `Soniox Transcription Error (${uid}):`, + status, + message, + code, + ); }, onStarted: () => { - console.log(` Soniox started transcription`); + console.log(`Soniox started transcription for UID: ${uid}`); }, onStateChange: ({oldState, newState}) => { - console.log(`Soniox state change : ${oldState} → ${newState}`); + console.log(`Soniox state (${uid}): ${oldState} → ${newState}`); }, onFinished: () => { - console.log(` Soniox transcription session finished}`); + console.log(` Soniox transcription session finished for UID: ${uid}`); }, }); - this.sonioxTranscribers.set(101, transcriber); + // Track this transcriber + this.sonioxTranscribers.set(uid, transcriber); logger.log( LogSource.AgoraSDK, 'Soniox', - `Started Soniox transcription started`, + `Started transcription for ${uid}`, ); } - stopSonioxTranscription(uid: UID): void { - const transcriber = this.sonioxTranscribers.get(uid); - if (transcriber) { + stopSonioxTranscription(): void { + for (const [uid, transcriber] of this.sonioxTranscribers.entries()) { transcriber.stop(); - this.sonioxTranscribers.delete(uid); logger.log( LogSource.AgoraSDK, 'Soniox', - `Stopped Soniox transcription for remote user ${uid}`, + `Stopped Soniox transcription for user ${uid}`, ); } + this.sonioxTranscribers.clear(); } + getLocalVideoStats() { try { logger.log( diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index e3937a601..04db189be 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -1,5 +1,5 @@ // @ts-nocheck -import {StyleSheet, Text, View} from 'react-native'; +import {StyleSheet, Text, View, ScrollView} from 'react-native'; import React, {useEffect, useRef, useState} from 'react'; import ThemeConfig from '../../theme'; import {CAPTION_CONTAINER_HEIGHT} from '../../components/CommonStyles'; @@ -14,61 +14,124 @@ const SonixCaptionContainer = () => { // >({}); const {defaultContent, activeUids, customContent} = useContent(); const localUid = useLocalUid(); - const {sonixCaptions, setSonixCaptions} = useCaption(); + const {sonixCaptions, setSonixCaptions, captionFeed, setCaptionFeed} = + useCaption(); useEffect(() => { // Add listener for transcription result + // RtcEngineUnsafe.addCustomListener( + // 'onSonioxTranscriptionResult', + // (uid, transcript) => { + // console.log('sonix Captions =>', uid, transcript); + // const newFinalTokens: string[] = []; + // let newNonFinal = ''; + + // for (const token of transcript.tokens || []) { + // if (token.is_final) { + // newFinalTokens.push(token.text); + // } else { + // newNonFinal += token.text; + // } + // } + + // setSonixCaptions(prev => { + // const prevFinal = prev[uid]?.final || []; + // return { + // ...prev, + // [uid]: { + // final: [...prevFinal, ...newFinalTokens], + // nonFinal: newNonFinal, + // }, + // }; + // }); + // }, + // ); + RtcEngineUnsafe.addCustomListener( 'onSonioxTranscriptionResult', (uid, transcript) => { - console.log('sonix Captions =>', uid, transcript); - const newFinalTokens: string[] = []; - let newNonFinal = ''; + console.log('sonix transcript =>', transcript); + const finalText = transcript.tokens + .filter(t => t.is_final) + .map(t => t.text) + .join(''); + + const nonFinalText = transcript.tokens + .filter(t => !t.is_final) + .map(t => t.text) + .join(''); - for (const token of transcript.tokens || []) { - if (token.is_final) { - newFinalTokens.push(token.text); + setCaptionFeed(prev => { + const last = prev[prev.length - 1]; + + if (last && last.uid === uid) { + // Update final and nonFinal in same entry + return [ + ...prev.slice(0, -1), + { + ...last, + text: last.text + (finalText ? ' ' + finalText : ''), + nonFinal: nonFinalText, + time: Date.now(), + }, + ]; } else { - newNonFinal += token.text; + return [ + ...prev, + { + uid, + text: finalText, + nonFinal: nonFinalText, + time: Date.now(), + }, + ]; } - } - - setSonixCaptions(prev => { - const prevFinal = prev[uid]?.final || []; - return { - ...prev, - [uid]: { - final: [...prevFinal, ...newFinalTokens], - nonFinal: newNonFinal, - }, - }; }); }, ); // Start transcription for the users in the call , later move to start / button - //activeUids.map(uid => { - RtcEngineUnsafe.startSonioxTranscription($config.SONIOX_API_KEY); - // }); + activeUids.map(uid => { + RtcEngineUnsafe.startSonioxTranscription( + uid, + $config.SONIOX_API_KEY, + uid === localUid, + ); + }); return () => { - //RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu + RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu }; }, []); return ( - - {Object.entries(sonixCaptions).map(([uid, {final, nonFinal}]) => ( - - {final.map((word, i) => ( - - {word} - - ))} - {nonFinal} + // + // {Object.entries(sonixCaptions).map(([uid, {final, nonFinal}]) => ( + // + // {final.map((word, i) => ( + // + // {word} + // + // ))} + // {nonFinal} + // + // ))} + // + + {captionFeed.map((entry, index) => ( + + + {entry.nonFinal || entry.text + ? defaultContent[entry.uid].name + ' : ' + : ''} + + {entry.text} + {entry.nonFinal ? ( + {entry.nonFinal} + ) : null} ))} - + ); }; @@ -84,12 +147,42 @@ const styles = StyleSheet.create({ backgroundColor: 'grey', borderRadius: ThemeConfig.BorderRadius.small, marginTop: $config.ICON_TEXT ? 8 : 0, + width: '100%', }, captionText: { fontFamily: ThemeConfig.FontFamily.sansPro, fontWeight: '400', color: $config.FONT_COLOR, fontSize: 24, + flexDirection: 'row', + flexWrap: 'nowrap', textAlign: 'left', + whiteSpace: 'nowrap', + }, + captionLine: { + flexDirection: 'row', + flexWrap: 'wrap', + marginBottom: 4, + width: '100%', + }, + uid: { + color: 'orange', + fontWeight: 'bold', + fontSize: 18, + fontStyle: 'italic', + }, + content: { + flexShrink: 1, + fontFamily: ThemeConfig.FontFamily.sansPro, + fontWeight: '400', + color: $config.FONT_COLOR, + fontSize: 20, + flexWrap: 'wrap', + }, + live: { + color: 'skyblue', + fontSize: 20, + flexWrap: 'wrap', + flexShrink: 1, }, }); diff --git a/template/src/subComponents/caption/useCaption.tsx b/template/src/subComponents/caption/useCaption.tsx index bfd99fb16..70602f845 100644 --- a/template/src/subComponents/caption/useCaption.tsx +++ b/template/src/subComponents/caption/useCaption.tsx @@ -52,6 +52,8 @@ export const CaptionContext = React.createContext<{ prevSpeakerRef: React.MutableRefObject; sonixCaptions: Object; setSonixCaptions: React.Dispatch>; + captionFeed: Object; + setCaptionFeed: React.Dispatch>; }>({ isCaptionON: false, setIsCaptionON: () => {}, @@ -73,6 +75,8 @@ export const CaptionContext = React.createContext<{ prevSpeakerRef: {current: ''}, sonixCaptions: {}, setSonixCaptions: () => {}, + captionFeed: {}, + setCaptionFeed: () => {}, }); interface CaptionProviderProps { @@ -105,6 +109,14 @@ const CaptionProvider: React.FC = ({ const [sonixCaptions, setSonixCaptions] = React.useState< Record >({}); + const [captionFeed, setCaptionFeed] = React.useState< + { + uid: string; + text: string; // finalized text + nonFinal?: string; // optional, shows live tokens + time: number; + }[] + >([]); return ( = ({ prevSpeakerRef, sonixCaptions, setSonixCaptions, + captionFeed, + setCaptionFeed, }}> {children} From f0b056bcb31ca027672de58cbe03ec50e0de8b96 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Wed, 11 Jun 2025 14:25:21 +0530 Subject: [PATCH 06/17] temp hardcoded --- template/bridge/rtc/webNg/RtcEngine.ts | 198 +++++++++++++++--- .../caption/SonixCaptionContainer.tsx | 31 ++- 2 files changed, 187 insertions(+), 42 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 7d0f79df3..f139270cc 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -336,64 +336,183 @@ export default class RtcEngine { // ); // } - async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { - let stream: MediaStream | null = null; - - // Select local or remote stream - if (isLocal) { - if (!this.localStream?.audio) { - console.warn('No local audio stream available'); - return; - } - stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]); - } else { - const remoteAudio = this.remoteStreams.get(uid)?.audio; - if (!remoteAudio) { - console.warn(`No remote audio stream found for UID ${uid}`); - return; - } - stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); - } + async tempStartTranscribe(localUID, remoteUID) { + console.log( + `start transcribe for local uid ${localUID} and remote uid ${remoteUID}`, + ); + const localAudioStream = new MediaStream([ + this.localStream.audio.getMediaStreamTrack(), + ]); - // Create a new transcriber instance - const transcriber = new RecordTranscribe({apiKey}); + const LocalTranscriber = new RecordTranscribe({ + apiKey: $config.SONIOX_API_KEY, + }); - // Start transcription for the single stream - await transcriber.start({ + LocalTranscriber.start({ model: 'stt-rt-preview', - stream, + stream: localAudioStream, + enableSpeakerDiarization: true, + languageHints: ['en', 'hi'], // sampleRate: 48000, // numChannels: 1, - translation: { - type: 'one_way', - language_a: 'en', - language_b: 'hi', - }, + // translation: { + // type: 'one_way', + // language_a: 'en', + // language_b: 'hi', + // }, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); - if (callback) callback(uid, {uid, ...results}); + console.log( + `Sonix results local ${localUID} stream callback ${results}`, + ); + if (callback) callback(localUID, {localUID, ...results}); }, onError: (status, message, code) => { console.error( - `Soniox Transcription Error (${uid}):`, + `Soniox Transcription Error (${localUID}):`, status, message, code, ); }, onStarted: () => { - console.log(`Soniox started transcription for UID: ${uid}`); + console.log(`Soniox started transcription for UID: ${localUID}`); }, onStateChange: ({oldState, newState}) => { - console.log(`Soniox state (${uid}): ${oldState} → ${newState}`); + console.log(`Soniox state (${localUID}): ${oldState} → ${newState}`); }, onFinished: () => { - console.log(` Soniox transcription session finished for UID: ${uid}`); + console.log( + ` Soniox transcription session finished for UID: ${localUID}`, + ); }, }); + this.sonioxTranscribers.set(localUID, LocalTranscriber); + + if (remoteUID) { + const remoteAudioStream = new MediaStream([ + this.remoteStreams.get(remoteUID)?.audio.getMediaStreamTrack(), + ]); + const RemoteTranscriber = new RecordTranscribe({ + apiKey: $config.SONIOX_API_KEY, + }); + RemoteTranscriber.start({ + model: 'stt-rt-preview', + stream: remoteAudioStream, + enableSpeakerDiarization: true, + languageHints: ['en', 'hi'], + // sampleRate: 48000, + // numChannels: 1, + // translation: { + // type: 'one_way', + // language_a: 'en', + // language_b: 'hi', + // }, + onPartialResult: results => { + const callback = this.customEvents.get('onSonioxTranscriptionResult'); + console.log( + `Sonix results remote ${remoteUID} stream callback ${results}`, + ); + if (callback) callback(remoteUID, {remoteUID, ...results}); + }, + onError: (status, message, code) => { + console.error( + `Soniox Transcription Error (${remoteUID}):`, + status, + message, + code, + ); + }, + onStarted: () => { + console.log(`Soniox started transcription for UID: ${remoteUID}`); + }, + onStateChange: ({oldState, newState}) => { + console.log(`Soniox state (${remoteUID}): ${oldState} → ${newState}`); + }, + onFinished: () => { + console.log( + ` Soniox transcription session finished for UID: ${remoteUID}`, + ); + }, + }); + + this.sonioxTranscribers.set(remoteUID, RemoteTranscriber); + } + } + + async startTranscripe() { + for (const [ + uid, + {stream, transcriber}, + ] of this.sonioxTranscribers.entries()) { + console.log('start trascribe for uid', uid); + await transcriber.start({ + model: 'stt-rt-preview', + stream, + // sampleRate: 48000, + // numChannels: 1, + translation: { + type: 'one_way', + language_a: 'en', + language_b: 'hi', + }, + onPartialResult: results => { + const callback = this.customEvents.get('onSonioxTranscriptionResult'); + if (callback) callback(uid, {uid, ...results}); + }, + onError: (status, message, code) => { + console.error( + `Soniox Transcription Error (${uid}):`, + status, + message, + code, + ); + }, + onStarted: () => { + console.log(`Soniox started transcription for UID: ${uid}`); + }, + onStateChange: ({oldState, newState}) => { + console.log(`Soniox state (${uid}): ${oldState} → ${newState}`); + }, + onFinished: () => { + console.log(` Soniox transcription session finished for UID: ${uid}`); + }, + }); + } + } + + async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { + let stream: MediaStream | null = null; + + // Select local or remote stream + if (isLocal) { + if (!this.localStream?.audio) { + console.log('No local audio stream available', uid); + // return; + } else { + stream = new MediaStream([ + this.localStream.audio.getMediaStreamTrack(), + ]); + } + } else { + const remoteAudio = this.remoteStreams.get(uid)?.audio; + if (!remoteAudio) { + console.warn(`No remote audio stream found for UID ${uid}`); + // return; + } else { + stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); + } + } + + // Create a new transcriber instance + const transcriber = new RecordTranscribe({apiKey}); + + // Start transcription for the single stream + // Track this transcriber - this.sonioxTranscribers.set(uid, transcriber); + this.sonioxTranscribers.set(uid, {transcriber, stream}); + console.log('all transcriper', this.sonioxTranscribers); logger.log( LogSource.AgoraSDK, @@ -414,6 +533,17 @@ export default class RtcEngine { this.sonioxTranscribers.clear(); } + stopTempSonixTranscription(localUid, RemoteUid): void { + this?.localTranscriber?.stop(); + this?.remoteTranscriber?.stop(); + + logger.log( + LogSource.AgoraSDK, + 'Soniox', + `Stopped Soniox transcription for userS L-${localUid} , R-${RemoteUid}`, + ); + } + getLocalVideoStats() { try { logger.log( diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 04db189be..f443e720f 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -91,16 +91,31 @@ const SonixCaptionContainer = () => { ); // Start transcription for the users in the call , later move to start / button - activeUids.map(uid => { - RtcEngineUnsafe.startSonioxTranscription( - uid, - $config.SONIOX_API_KEY, - uid === localUid, - ); - }); + // activeUids.map(uid => { + // RtcEngineUnsafe.startSonioxTranscription( + // uid, + // $config.SONIOX_API_KEY, + // uid === localUid, + // ); + // }); + + // for (let i = 0; i < activeUids.length; i++) { + // const uid = activeUids[i]; + // console.log('actuve uid-localuid', uid, localUid); + // RtcEngineUnsafe.startSonioxTranscription( + // uid, + // $config.SONIOX_API_KEY, + // uid === localUid, + // ); + // } + + // RtcEngineUnsafe.startTranscripe(); + const remoteUid = activeUids.find(uid => uid !== localUid); + RtcEngineUnsafe.tempStartTranscribe(localUid, remoteUid); return () => { - RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu + // RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu + RtcEngineUnsafe.stopTempSonixTranscription(localUid, remoteUid); }; }, []); From ba39beb094adfe2330dd2102aebdbb3c1f3fd299 Mon Sep 17 00:00:00 2001 From: HariharanIT Date: Wed, 11 Jun 2025 19:08:04 +0530 Subject: [PATCH 07/17] Upgrade the plugin "@soniox/speech-to-text-web": "^1.1.4", --- template/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template/package.json b/template/package.json index 1ec548c9f..9099b9d72 100644 --- a/template/package.json +++ b/template/package.json @@ -56,7 +56,7 @@ "@react-native-async-storage/async-storage": "1.19.2", "@react-native-community/checkbox": "0.5.16", "@react-native-community/clipboard": "1.5.1", - "@soniox/speech-to-text-web": "^1.1.2", + "@soniox/speech-to-text-web": "^1.1.4", "@splinetool/runtime": "^1.9.69", "@supersami/rn-foreground-service": "^1.1.1", "add": "^2.0.6", From 52507f76a788a5c161b1b3e75d41ba5cff024ee1 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 12 Jun 2025 00:20:07 +0530 Subject: [PATCH 08/17] code cleanup and layout --- template/bridge/rtc/webNg/RtcEngine.ts | 264 +++--------------- .../caption/SonixCaptionContainer.tsx | 155 ++++------ 2 files changed, 84 insertions(+), 335 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index f139270cc..f44d41209 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -277,273 +277,77 @@ export default class RtcEngine { this.customEvents.delete(eventName); } - // async startSonioxTranscriptionCombined(apiKey: string) { - // let stream: MediaStream; - - // // combined audio strem for all users - // const audioContext = new AudioContext(); - // const destination = audioContext.createMediaStreamDestination(); - // // Add local stream if available - // if (this.localStream?.audio) { - // const localSource = audioContext.createMediaStreamSource( - // new MediaStream([this.localStream.audio.getMediaStreamTrack()]), - // ); - // localSource.connect(destination); - // } - // // Add all remote audio tracks - // for (const remote of this.remoteStreams.values()) { - // if (remote.audio) { - // const remoteSource = audioContext.createMediaStreamSource( - // new MediaStream([remote.audio.getMediaStreamTrack()]), - // ); - // remoteSource.connect(destination); - // } - // } - // const combinedStream = destination.stream; - - // const transcriber = new RecordTranscribe({apiKey}); - - // await transcriber.start({ - // model: 'stt-rt-preview', - // stream: combinedStream, - // // sampleRate: 48000, - // // numChannels: 1, - // enableSpeakerDiarization: true, - // onPartialResult: results => { - // const callback = this.customEvents.get('onSonioxTranscriptionResult'); - // if (callback) callback(101, results); - // }, - // onError: (status, message, code) => { - // console.error(`Soniox Transcription Error:`, status, message, code); - // }, - // onStarted: () => { - // console.log(` Soniox started transcription`); - // }, - // onStateChange: ({oldState, newState}) => { - // console.log(`Soniox state change : ${oldState} → ${newState}`); - // }, - // onFinished: () => { - // console.log(` Soniox transcription session finished}`); - // }, - // }); - - // this.sonioxTranscribers.set(101, transcriber); - - // logger.log( - // LogSource.AgoraSDK, - // 'Soniox', - // `Started Soniox transcription started`, - // ); - // } + async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { + let stream: MediaStream | null = null; - async tempStartTranscribe(localUID, remoteUID) { - console.log( - `start transcribe for local uid ${localUID} and remote uid ${remoteUID}`, - ); - const localAudioStream = new MediaStream([ - this.localStream.audio.getMediaStreamTrack(), - ]); + // Select local or remote stream + if (isLocal) { + if (!this.localStream?.audio) { + console.log('No local audio stream available', uid); + return; + } else { + stream = new MediaStream([ + this.localStream.audio.getMediaStreamTrack(), + ]); + } + } else { + const remoteAudio = this.remoteStreams.get(uid)?.audio; + if (!remoteAudio) { + console.warn(`No remote audio stream found for UID ${uid}`); + return; + } else { + stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); + } + } - const LocalTranscriber = new RecordTranscribe({ - apiKey: $config.SONIOX_API_KEY, - }); + // Create a new transcriber instance + const transcriber = new RecordTranscribe({apiKey}); - LocalTranscriber.start({ + // Start transcription for the single stream + await transcriber.start({ model: 'stt-rt-preview', - stream: localAudioStream, - enableSpeakerDiarization: true, + stream, languageHints: ['en', 'hi'], - // sampleRate: 48000, - // numChannels: 1, // translation: { // type: 'one_way', - // language_a: 'en', - // language_b: 'hi', + // source_languages: ['en'], + // target_language: 'hi', // }, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); - console.log( - `Sonix results local ${localUID} stream callback ${results}`, - ); - if (callback) callback(localUID, {localUID, ...results}); + if (callback) callback(uid, {uid, ...results}); }, onError: (status, message, code) => { console.error( - `Soniox Transcription Error (${localUID}):`, + `Soniox Transcription Error (${uid}):`, status, message, code, ); }, onStarted: () => { - console.log(`Soniox started transcription for UID: ${localUID}`); + console.log(`Soniox started transcription for UID: ${uid}`); }, onStateChange: ({oldState, newState}) => { - console.log(`Soniox state (${localUID}): ${oldState} → ${newState}`); + console.log(`Soniox state (${uid}): ${oldState} → ${newState}`); }, onFinished: () => { - console.log( - ` Soniox transcription session finished for UID: ${localUID}`, - ); + console.log(` Soniox transcription session finished for UID: ${uid}`); }, }); - this.sonioxTranscribers.set(localUID, LocalTranscriber); - - if (remoteUID) { - const remoteAudioStream = new MediaStream([ - this.remoteStreams.get(remoteUID)?.audio.getMediaStreamTrack(), - ]); - const RemoteTranscriber = new RecordTranscribe({ - apiKey: $config.SONIOX_API_KEY, - }); - RemoteTranscriber.start({ - model: 'stt-rt-preview', - stream: remoteAudioStream, - enableSpeakerDiarization: true, - languageHints: ['en', 'hi'], - // sampleRate: 48000, - // numChannels: 1, - // translation: { - // type: 'one_way', - // language_a: 'en', - // language_b: 'hi', - // }, - onPartialResult: results => { - const callback = this.customEvents.get('onSonioxTranscriptionResult'); - console.log( - `Sonix results remote ${remoteUID} stream callback ${results}`, - ); - if (callback) callback(remoteUID, {remoteUID, ...results}); - }, - onError: (status, message, code) => { - console.error( - `Soniox Transcription Error (${remoteUID}):`, - status, - message, - code, - ); - }, - onStarted: () => { - console.log(`Soniox started transcription for UID: ${remoteUID}`); - }, - onStateChange: ({oldState, newState}) => { - console.log(`Soniox state (${remoteUID}): ${oldState} → ${newState}`); - }, - onFinished: () => { - console.log( - ` Soniox transcription session finished for UID: ${remoteUID}`, - ); - }, - }); - - this.sonioxTranscribers.set(remoteUID, RemoteTranscriber); - } - } - - async startTranscripe() { - for (const [ - uid, - {stream, transcriber}, - ] of this.sonioxTranscribers.entries()) { - console.log('start trascribe for uid', uid); - await transcriber.start({ - model: 'stt-rt-preview', - stream, - // sampleRate: 48000, - // numChannels: 1, - translation: { - type: 'one_way', - language_a: 'en', - language_b: 'hi', - }, - onPartialResult: results => { - const callback = this.customEvents.get('onSonioxTranscriptionResult'); - if (callback) callback(uid, {uid, ...results}); - }, - onError: (status, message, code) => { - console.error( - `Soniox Transcription Error (${uid}):`, - status, - message, - code, - ); - }, - onStarted: () => { - console.log(`Soniox started transcription for UID: ${uid}`); - }, - onStateChange: ({oldState, newState}) => { - console.log(`Soniox state (${uid}): ${oldState} → ${newState}`); - }, - onFinished: () => { - console.log(` Soniox transcription session finished for UID: ${uid}`); - }, - }); - } - } - - async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) { - let stream: MediaStream | null = null; - - // Select local or remote stream - if (isLocal) { - if (!this.localStream?.audio) { - console.log('No local audio stream available', uid); - // return; - } else { - stream = new MediaStream([ - this.localStream.audio.getMediaStreamTrack(), - ]); - } - } else { - const remoteAudio = this.remoteStreams.get(uid)?.audio; - if (!remoteAudio) { - console.warn(`No remote audio stream found for UID ${uid}`); - // return; - } else { - stream = new MediaStream([remoteAudio.getMediaStreamTrack()]); - } - } - - // Create a new transcriber instance - const transcriber = new RecordTranscribe({apiKey}); - - // Start transcription for the single stream - // Track this transcriber this.sonioxTranscribers.set(uid, {transcriber, stream}); - console.log('all transcriper', this.sonioxTranscribers); - - logger.log( - LogSource.AgoraSDK, - 'Soniox', - `Started transcription for ${uid}`, - ); } stopSonioxTranscription(): void { for (const [uid, transcriber] of this.sonioxTranscribers.entries()) { transcriber.stop(); - logger.log( - LogSource.AgoraSDK, - 'Soniox', - `Stopped Soniox transcription for user ${uid}`, - ); + console.log(` Stopped Soniox transcription for user UID: ${uid}`); } this.sonioxTranscribers.clear(); } - stopTempSonixTranscription(localUid, RemoteUid): void { - this?.localTranscriber?.stop(); - this?.remoteTranscriber?.stop(); - - logger.log( - LogSource.AgoraSDK, - 'Soniox', - `Stopped Soniox transcription for userS L-${localUid} , R-${RemoteUid}`, - ); - } - getLocalVideoStats() { try { logger.log( diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index f443e720f..f35467b6a 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -7,50 +7,17 @@ import {useRtc, useContent, useLocalUid, useCaption} from 'customization-api'; const SonixCaptionContainer = () => { const {RtcEngineUnsafe} = useRtc(); - const [caption, setCaption] = useState('Listening...'); - const listenerRef = useRef(null); - // const [captions, setCaptions] = useState< - // Record - // >({}); const {defaultContent, activeUids, customContent} = useContent(); const localUid = useLocalUid(); - const {sonixCaptions, setSonixCaptions, captionFeed, setCaptionFeed} = - useCaption(); + const {captionFeed, setCaptionFeed} = useCaption(); + const scrollRef = React.useRef(null); useEffect(() => { // Add listener for transcription result - // RtcEngineUnsafe.addCustomListener( - // 'onSonioxTranscriptionResult', - // (uid, transcript) => { - // console.log('sonix Captions =>', uid, transcript); - // const newFinalTokens: string[] = []; - // let newNonFinal = ''; - - // for (const token of transcript.tokens || []) { - // if (token.is_final) { - // newFinalTokens.push(token.text); - // } else { - // newNonFinal += token.text; - // } - // } - - // setSonixCaptions(prev => { - // const prevFinal = prev[uid]?.final || []; - // return { - // ...prev, - // [uid]: { - // final: [...prevFinal, ...newFinalTokens], - // nonFinal: newNonFinal, - // }, - // }; - // }); - // }, - // ); - RtcEngineUnsafe.addCustomListener( 'onSonioxTranscriptionResult', (uid, transcript) => { - console.log('sonix transcript =>', transcript); + console.log('sonix transcript =>', uid, transcript); const finalText = transcript.tokens .filter(t => t.is_final) .map(t => t.text) @@ -64,8 +31,13 @@ const SonixCaptionContainer = () => { setCaptionFeed(prev => { const last = prev[prev.length - 1]; + // Skip if there's nothing new to add + if (!finalText && !nonFinalText) { + return prev; + } + + // If same speaker, merge into last line if (last && last.uid === uid) { - // Update final and nonFinal in same entry return [ ...prev.slice(0, -1), { @@ -75,7 +47,10 @@ const SonixCaptionContainer = () => { time: Date.now(), }, ]; - } else { + } + + // If speaker changes OR no previous entry + if (finalText || nonFinalText) { return [ ...prev, { @@ -86,64 +61,44 @@ const SonixCaptionContainer = () => { }, ]; } + + return prev; }); }, ); // Start transcription for the users in the call , later move to start / button - // activeUids.map(uid => { - // RtcEngineUnsafe.startSonioxTranscription( - // uid, - // $config.SONIOX_API_KEY, - // uid === localUid, - // ); - // }); - - // for (let i = 0; i < activeUids.length; i++) { - // const uid = activeUids[i]; - // console.log('actuve uid-localuid', uid, localUid); - // RtcEngineUnsafe.startSonioxTranscription( - // uid, - // $config.SONIOX_API_KEY, - // uid === localUid, - // ); - // } - - // RtcEngineUnsafe.startTranscripe(); - const remoteUid = activeUids.find(uid => uid !== localUid); - RtcEngineUnsafe.tempStartTranscribe(localUid, remoteUid); + activeUids.map(uid => { + RtcEngineUnsafe.startSonioxTranscription( + uid, + $config.SONIOX_API_KEY, + uid === localUid, + ); + }); return () => { - // RtcEngineUnsafe.stopSonioxTranscription(); // move to action menu - RtcEngineUnsafe.stopTempSonixTranscription(localUid, remoteUid); + RtcEngineUnsafe.stopSonioxTranscription(); }; }, []); return ( - // - // {Object.entries(sonixCaptions).map(([uid, {final, nonFinal}]) => ( - // - // {final.map((word, i) => ( - // - // {word} - // - // ))} - // {nonFinal} - // - // ))} - // - + { + scrollRef.current?.scrollToEnd({animated: true}); + }}> {captionFeed.map((entry, index) => ( {entry.nonFinal || entry.text ? defaultContent[entry.uid].name + ' : ' - : ''} + : ''}{' '} {entry.text} - {entry.nonFinal ? ( - {entry.nonFinal} - ) : null} + {entry.nonFinal && {entry.nonFinal}} ))} @@ -153,51 +108,41 @@ const SonixCaptionContainer = () => { export default SonixCaptionContainer; const styles = StyleSheet.create({ - container: { - paddingVertical: 4, - paddingHorizontal: 20, + scrollContainer: { + maxHeight: CAPTION_CONTAINER_HEIGHT, height: CAPTION_CONTAINER_HEIGHT, - justifyContent: 'center', - alignItems: 'center', - backgroundColor: 'grey', + backgroundColor: '#815f46', borderRadius: ThemeConfig.BorderRadius.small, marginTop: $config.ICON_TEXT ? 8 : 0, - width: '100%', + overflowY: 'scroll', }, - captionText: { - fontFamily: ThemeConfig.FontFamily.sansPro, - fontWeight: '400', - color: $config.FONT_COLOR, - fontSize: 24, - flexDirection: 'row', - flexWrap: 'nowrap', - textAlign: 'left', - whiteSpace: 'nowrap', + container: { + padding: 12, + flexGrow: 1, }, captionLine: { flexDirection: 'row', flexWrap: 'wrap', marginBottom: 4, - width: '100%', + flexShrink: 1, + lineHeight: 24, }, uid: { color: 'orange', fontWeight: 'bold', + fontSize: 18, - fontStyle: 'italic', + lineHeight: 24, }, content: { - flexShrink: 1, - fontFamily: ThemeConfig.FontFamily.sansPro, - fontWeight: '400', - color: $config.FONT_COLOR, - fontSize: 20, - flexWrap: 'wrap', + color: 'white', + fontSize: 18, + flexShrink: 1, // test + lineHeight: 24, }, live: { color: 'skyblue', - fontSize: 20, - flexWrap: 'wrap', - flexShrink: 1, + fontSize: 18, + lineHeight: 24, }, }); From 160789efab73e982e3db6aed05c93ffa67ea6908 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 12 Jun 2025 00:26:26 +0530 Subject: [PATCH 09/17] fixed stop transcription --- template/bridge/rtc/webNg/RtcEngine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index f44d41209..9a84e500f 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -337,7 +337,7 @@ export default class RtcEngine { }); // Track this transcriber - this.sonioxTranscribers.set(uid, {transcriber, stream}); + this.sonioxTranscribers.set(uid, transcriber); } stopSonioxTranscription(): void { From b8f4de5f4bd56f503a585f09d48f36d18cf6ab3c Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 12 Jun 2025 09:48:36 +0530 Subject: [PATCH 10/17] handle local mute transcript & transate config --- template/bridge/rtc/webNg/RtcEngine.ts | 22 ++++++++++++++----- .../caption/SonixCaptionContainer.tsx | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 9a84e500f..ae70059fa 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -268,6 +268,7 @@ export default class RtcEngine { this.appId = appId; this.sonioxTranscribers = new Map(); this.customEvents = new Map(); + this.localUserId = null; } addCustomListener(eventName: string, callback: (...args: any[]) => void) { this.customEvents.set(eventName, callback); @@ -282,6 +283,7 @@ export default class RtcEngine { // Select local or remote stream if (isLocal) { + this.localUserId = uid; if (!this.localStream?.audio) { console.log('No local audio stream available', uid); return; @@ -307,12 +309,12 @@ export default class RtcEngine { await transcriber.start({ model: 'stt-rt-preview', stream, - languageHints: ['en', 'hi'], - // translation: { - // type: 'one_way', - // source_languages: ['en'], - // target_language: 'hi', - // }, + languageHints: ['en'], + translation: { + type: 'one_way', + source_languages: ['en'], + target_language: 'hi', + }, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); if (callback) callback(uid, {uid, ...results}); @@ -1108,6 +1110,14 @@ export default class RtcEngine { // Release the lock once done this.muteLocalAudioMutex = false; this.isAudioEnabled = !muted; + + // Stop/ Start Local Transcriber on local mute/unmute + const transcriber = this.sonioxTranscribers.get(this.localUserId); + if (muted) { + await transcriber.stop(); + } else { + await transcriber.start(transcriber._audioOptions); + } // Unpublish only after when the user has joined the call if (!muted && !this.isAudioPublished && this.isJoined) { logger.log( diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index f35467b6a..4e575a130 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -19,12 +19,12 @@ const SonixCaptionContainer = () => { (uid, transcript) => { console.log('sonix transcript =>', uid, transcript); const finalText = transcript.tokens - .filter(t => t.is_final) + .filter(t => t.is_final && (!t.language || t.language === 'hi')) .map(t => t.text) .join(''); const nonFinalText = transcript.tokens - .filter(t => !t.is_final) + .filter(t => !t.is_final && (!t.language || t.language === 'hi')) .map(t => t.text) .join(''); From 8b9eff1ac4c3bff155ce5c82c571fecd158cfd3f Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 12 Jun 2025 11:18:13 +0530 Subject: [PATCH 11/17] sample rate added --- template/bridge/rtc/webNg/RtcEngine.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index ae70059fa..4b3d74a26 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -309,7 +309,9 @@ export default class RtcEngine { await transcriber.start({ model: 'stt-rt-preview', stream, - languageHints: ['en'], + languageHints: ['en', 'hi'], + sampleRate: 48000, + numChannels: 1, translation: { type: 'one_way', source_languages: ['en'], From a2bc289f3017e903e6903ef825fb4a005f132299 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 12 Jun 2025 12:33:32 +0530 Subject: [PATCH 12/17] remove translation --- template/bridge/rtc/webNg/RtcEngine.ts | 12 ++++++------ .../subComponents/caption/SonixCaptionContainer.tsx | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 4b3d74a26..c689161da 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -309,14 +309,14 @@ export default class RtcEngine { await transcriber.start({ model: 'stt-rt-preview', stream, - languageHints: ['en', 'hi'], + languageHints: ['en'], sampleRate: 48000, numChannels: 1, - translation: { - type: 'one_way', - source_languages: ['en'], - target_language: 'hi', - }, + // translation: { + // type: 'one_way', + // source_languages: ['en'], + // target_language: 'hi', + // }, onPartialResult: results => { const callback = this.customEvents.get('onSonioxTranscriptionResult'); if (callback) callback(uid, {uid, ...results}); diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 4e575a130..07b6f9c1e 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -19,12 +19,12 @@ const SonixCaptionContainer = () => { (uid, transcript) => { console.log('sonix transcript =>', uid, transcript); const finalText = transcript.tokens - .filter(t => t.is_final && (!t.language || t.language === 'hi')) + .filter(t => t.is_final) // && (!t.language || t.language === 'hi')) .map(t => t.text) .join(''); const nonFinalText = transcript.tokens - .filter(t => !t.is_final && (!t.language || t.language === 'hi')) + .filter(t => !t.is_final) // && (!t.language || t.language === 'hi')) .map(t => t.text) .join(''); From d5f05a8c5a043aee16374e2c911ebae79edbfc98 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Mon, 16 Jun 2025 16:45:53 +0530 Subject: [PATCH 13/17] handle user join --- template/bridge/rtc/webNg/RtcEngine.ts | 10 ++++++++++ .../subComponents/caption/SonixCaptionContainer.tsx | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index c689161da..81033bfec 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -241,6 +241,7 @@ export default class RtcEngine { public localStream: LocalStream = {}; public screenStream: ScreenStream = {}; public remoteStreams = new Map(); + public isSonioxPanelOpen = false; private inScreenshare: Boolean = false; private videoProfile: | VideoEncoderConfigurationPreset @@ -260,6 +261,7 @@ export default class RtcEngine { private muteLocalAudioMutex = false; private speakerDeviceId = ''; private usersVolumeLevel = []; + // Create channel profile and set it here initialize(context: RtcEngineContext) { @@ -861,6 +863,14 @@ export default class RtcEngine { 0, 0, ); + // Only start transcriber if panel is open & not already started + if (this.isSonioxPanelOpen && !this.sonioxTranscribers.has(user.uid)) { + this.startSonioxTranscription( + user.uid, + $config.SONIOX_API_KEY, + false, + ); + } } else { const videoTrack = user.videoTrack; // Play the video diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 07b6f9c1e..22040bef5 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -11,10 +11,12 @@ const SonixCaptionContainer = () => { const localUid = useLocalUid(); const {captionFeed, setCaptionFeed} = useCaption(); const scrollRef = React.useRef(null); + const engine = RtcEngineUnsafe; useEffect(() => { + engine.isSonioxPanelOpen = true; // Add listener for transcription result - RtcEngineUnsafe.addCustomListener( + engine.addCustomListener( 'onSonioxTranscriptionResult', (uid, transcript) => { console.log('sonix transcript =>', uid, transcript); @@ -69,7 +71,7 @@ const SonixCaptionContainer = () => { // Start transcription for the users in the call , later move to start / button activeUids.map(uid => { - RtcEngineUnsafe.startSonioxTranscription( + engine.startSonioxTranscription( uid, $config.SONIOX_API_KEY, uid === localUid, @@ -77,7 +79,8 @@ const SonixCaptionContainer = () => { }); return () => { - RtcEngineUnsafe.stopSonioxTranscription(); + engine.isSonioxPanelOpen = false; + engine.stopSonioxTranscription(); }; }, []); From 34f3440cbd0e0392c610c3f3f9ea3e46434a95d4 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Tue, 17 Jun 2025 11:05:19 +0530 Subject: [PATCH 14/17] add time stamp and lang detect false --- template/bridge/rtc/webNg/RtcEngine.ts | 2 ++ .../caption/SonixCaptionContainer.tsx | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/template/bridge/rtc/webNg/RtcEngine.ts b/template/bridge/rtc/webNg/RtcEngine.ts index 81033bfec..e45864429 100644 --- a/template/bridge/rtc/webNg/RtcEngine.ts +++ b/template/bridge/rtc/webNg/RtcEngine.ts @@ -314,6 +314,8 @@ export default class RtcEngine { languageHints: ['en'], sampleRate: 48000, numChannels: 1, + enableLanguageIdentification: false, + enableEndpointDetection: false, // translation: { // type: 'one_way', // source_languages: ['en'], diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 22040bef5..7b2fb30a2 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -5,6 +5,15 @@ import ThemeConfig from '../../theme'; import {CAPTION_CONTAINER_HEIGHT} from '../../components/CommonStyles'; import {useRtc, useContent, useLocalUid, useCaption} from 'customization-api'; +const formatTime = (timestamp: number) => { + const date = new Date(timestamp); + return date.toLocaleTimeString([], { + hour: '2-digit', + minute: '2-digit', + hour12: true, + }); +}; + const SonixCaptionContainer = () => { const {RtcEngineUnsafe} = useRtc(); const {defaultContent, activeUids, customContent} = useContent(); @@ -39,14 +48,14 @@ const SonixCaptionContainer = () => { } // If same speaker, merge into last line - if (last && last.uid === uid) { + if (last && last.uid === uid && Date.now()) { return [ ...prev.slice(0, -1), { ...last, text: last.text + (finalText ? ' ' + finalText : ''), nonFinal: nonFinalText, - time: Date.now(), + time: last.time, }, ]; } @@ -97,8 +106,10 @@ const SonixCaptionContainer = () => { {entry.nonFinal || entry.text - ? defaultContent[entry.uid].name + ' : ' - : ''}{' '} + ? `${defaultContent[entry.uid].name} (${formatTime( + entry.time, + )}) : ` + : ''} {entry.text} {entry.nonFinal && {entry.nonFinal}} From a0584eecf672072b25c53f64e9c57caa92a242f6 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Tue, 17 Jun 2025 13:33:05 +0530 Subject: [PATCH 15/17] added queue --- .../caption/SonixCaptionContainer.tsx | 163 ++++++++++++------ 1 file changed, 113 insertions(+), 50 deletions(-) diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 7b2fb30a2..2af8b6b17 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -4,6 +4,7 @@ import React, {useEffect, useRef, useState} from 'react'; import ThemeConfig from '../../theme'; import {CAPTION_CONTAINER_HEIGHT} from '../../components/CommonStyles'; import {useRtc, useContent, useLocalUid, useCaption} from 'customization-api'; +import PQueue from 'p-queue'; const formatTime = (timestamp: number) => { const date = new Date(timestamp); @@ -21,61 +22,15 @@ const SonixCaptionContainer = () => { const {captionFeed, setCaptionFeed} = useCaption(); const scrollRef = React.useRef(null); const engine = RtcEngineUnsafe; + const queueRef = React.useRef(new PQueue({concurrency: 1})); + const [autoScroll, setAutoScroll] = useState(true); useEffect(() => { engine.isSonioxPanelOpen = true; // Add listener for transcription result engine.addCustomListener( 'onSonioxTranscriptionResult', - (uid, transcript) => { - console.log('sonix transcript =>', uid, transcript); - const finalText = transcript.tokens - .filter(t => t.is_final) // && (!t.language || t.language === 'hi')) - .map(t => t.text) - .join(''); - - const nonFinalText = transcript.tokens - .filter(t => !t.is_final) // && (!t.language || t.language === 'hi')) - .map(t => t.text) - .join(''); - - setCaptionFeed(prev => { - const last = prev[prev.length - 1]; - - // Skip if there's nothing new to add - if (!finalText && !nonFinalText) { - return prev; - } - - // If same speaker, merge into last line - if (last && last.uid === uid && Date.now()) { - return [ - ...prev.slice(0, -1), - { - ...last, - text: last.text + (finalText ? ' ' + finalText : ''), - nonFinal: nonFinalText, - time: last.time, - }, - ]; - } - - // If speaker changes OR no previous entry - if (finalText || nonFinalText) { - return [ - ...prev, - { - uid, - text: finalText, - nonFinal: nonFinalText, - time: Date.now(), - }, - ]; - } - - return prev; - }); - }, + sonixCaptionCallback, ); // Start transcription for the users in the call , later move to start / button @@ -93,14 +48,122 @@ const SonixCaptionContainer = () => { }; }, []); + const sonixCaptionCallback1 = (uid, transcript) => { + console.log('sonix transcript =>', uid, transcript); + const finalText = transcript.tokens + .filter(t => t.is_final) // && (!t.language || t.language === 'hi')) + .map(t => t.text) + .join(''); + + const nonFinalText = transcript.tokens + .filter(t => !t.is_final) // && (!t.language || t.language === 'hi')) + .map(t => t.text) + .join(''); + + setCaptionFeed(prev => { + const last = prev[prev.length - 1]; + + // Skip if there's nothing new to add + if (!finalText && !nonFinalText) { + return prev; + } + + // If same speaker, merge into last line + if (last && last.uid === uid && Date.now()) { + return [ + ...prev.slice(0, -1), + { + ...last, + text: last.text + (finalText ? ' ' + finalText : ''), + nonFinal: nonFinalText, + time: last.time, + }, + ]; + } + + // If speaker changes OR no previous entry + if (finalText || nonFinalText) { + return [ + ...prev, + { + uid, + text: finalText, + nonFinal: nonFinalText, + time: Date.now(), + }, + ]; + } + + return prev; + }); + }; + + const sonixCaptionCallback = (uid, transcript) => { + const queueCallback = () => { + console.log('sonix transcript =>', uid, transcript); + const finalText = transcript.tokens + .filter(t => t.is_final) + .map(t => t.text) + .join(''); + + const nonFinalText = transcript.tokens + .filter(t => !t.is_final) + .map(t => t.text) + .join(''); + + setCaptionFeed(prev => { + const last = prev[prev.length - 1]; + + if (!finalText && !nonFinalText) return prev; + + if (last && last.uid === uid && Date.now()) { + return [ + ...prev.slice(0, -1), + { + ...last, + text: last.text + (finalText ? ' ' + finalText : ''), + nonFinal: nonFinalText, + time: last.time, + }, + ]; + } + + return [ + ...prev, + { + uid, + text: finalText, + nonFinal: nonFinalText, + time: Date.now(), + }, + ]; + }); + }; + if (transcript.tokens.length > 0) { + queueRef.current.add(queueCallback); + console.log('soniox stt- using pq queue'); + } + }; + + const handleScroll = event => { + const {layoutMeasurement, contentOffset, contentSize} = event.nativeEvent; + const isAtBottom = + layoutMeasurement.height + contentOffset.y >= contentSize.height - 20; + setAutoScroll(isAtBottom); + }; + return ( { - scrollRef.current?.scrollToEnd({animated: true}); + if (autoScroll) { + scrollRef.current?.scrollToEnd({animated: true}); + } }}> {captionFeed.map((entry, index) => ( From 7768dba695e502de802893edf7e19e5ca7ac8683 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Tue, 17 Jun 2025 16:25:48 +0530 Subject: [PATCH 16/17] handlering interruptions --- .../caption/SonixCaptionContainer.tsx | 147 +++++++----------- 1 file changed, 52 insertions(+), 95 deletions(-) diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 2af8b6b17..2120ffa59 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -17,23 +17,26 @@ const formatTime = (timestamp: number) => { const SonixCaptionContainer = () => { const {RtcEngineUnsafe} = useRtc(); - const {defaultContent, activeUids, customContent} = useContent(); + const {defaultContent, activeUids} = useContent(); const localUid = useLocalUid(); const {captionFeed, setCaptionFeed} = useCaption(); const scrollRef = React.useRef(null); - const engine = RtcEngineUnsafe; const queueRef = React.useRef(new PQueue({concurrency: 1})); const [autoScroll, setAutoScroll] = useState(true); + // in-progress captions per speaker + const activeCaptionsRef = useRef({}); + + const engine = RtcEngineUnsafe; + useEffect(() => { engine.isSonioxPanelOpen = true; - // Add listener for transcription result + engine.addCustomListener( 'onSonioxTranscriptionResult', sonixCaptionCallback, ); - // Start transcription for the users in the call , later move to start / button activeUids.map(uid => { engine.startSonioxTranscription( uid, @@ -48,101 +51,45 @@ const SonixCaptionContainer = () => { }; }, []); - const sonixCaptionCallback1 = (uid, transcript) => { - console.log('sonix transcript =>', uid, transcript); - const finalText = transcript.tokens - .filter(t => t.is_final) // && (!t.language || t.language === 'hi')) - .map(t => t.text) - .join(''); - - const nonFinalText = transcript.tokens - .filter(t => !t.is_final) // && (!t.language || t.language === 'hi')) - .map(t => t.text) - .join(''); - - setCaptionFeed(prev => { - const last = prev[prev.length - 1]; - - // Skip if there's nothing new to add - if (!finalText && !nonFinalText) { - return prev; - } - - // If same speaker, merge into last line - if (last && last.uid === uid && Date.now()) { - return [ - ...prev.slice(0, -1), - { - ...last, - text: last.text + (finalText ? ' ' + finalText : ''), - nonFinal: nonFinalText, - time: last.time, - }, - ]; - } - - // If speaker changes OR no previous entry - if (finalText || nonFinalText) { - return [ - ...prev, - { - uid, - text: finalText, - nonFinal: nonFinalText, - time: Date.now(), - }, - ]; - } - - return prev; - }); - }; - const sonixCaptionCallback = (uid, transcript) => { const queueCallback = () => { console.log('sonix transcript =>', uid, transcript); + const finalText = transcript.tokens .filter(t => t.is_final) .map(t => t.text) .join(''); - const nonFinalText = transcript.tokens .filter(t => !t.is_final) .map(t => t.text) .join(''); - setCaptionFeed(prev => { - const last = prev[prev.length - 1]; - - if (!finalText && !nonFinalText) return prev; - - if (last && last.uid === uid && Date.now()) { - return [ - ...prev.slice(0, -1), - { - ...last, - text: last.text + (finalText ? ' ' + finalText : ''), - nonFinal: nonFinalText, - time: last.time, - }, - ]; - } + // merge into in-progress buffer + const active = activeCaptionsRef.current[uid] || { + uid, + text: '', + nonFinal: '', + time: Date.now(), + }; - return [ - ...prev, - { - uid, - text: finalText, - nonFinal: nonFinalText, - time: Date.now(), - }, - ]; - }); + if (finalText) { + active.text = (active.text + ' ' + finalText).trim(); + } + active.nonFinal = nonFinalText; + active.time = Date.now(); + activeCaptionsRef.current[uid] = active; + + // If fully finalized, commit to feed + remove from active buffer + if (!nonFinalText && finalText) { + setCaptionFeed(prev => [...prev, {...active, nonFinal: ''}]); + delete activeCaptionsRef.current[uid]; + } else { + // partial update: force rerender by setting dummy feed (not needed in your hook-based context) + setCaptionFeed(prev => [...prev]); // triggers UI refresh + } }; - if (transcript.tokens.length > 0) { - queueRef.current.add(queueCallback); - console.log('soniox stt- using pq queue'); - } + + queueRef.current.add(queueCallback); }; const handleScroll = event => { @@ -165,19 +112,30 @@ const SonixCaptionContainer = () => { scrollRef.current?.scrollToEnd({animated: true}); } }}> + {/* Show committed lines */} {captionFeed.map((entry, index) => ( - + - {entry.nonFinal || entry.text - ? `${defaultContent[entry.uid].name} (${formatTime( - entry.time, - )}) : ` - : ''} + {defaultContent[entry.uid]?.name} ({formatTime(entry.time)}) : - {entry.text} - {entry.nonFinal && {entry.nonFinal}} + {entry.text} ))} + + {/* Show all active speakers */} + {Object.values(activeCaptionsRef.current) + .filter(entry => entry.text || entry.nonFinal) + .map((entry, index) => ( + + + {defaultContent[entry.uid]?.name} ({formatTime(entry.time)}) : + + {entry.text} + {entry.nonFinal && ( + {entry.nonFinal} + )} + + ))} ); }; @@ -207,14 +165,13 @@ const styles = StyleSheet.create({ uid: { color: 'orange', fontWeight: 'bold', - fontSize: 18, lineHeight: 24, }, content: { color: 'white', fontSize: 18, - flexShrink: 1, // test + flexShrink: 1, lineHeight: 24, }, live: { From f8e74fc3cc1979b34685c54837ee4b8865e357d9 Mon Sep 17 00:00:00 2001 From: Bhupendra Negi Date: Thu, 19 Jun 2025 11:40:57 +0530 Subject: [PATCH 17/17] dsf --- template/src/subComponents/caption/SonixCaptionContainer.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template/src/subComponents/caption/SonixCaptionContainer.tsx b/template/src/subComponents/caption/SonixCaptionContainer.tsx index 2120ffa59..569cc1c27 100644 --- a/template/src/subComponents/caption/SonixCaptionContainer.tsx +++ b/template/src/subComponents/caption/SonixCaptionContainer.tsx @@ -24,7 +24,7 @@ const SonixCaptionContainer = () => { const queueRef = React.useRef(new PQueue({concurrency: 1})); const [autoScroll, setAutoScroll] = useState(true); - // in-progress captions per speaker + // in-progress captions per speaker now const activeCaptionsRef = useRef({}); const engine = RtcEngineUnsafe;