-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ui): add page to talk with voice, transcription, and tts
Signed-off-by: Ettore Di Giacinto <[email protected]>
- Loading branch information
Showing
4 changed files
with
300 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
|
||
// DOM handles used throughout the talk page.
const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

// Mutable page state.
let mediaRecorder; // MediaRecorder for the capture in progress (set by startRecording)
let audioChunks = []; // data chunks delivered by the recorder for the current capture
let isRecording = false; // true from a successful start until the reply has been handled
let conversationHistory = []; // chat messages ({ role, content }) sent with each completion request
let resetTimer; // handle of the pending timeout that clears conversationHistory
|
||
/**
 * Reads the current value of the `apiKey` input field.
 *
 * @returns {string} The text currently in the API key field.
 */
function getApiKey() {
  const apiKeyInput = document.getElementById('apiKey');
  return apiKeyInput.value;
}
|
||
/**
 * Reads the value selected in the `modelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getModel() {
  const modelSelect = document.getElementById('modelSelect');
  return modelSelect.value;
}
|
||
/**
 * Reads the value selected in the `whisperModelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getWhisperModel() {
  const whisperSelect = document.getElementById('whisperModelSelect');
  return whisperSelect.value;
}
|
||
/**
 * Reads the value selected in the `ttsModelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getTTSModel() {
  const ttsSelect = document.getElementById('ttsModelSelect');
  return ttsSelect.value;
}
|
||
/**
 * Clears the stored conversation and cancels any pending automatic reset.
 *
 * Used both as a click handler and as the timeout callback armed by
 * setResetTimer(); any argument it receives is ignored.
 */
function resetConversation() {
  conversationHistory = [];
  console.log("Conversation has been reset.");
  // Cancel the pending timer so a manual reset is not followed by a second one.
  clearTimeout(resetTimer);
}
|
||
/**
 * (Re)arms the timer that clears the conversation after a period of time.
 *
 * Any timer armed by a previous call is cancelled first, so only the most
 * recent call's deadline applies.
 *
 * @param {number} [delayMs=300000] Delay before the reset, in milliseconds
 *   (defaults to 5 minutes).
 */
function setResetTimer(delayMs = 5 * 60 * 1000) {
  clearTimeout(resetTimer);
  resetTimer = setTimeout(resetConversation, delayMs);
}
|
||
recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', (event) => {
  // The reset control is an <a href="#">; suppress the default navigation so
  // the click only resets the conversation.
  event.preventDefault();
  resetConversation();
});
|
||
/**
 * Click handler for the record button: starts a capture when idle, stops the
 * current one otherwise.
 */
function toggleRecording() {
  if (isRecording) {
    stopRecording();
    return;
  }
  // startRecording() is async; handle a rejection here so it does not become
  // an unhandled promise rejection.
  startRecording().catch((err) => {
    console.error('Failed to start recording:', err);
  });
}
|
||
/**
 * Requests microphone access and starts a new recording.
 *
 * On success the recorder is stored in `mediaRecorder`, `audioChunks` is
 * emptied, the record button is relabelled and `isRecording` becomes true.
 * If the MediaDevices API is missing or the microphone cannot be opened, the
 * user is alerted and the page state is left unchanged.
 *
 * @returns {Promise<void>}
 */
async function startRecording() {
  if (!navigator.mediaDevices) {
    alert('MediaDevices API not supported!');
    return;
  }

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // getUserMedia rejects when permission is denied or no input device exists.
    console.error('Unable to access the microphone:', err);
    alert('Unable to access the microphone.');
    return;
  }

  audioChunks = [];
  mediaRecorder = new MediaRecorder(stream);
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();

  recordButton.textContent = 'Stop Recording';
  isRecording = true;
}
|
||
/**
 * Stops the active recording and runs the voice round-trip:
 * transcription -> chat completion -> text-to-speech -> playback.
 *
 * The loader element is shown while the round-trip runs. Whether it succeeds
 * or fails, the loader is hidden again, the record button is relabelled
 * 'Record' and `isRecording` is cleared, so the page stays usable after an
 * error. Calling this with no recorder, or one that is already inactive
 * (e.g. a second click while a reply is still being processed), does nothing.
 */
function stopRecording() {
  const recorder = mediaRecorder;
  if (!recorder || recorder.state === 'inactive') {
    return;
  }

  // Attach the handler before stop() so the stop event cannot be missed.
  recorder.onstop = async () => {
    const loader = document.getElementById("loader");
    loader.style.display = "block";

    // Release the microphone; the captured data is already in audioChunks.
    recorder.stream.getTracks().forEach((track) => track.stop());

    try {
      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      const transcript = await sendAudioToWhisper(audioBlob);
      console.log("Transcript:", transcript);
      const responseText = await sendTextToChatGPT(transcript);
      console.log("Response:", responseText);

      const ttsAudio = await getTextToSpeechAudio(responseText);
      playAudioResponse(ttsAudio);
    } catch (err) {
      console.error('Voice round-trip failed:', err);
    } finally {
      recordButton.textContent = 'Record';
      isRecording = false;
      loader.style.display = "none";
    }
  };

  recorder.stop();
}
|
||
/**
 * Submit handler for the API key form: stores the entered key in
 * localStorage under "key" and removes focus from the input.
 *
 * @param {Event} event Submit event; its default action (form submission)
 *   is cancelled.
 */
function submitKey(event) {
  event.preventDefault();
  const apiKeyInput = document.getElementById("apiKey");
  localStorage.setItem("key", apiKeyInput.value);
  apiKeyInput.blur();
}
|
||
// Persist the API key whenever the key form is submitted.
document.getElementById("key").addEventListener("submit", submitKey);

// Pre-fill the key field from localStorage; fall back to an empty field when
// nothing has been saved yet.
const storedKey = localStorage.getItem("key");
document.getElementById("apiKey").value = storedKey || "";
|
||
|
||
/**
 * Uploads recorded audio to the transcription endpoint.
 *
 * The request is authorised with the key stored in localStorage under "key"
 * and uses the model returned by getWhisperModel().
 *
 * @param {Blob} audioBlob Recorded audio to transcribe.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob);
  formData.append('model', getWhisperModel());
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`
    },
    body: formData
  });

  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  const result = await response.json();
  console.log("Whisper result:", result);
  return result.text;
}
|
||
/**
 * Sends the user's text, together with the conversation so far, to the chat
 * completions endpoint and returns the assistant's reply.
 *
 * The user and assistant turns are appended to `conversationHistory` only
 * after a successful reply, so a failed request does not leave a dangling
 * user message in the history. A successful call also re-arms the
 * conversation reset timer.
 *
 * @param {string} text The user's message.
 * @returns {Promise<string>} Content of the first choice's message.
 * @throws {Error} If the server answers with a non-2xx status or the
 *   response contains no choice with message content.
 */
async function sendTextToChatGPT(text) {
  const userMessage = { role: "user", content: text };
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: getModel(),
      messages: conversationHistory.concat([userMessage])
    })
  });

  if (!response.ok) {
    throw new Error(`Chat completion request failed: ${response.status} ${response.statusText}`);
  }

  const result = await response.json();
  const firstChoice = Array.isArray(result.choices) ? result.choices[0] : undefined;
  if (!firstChoice || !firstChoice.message || typeof firstChoice.message.content !== 'string') {
    throw new Error('Chat completion response did not contain a message');
  }

  const responseText = firstChoice.message.content;
  conversationHistory.push(userMessage, { role: "assistant", content: responseText });

  setResetTimer();

  return responseText;
}
|
||
/**
 * Requests synthesized speech for the given text from the speech endpoint.
 *
 * The request is authorised with the key stored in localStorage under "key"
 * and uses the model returned by getTTSModel().
 *
 * @param {string} text Text to synthesize.
 * @returns {Promise<Blob>} The response body as a Blob.
 * @throws {Error} If the server answers with a non-2xx status (so an error
 *   body is never handed to the audio player).
 */
async function getTextToSpeechAudio(text) {
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      input: text,
      model: getTTSModel(),
    })
  });

  if (!response.ok) {
    throw new Error(`Text-to-speech request failed: ${response.status} ${response.statusText}`);
  }

  return response.blob();
}
|
||
/**
 * Loads an audio blob into the page's audio element, reveals the element and
 * starts playback.
 *
 * @param {Blob} audioBlob Audio data to play.
 */
function playAudioResponse(audioBlob) {
  // Release the object URL created for the previous reply; otherwise every
  // reply keeps its blob alive for the lifetime of the page.
  const previousUrl = audioPlayback.src;
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  audioPlayback.src = URL.createObjectURL(audioBlob);
  audioPlayback.hidden = false;
  // play() returns a promise that rejects when playback cannot start.
  audioPlayback.play().catch((err) => {
    console.error('Audio playback failed:', err);
  });
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
<!doctype html> | ||
<html lang="en"> | ||
{{template "views/partials/head" .}} | ||
<script defer src="/static/talk.js"></script> | ||
<style> | ||
body { | ||
overflow: hidden; | ||
} | ||
</style> | ||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }"> | ||
<div class="flex flex-col min-h-screen"> | ||
|
||
{{template "views/partials/navbar"}} | ||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " > | ||
<!-- Chat Header --> | ||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }"> | ||
|
||
<div class="flex items-center justify-center"> | ||
|
||
<div x-show="component === 'menu'" id="menu"> | ||
|
||
<button @click="component = 'key'" title="Update API key" | ||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong" | ||
>Set API Key🔑</button> | ||
|
||
</div> | ||
|
||
<form x-show="component === 'key'" id="key"> | ||
<input | ||
type="password" | ||
id="apiKey" | ||
name="apiKey" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
placeholder="API Key" | ||
x-model.lazy="key" | ||
/> | ||
<button @click="component = 'menu'" type="submit" title="Save API key"> | ||
<i class="fa-solid fa-arrow-right"></i> | ||
</button> | ||
</form> | ||
</div> | ||
</div> | ||
|
||
<div class="flex items-center justify-center"> | ||
<div class="w-full p-4 max-w-md border-t border-gray-700 "> | ||
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4"> | ||
<div id="loader" class="my-2 loader" style="display: none;"></div> | ||
|
||
<div class="mb-4" > | ||
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label> | ||
<select id="modelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
<div class="mb-4" > | ||
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label> | ||
<select id="whisperModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
|
||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<div class="mb-4" > | ||
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label> | ||
<select id="ttsModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<button id="recordButton" | ||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline" | ||
><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2"></i>Record</button> | ||
<a id="resetButton" | ||
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800" | ||
href="#" | ||
>Reset conversation</a> | ||
<audio id="audioPlayback" controls hidden></audio> | ||
|
||
</div> | ||
</div> | ||
</div> | ||
</div> | ||
</body> | ||
</html> |