-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ui): add page to talk with voice, transcription, and tts
Signed-off-by: Ettore Di Giacinto <[email protected]>
- Loading branch information
Showing
4 changed files
with
327 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
|
||
// Cached handles to the page controls this script drives.
const [recordButton, audioPlayback, resetButton] = ['recordButton', 'audioPlayback', 'resetButton']
    .map((id) => document.getElementById(id));

// Recorder state shared by the start/stop handlers.
let isRecording = false;
let mediaRecorder;
let audioChunks = [];

// Messages sent with every chat completion request, and the handle of the
// timer that clears them after a period without a completed chat exchange.
let conversationHistory = [];
let resetTimer;
|
||
/**
 * Reads the API key currently present in the `#apiKey` input.
 *
 * @returns {string} The input's current value.
 */
function getApiKey() {
    const apiKeyInput = document.getElementById('apiKey');
    return apiKeyInput.value;
}
|
||
/**
 * Reads the model chosen in the `#modelSelect` dropdown; this value is sent
 * as `model` in chat completion requests.
 *
 * @returns {string} The selected option's value.
 */
function getModel() {
    const modelSelect = document.getElementById('modelSelect');
    return modelSelect.value;
}
|
||
/**
 * Reads the model chosen in the `#whisperModelSelect` dropdown; this value is
 * sent as `model` in transcription requests.
 *
 * @returns {string} The selected option's value.
 */
function getWhisperModel() {
    const whisperSelect = document.getElementById('whisperModelSelect');
    return whisperSelect.value;
}
|
||
/**
 * Reads the model chosen in the `#ttsModelSelect` dropdown; this value is
 * sent as `model` in text-to-speech requests.
 *
 * @returns {string} The selected option's value.
 */
function getTTSModel() {
    const ttsSelect = document.getElementById('ttsModelSelect');
    return ttsSelect.value;
}
|
||
/**
 * Clears the chat history and cancels any pending idle-reset timer.
 *
 * The function is registered as the click handler of the reset control and is
 * also the callback of the idle timer, so the event argument is optional.
 *
 * @param {Event} [event] - Click event when triggered from the reset control.
 */
function resetConversation(event) {
    // The reset control is an anchor pointing at "#"; cancel the default
    // action so the click does not navigate or alter the URL fragment.
    if (event && typeof event.preventDefault === 'function') {
        event.preventDefault();
    }
    clearTimeout(resetTimer);
    conversationHistory = [];
    console.log("Conversation has been reset.");
}
|
||
/**
 * (Re)arms the idle timer that wipes the conversation history.
 *
 * Any previously scheduled reset is cancelled first, so only the most recent
 * call determines when the reset fires.
 *
 * @param {number} [delayMs=300000] - Delay before the reset, in milliseconds
 *     (defaults to 5 minutes).
 */
function setResetTimer(delayMs = 300000) {
    clearTimeout(resetTimer);
    resetTimer = setTimeout(resetConversation, delayMs);
}
|
||
// Wire the page controls to their handlers.
resetButton.addEventListener('click', resetConversation);
recordButton.addEventListener('click', toggleRecording);
|
||
/**
 * Click handler for the record button: stops the recording when one is in
 * progress, otherwise starts a new one.
 */
function toggleRecording() {
    if (isRecording) {
        stopRecording();
        return;
    }
    startRecording();
}
|
||
/**
 * Requests microphone access and starts capturing audio into `audioChunks`.
 *
 * On success the record button is relabelled and `isRecording` becomes true.
 * When the MediaDevices API is missing, or the microphone cannot be opened
 * (permission denied, no input device, ...), the user is alerted and the
 * recording state is left untouched.
 *
 * @returns {Promise<void>} Resolves once recording has started or was aborted.
 */
async function startRecording() {
    const devices = navigator.mediaDevices;
    if (!devices || typeof devices.getUserMedia !== 'function') {
        alert('MediaDevices API not supported!');
        return;
    }

    let stream;
    try {
        stream = await devices.getUserMedia({ audio: true });
    } catch (error) {
        // getUserMedia rejects when access is denied or no device is available;
        // report it instead of leaving an unhandled rejection.
        console.error("Could not access the microphone:", error);
        alert(`Could not access the microphone: ${error && error.message ? error.message : error}`);
        return;
    }

    audioChunks = [];
    mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
    };
    mediaRecorder.start();
    recordButton.textContent = 'Stop Recording';
    isRecording = true;
}
|
||
/**
 * Stops the active recording and runs the voice round-trip:
 * transcription -> chat completion -> text-to-speech -> playback.
 *
 * The loader is shown while the requests are in flight. Whatever the outcome,
 * the record button label, `isRecording` and the loader are restored so the
 * page never gets stuck after a failed request.
 */
function stopRecording() {
    const recorder = mediaRecorder;
    // Nothing to stop: never started, or a previous stop is still being
    // processed (calling stop() on an inactive recorder throws).
    if (!recorder || recorder.state === 'inactive') {
        return;
    }

    // Attach the handler before calling stop() so the stop event cannot be missed.
    recorder.onstop = async () => {
        const loader = document.getElementById("loader");
        loader.style.display = "block";
        try {
            // Release the microphone so the browser stops capturing audio.
            if (recorder.stream && typeof recorder.stream.getTracks === 'function') {
                recorder.stream.getTracks().forEach((track) => track.stop());
            }

            const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
            const transcript = await sendAudioToWhisper(audioBlob);
            console.log("Transcript:", transcript);
            const responseText = await sendTextToChatGPT(transcript);
            console.log("Response:", responseText);

            const ttsAudio = await getTextToSpeechAudio(responseText);
            playAudioResponse(ttsAudio);
        } catch (error) {
            console.error("Voice round-trip failed:", error);
        } finally {
            recordButton.textContent = 'Record';
            isRecording = false;
            loader.style.display = "none";
        }
    };
    recorder.stop();
}
|
||
/**
 * Submit handler of the API key form: stores the typed key in localStorage
 * under "key" and removes focus from the input.
 *
 * @param {Event} event - The form's submit event; its default action
 *     (submitting the form) is cancelled.
 */
function submitKey(event) {
    event.preventDefault();
    const apiKeyInput = document.getElementById("apiKey");
    localStorage.setItem("key", apiKeyInput.value);
    apiKeyInput.blur();
}
|
||
// Persist the API key whenever the key form is submitted.
document.getElementById("key").addEventListener("submit", submitKey);

// Pre-fill the key input with the previously saved key, if there is one.
// Declared with `const` so the assignment no longer creates an implicit global
// (which is also a ReferenceError in strict mode).
const storeKey = localStorage.getItem("key");
if (storeKey) {
    document.getElementById("apiKey").value = storeKey;
} else {
    document.getElementById("apiKey").value = null;
}
|
||
|
||
/**
 * Uploads the recorded audio to `/v1/audio/transcriptions` and returns the
 * transcript.
 *
 * The request is a multipart form carrying the audio as `file` and the
 * selected whisper model as `model`, authorised with the key saved in
 * localStorage.
 *
 * @param {Blob} audioBlob - The recorded audio.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
    // Local binding: the original assigned an undeclared `API_KEY`, leaking a global.
    const apiKey = localStorage.getItem("key");

    const formData = new FormData();
    formData.append('file', audioBlob);
    formData.append('model', getWhisperModel());

    const response = await fetch('/v1/audio/transcriptions', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${apiKey}`
        },
        body: formData
    });

    // Fail loudly here instead of passing an undefined transcript downstream.
    if (!response.ok) {
        throw new Error(`Transcription request failed with status ${response.status}`);
    }

    const result = await response.json();
    console.log("Whisper result:", result);
    return result.text;
}
|
||
/**
 * Sends the user's text, together with the conversation so far, to
 * `/v1/chat/completions` and returns the assistant's reply.
 *
 * On success the reply is appended to `conversationHistory` and the idle
 * reset timer is re-armed. On failure the user message added for this call is
 * removed again, so a retry does not send it twice.
 *
 * @param {string} text - The user's message (the transcript).
 * @returns {Promise<string>} The content of the first choice's message.
 * @throws {Error} When the server answers with a non-2xx status or the
 *     response carries no message content.
 */
async function sendTextToChatGPT(text) {
    const userMessage = { role: "user", content: text };
    conversationHistory.push(userMessage);
    // Local binding: the original assigned an undeclared `API_KEY`, leaking a global.
    const apiKey = localStorage.getItem("key");

    try {
        const response = await fetch('/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                model: getModel(),
                messages: conversationHistory
            })
        });

        if (!response.ok) {
            throw new Error(`Chat completion request failed with status ${response.status}`);
        }

        const result = await response.json();
        const firstChoice = result && Array.isArray(result.choices) ? result.choices[0] : undefined;
        const responseText = firstChoice && firstChoice.message ? firstChoice.message.content : undefined;
        if (typeof responseText !== 'string') {
            throw new Error('Chat completion response did not contain a message');
        }

        conversationHistory.push({ role: "assistant", content: responseText });
        setResetTimer();
        return responseText;
    } catch (error) {
        // Roll back this call's user message; the history may have been
        // replaced by a reset in the meantime, hence the lookup.
        const index = conversationHistory.lastIndexOf(userMessage);
        if (index !== -1) {
            conversationHistory.splice(index, 1);
        }
        throw error;
    }
}
|
||
/**
 * Asks `/v1/audio/speech` to synthesise the given text with the selected TTS
 * model and returns the resulting audio.
 *
 * @param {string} text - The text to speak.
 * @returns {Promise<Blob>} The response body as a blob.
 * @throws {Error} When the server answers with a non-2xx status (otherwise the
 *     error payload would be handed to the audio player as if it were audio).
 */
async function getTextToSpeechAudio(text) {
    // Local binding: the original assigned an undeclared `API_KEY`, leaking a global.
    const apiKey = localStorage.getItem("key");

    const response = await fetch('/v1/audio/speech', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json'
        },
        // Only `input` and `model` are sent; the original listed `backend` and
        // `voice` as commented-out fields and never sent them.
        body: JSON.stringify({
            input: text,
            model: getTTSModel(),
        })
    });

    if (!response.ok) {
        throw new Error(`Text-to-speech request failed with status ${response.status}`);
    }

    return response.blob();
}
|
||
/**
 * Loads the synthesised audio into the `#audioPlayback` element, reveals the
 * element and starts playback.
 *
 * @param {Blob} audioBlob - Audio data to play.
 */
function playAudioResponse(audioBlob) {
    const previousUrl = audioPlayback.src;
    audioPlayback.src = URL.createObjectURL(audioBlob);

    // Object URLs keep their blob alive until revoked; release the one that
    // was just replaced so repeated replies do not accumulate.
    if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
        URL.revokeObjectURL(previousUrl);
    }

    audioPlayback.hidden = false;
    // play() returns a promise that rejects when playback cannot start (for
    // example under an autoplay policy); the controls stay visible so the user
    // can start it manually.
    Promise.resolve(audioPlayback.play()).catch((error) => {
        console.warn("Audio playback could not start:", error);
    });
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
<!-- | ||
Part of this page is based on the OpenAI Chatbot example by David Härer: | ||
https://github.com/david-haerer/chatapi | ||
MIT License Copyright (c) 2023 David Härer | ||
Copyright (c) 2024 Ettore Di Giacinto | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. | ||
--> | ||
<!doctype html> | ||
<html lang="en"> | ||
{{template "views/partials/head" .}} | ||
<script defer src="/static/talk.js"></script> | ||
<style> | ||
body { | ||
overflow: hidden; | ||
} | ||
</style> | ||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }"> | ||
<div class="flex flex-col min-h-screen"> | ||
|
||
{{template "views/partials/navbar"}} | ||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " > | ||
<!-- Chat Header --> | ||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }"> | ||
|
||
<div class="flex items-center justify-center"> | ||
|
||
<div x-show="component === 'menu'" id="menu"> | ||
|
||
<button @click="component = 'key'" title="Update API key" | ||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong" | ||
>Set API Key🔑</button> | ||
|
||
</div> | ||
|
||
<form x-show="component === 'key'" id="key"> | ||
<input | ||
type="password" | ||
id="apiKey" | ||
name="apiKey" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
placeholder="API Key" | ||
x-model.lazy="key" | ||
/> | ||
<button @click="component = 'menu'" type="submit" title="Save API key"> | ||
<i class="fa-solid fa-arrow-right"></i> | ||
</button> | ||
</form> | ||
</div> | ||
</div> | ||
|
||
<div class="flex items-center justify-center"> | ||
<div class="w-full p-4 max-w-md border-t border-gray-700 "> | ||
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4"> | ||
<div id="loader" class="my-2 loader" style="display: none;"></div> | ||
|
||
<div class="mb-4" > | ||
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label> | ||
<select id="modelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
<div class="mb-4" > | ||
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label> | ||
<select id="whisperModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
|
||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<div class="mb-4" > | ||
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label> | ||
<select id="ttsModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<button id="recordButton" | ||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline" | ||
><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2"></i>Record</button> | ||
<a id="resetButton" | ||
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800" | ||
href="#" | ||
>Reset conversation</a> | ||
<audio id="audioPlayback" controls hidden></audio> | ||
|
||
</div> | ||
</div> | ||
</div> | ||
</div> | ||
</body> | ||
</html> |