Skip to content

Commit

Permalink
feat(ui): add page to talk with voice, transcription, and tts
Browse files Browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler committed Jun 7, 2024
1 parent 603d81d commit 2c05f54
Show file tree
Hide file tree
Showing 4 changed files with 327 additions and 0 deletions.
20 changes: 20 additions & 0 deletions core/http/routes/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
// Render index
return c.Render("views/chat", summary)
})

// GET /talk/ renders the voice conversation page ("views/talk").
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
	configs := cl.GetAllBackendConfigs()
	if len(configs) == 0 {
		// Without any model there is nothing to talk to: send the user to the
		// index, which suggests how to install models.
		return c.Redirect("/")
	}

	// Render the talk view; the first configured model is handed to the
	// template as "Model".
	return c.Render("views/talk", fiber.Map{
		"Title":        "LocalAI - Talk",
		"ModelsConfig": configs,
		"Model":        configs[0].Name,
		"Version":      internal.PrintableVersion(),
	})
})

app.Get("/chat/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()
Expand Down
174 changes: 174 additions & 0 deletions core/http/static/talk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@

// DOM handles used throughout this script.
const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

// Recorder instance created by startRecording() and stopped by stopRecording().
let mediaRecorder;
// Data chunks collected from the recorder's `dataavailable` events.
let audioChunks = [];
// True between a successful startRecording() and the end of the stop handler.
let isRecording = false;
// Chat messages ({ role, content }) sent with every /v1/chat/completions request.
let conversationHistory = [];
// Handle of the pending timeout that resets the conversation (see setResetTimer()).
let resetTimer;

/**
 * Reads the current content of the `#apiKey` input.
 *
 * @returns {string} The value of the API key field.
 */
function getApiKey() {
  const apiKeyInput = document.getElementById('apiKey');
  return apiKeyInput.value;
}

/**
 * Reads the option currently chosen in the `#modelSelect` dropdown.
 *
 * @returns {string} The selected LLM model name.
 */
function getModel() {
  const llmSelect = document.getElementById('modelSelect');
  return llmSelect.value;
}

/**
 * Reads the option currently chosen in the `#whisperModelSelect` dropdown.
 *
 * @returns {string} The selected transcription model name.
 */
function getWhisperModel() {
  const whisperSelect = document.getElementById('whisperModelSelect');
  return whisperSelect.value;
}

/**
 * Reads the option currently chosen in the `#ttsModelSelect` dropdown.
 *
 * @returns {string} The selected text-to-speech model name.
 */
function getTTSModel() {
  const ttsSelect = document.getElementById('ttsModelSelect');
  return ttsSelect.value;
}

/**
 * Forgets the whole conversation and cancels the pending auto-reset timer.
 *
 * Used both as the click handler of the "Reset conversation" link and as the
 * timeout callback scheduled by setResetTimer(), so the event argument is
 * optional.
 *
 * @param {Event} [event] - Click event when triggered from the reset link.
 */
function resetConversation(event) {
  // The reset control is an <a href="#">; without this the click would also
  // navigate to "#".
  if (event && typeof event.preventDefault === 'function') {
    event.preventDefault();
  }

  conversationHistory = [];
  console.log("Conversation has been reset.");
  clearTimeout(resetTimer);
}

/**
 * (Re)arms the inactivity timer: any pending timer is cancelled and a new one
 * is scheduled that calls resetConversation() after five minutes.
 */
function setResetTimer() {
  const fiveMinutesMs = 300000;

  clearTimeout(resetTimer);
  resetTimer = setTimeout(resetConversation, fiveMinutesMs);
}

// Wire the controls: the record button toggles capture on/off and the reset
// control clears the conversation history.
recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);

/**
 * Click handler of the record button: stops the capture when one is in
 * progress, otherwise starts a new one.
 */
function toggleRecording() {
  if (isRecording) {
    stopRecording();
    return;
  }
  startRecording();
}

/**
 * Asks for microphone access and starts capturing audio.
 *
 * On success the recorder is stored in `mediaRecorder`, incoming data is
 * collected in `audioChunks`, the button label switches to "Stop Recording"
 * and `isRecording` becomes true. When the MediaDevices API is missing or the
 * microphone cannot be opened, the user is alerted and the state is left
 * untouched.
 *
 * @returns {Promise<void>} Resolves once recording has started or the attempt
 *   has been abandoned; a microphone failure does not reject it.
 */
async function startRecording() {
  if (!navigator.mediaDevices) {
    alert('MediaDevices API not supported!');
    return;
  }

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (error) {
    // getUserMedia() rejects when permission is denied or no device exists;
    // the caller does not await this function, so report it here instead of
    // leaving an unhandled rejection.
    console.error('Unable to access the microphone:', error);
    alert('Unable to access the microphone. Please check the browser permissions.');
    return;
  }

  audioChunks = [];
  mediaRecorder = new MediaRecorder(stream);
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();
  recordButton.textContent = 'Stop Recording';
  isRecording = true;
}

/**
 * Stops the active recording and runs the voice round trip:
 * transcription -> chat completion -> text-to-speech -> playback.
 *
 * The loader is shown while the round trip runs. Whatever the outcome, the
 * button label, `isRecording` and the loader are restored afterwards so the
 * user can record again. Calls made while no recording is active (for example
 * a second click while the previous recording is still being processed) are
 * ignored.
 */
function stopRecording() {
  // MediaRecorder.stop() throws on an inactive recorder, so bail out early.
  if (!mediaRecorder || mediaRecorder.state === 'inactive') {
    return;
  }

  const recorder = mediaRecorder;

  // Attach the handler before stopping so the `stop` event can never be missed.
  recorder.onstop = async () => {
    const loader = document.getElementById("loader");
    loader.style.display = "block";

    try {
      // Release the microphone; otherwise the capture stays open after the
      // recording has ended.
      if (recorder.stream && typeof recorder.stream.getTracks === 'function') {
        recorder.stream.getTracks().forEach((track) => track.stop());
      }

      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      const transcript = await sendAudioToWhisper(audioBlob);
      console.log("Transcript:", transcript)
      const responseText = await sendTextToChatGPT(transcript);
      console.log("Response:", responseText)

      const ttsAudio = await getTextToSpeechAudio(responseText);
      playAudioResponse(ttsAudio);
    } catch (error) {
      console.error("Voice round trip failed:", error);
    } finally {
      // Always return the UI to the idle state, even when a request failed.
      recordButton.textContent = 'Record';
      isRecording = false;
      loader.style.display = "none";
    }
  };

  recorder.stop();
}

/**
 * Submit handler of the API key form: keeps the page from reloading, stores
 * the typed key in localStorage under "key" and removes focus from the input.
 *
 * @param {Event} event - The form's submit event.
 */
function submitKey(event) {
  event.preventDefault();

  const keyInput = document.getElementById("apiKey");
  localStorage.setItem("key", keyInput.value);
  keyInput.blur();
}

// Persist the API key whenever the key form (#key) is submitted.
document.getElementById("key").addEventListener("submit", submitKey);


// Pre-fill the API key input with the key saved by a previous submit, or leave
// it empty when nothing has been stored yet. Declared with `const` so the
// script no longer relies on creating an implicit global.
const storeKey = localStorage.getItem("key");
document.getElementById("apiKey").value = storeKey || "";


/**
 * Uploads the recorded audio to `/v1/audio/transcriptions` and returns the
 * transcript.
 *
 * The request is a multipart form carrying the audio (`file`) and the selected
 * whisper model (`model`), authorized with the key stored in localStorage.
 *
 * @param {Blob} audioBlob - The recorded audio.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob);
  formData.append('model', getWhisperModel());
  // Local binding: the original assignment created an implicit global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`
    },
    body: formData
  });

  // fetch() only rejects on network errors; surface HTTP failures explicitly
  // instead of returning `undefined` as the transcript.
  if (!response.ok) {
    throw new Error(`Transcription request failed with status ${response.status}`);
  }

  const result = await response.json();
  console.log("Whisper result:", result)
  return result.text;
}

/**
 * Sends the user's text, together with the conversation so far, to
 * `/v1/chat/completions` and returns the assistant's reply.
 *
 * The user message and the reply are appended to `conversationHistory` only
 * after a successful exchange, so a failed request does not leave an
 * unanswered user turn in the history. A successful exchange also re-arms the
 * inactivity timer.
 *
 * @param {string} text - What the user said.
 * @returns {Promise<string>} Content of the first choice's message.
 * @throws {Error} When the server answers with a non-2xx status or the
 *   response contains no completion.
 */
async function sendTextToChatGPT(text) {
  const userMessage = { role: "user", content: text };
  // Local binding: the original assignment created an implicit global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: getModel(),
      messages: [...conversationHistory, userMessage]
    })
  });

  // fetch() only rejects on network errors; HTTP failures must be checked.
  if (!response.ok) {
    throw new Error(`Chat completion request failed with status ${response.status}`);
  }

  const result = await response.json();
  const firstChoice = Array.isArray(result.choices) ? result.choices[0] : undefined;
  if (!firstChoice || !firstChoice.message) {
    throw new Error('Chat completion response did not contain any choice');
  }

  const responseText = firstChoice.message.content;
  conversationHistory.push(userMessage, { role: "assistant", content: responseText });

  setResetTimer();

  return responseText;
}

/**
 * Requests synthesized speech for the given text from `/v1/audio/speech`
 * using the selected TTS model.
 *
 * @param {string} text - The text to speak.
 * @returns {Promise<Blob>} The response body as a blob.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function getTextToSpeechAudio(text) {
  // Local binding: the original assignment created an implicit global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      input: text,
      model: getTTSModel(),
    })
  });

  // Without this check an error body would be handed to the audio player as
  // if it were audio data.
  if (!response.ok) {
    throw new Error(`Text-to-speech request failed with status ${response.status}`);
  }

  return response.blob();
}

/**
 * Loads the given audio into the `audioPlayback` element, reveals the player
 * and starts playback.
 *
 * The object URL created for the previous response is revoked once it has
 * been replaced, so blobs of earlier replies are not kept alive for the
 * lifetime of the page.
 *
 * @param {Blob} audioBlob - Audio data to play.
 */
function playAudioResponse(audioBlob) {
  const previousUrl = audioPlayback.src;
  const audioUrl = URL.createObjectURL(audioBlob);
  audioPlayback.src = audioUrl;
  audioPlayback.hidden = false;

  // Only URLs minted by createObjectURL() use the blob: scheme.
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  // play() returns a promise that rejects when playback cannot start (e.g.
  // blocked autoplay); log it instead of leaving an unhandled rejection.
  const playback = audioPlayback.play();
  if (playback && typeof playback.catch === 'function') {
    playback.catch((error) => {
      console.error("Audio playback failed:", error);
    });
  }
}

2 changes: 2 additions & 0 deletions core/http/views/partials/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand All @@ -32,6 +33,7 @@
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand Down
131 changes: 131 additions & 0 deletions core/http/views/talk.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
<!--
Part of this page is based on the OpenAI Chatbot example by David Härer:
https://github.com/david-haerer/chatapi
MIT License Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-->
<!doctype html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/talk.js"></script>
<style>
body {
overflow: hidden;
}
</style>
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">

{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

<div class="flex items-center justify-center">

<div x-show="component === 'menu'" id="menu">

<button @click="component = 'key'" title="Update API key"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>Set API Key🔑</button>

</div>

<form x-show="component === 'key'" id="key">
<input
type="password"
id="apiKey"
name="apiKey"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="API Key"
x-model.lazy="key"
/>
<button @click="component = 'menu'" type="submit" title="Save API key">
<i class="fa-solid fa-arrow-right"></i>
</button>
</form>
</div>
</div>

<div class="flex items-center justify-center">
<div class="w-full p-4 max-w-md border-t border-gray-700 ">
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
<div id="loader" class="my-2 loader" style="display: none;"></div>

<div class="mb-4" >
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
<select id="modelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>

<div class="mb-4" >
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
<select id="whisperModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"

>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<div class="mb-4" >
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
<select id="ttsModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<button id="recordButton"
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2"></i>Record</button>
<a id="resetButton"
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
href="#"
>Reset conversation</a>
<audio id="audioPlayback" controls hidden></audio>

</div>
</div>
</div>
</div>
</div>
</body>
</html>

0 comments on commit 2c05f54

Please sign in to comment.