From 2c05f54c3fc747b798fc01c754486b5b5dbc4cb9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 8 Jun 2024 00:12:46 +0200
Subject: [PATCH] feat(ui): add page to talk with voice, transcription, and tts

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/routes/ui.go               |  20 +++
 core/http/static/talk.js             | 174 +++++++++++++++++++++++++++
 core/http/views/partials/navbar.html |   2 +
 core/http/views/talk.html            | 131 ++++++++++++++++++++
 4 files changed, 327 insertions(+)
 create mode 100644 core/http/static/talk.js
 create mode 100644 core/http/views/talk.html

diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index efd083158956..e0313abf17ae 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
 		// Render index
 		return c.Render("views/chat", summary)
 	})
+	// Talk page: lets the user pick from the configured models and loads the voice UI.
+	app.Get("/talk/", auth, func(c *fiber.Ctx) error {
+		backendConfigs := cl.GetAllBackendConfigs()
+
+		if len(backendConfigs) == 0 {
+			// If no model is available redirect to the index which suggests how to install models
+			return c.Redirect("/")
+		}
+
+		summary := fiber.Map{
+			"Title":        "LocalAI - Talk",
+			"ModelsConfig": backendConfigs,
+			"Model":        backendConfigs[0].Name,
+			"Version":      internal.PrintableVersion(),
+		}
+
+		// Render the talk view
+		return c.Render("views/talk", summary)
+	})
+
 	app.Get("/chat/", auth, func(c *fiber.Ctx) error {
 
 		backendConfigs := cl.GetAllBackendConfigs()
diff --git a/core/http/static/talk.js b/core/http/static/talk.js
new file mode 100644
index 000000000000..1ab98ca76574
--- /dev/null
+++ b/core/http/static/talk.js
@@ -0,0 +1,174 @@
+// Talk page logic: record audio, transcribe it, send it to the chat model, play the spoken reply.
+const recordButton = document.getElementById('recordButton');
+const audioPlayback = document.getElementById('audioPlayback');
+const resetButton = document.getElementById('resetButton');
+
+let mediaRecorder; // assigned by startRecording()
+let audioChunks = []; // data collected from the recorder's dataavailable events
+let isRecording = false;
+let conversationHistory = []; // { role, content } messages sent with every chat request
+let resetTimer; // timeout handle managed by setResetTimer()
+// Returns the value currently held by the #apiKey input.
+function getApiKey() {
+    return document.getElementById('apiKey').value;
+}
+// Returns the model selected in #modelSelect (used for chat completions).
+function getModel() {
+    return document.getElementById('modelSelect').value;
+}
+// Returns the model selected in #whisperModelSelect (used for transcription).
+function getWhisperModel() {
+    return document.getElementById('whisperModelSelect').value;
+}
+// Returns the model selected in #ttsModelSelect (used for text-to-speech).
+function getTTSModel() {
+    return document.getElementById('ttsModelSelect').value;
+}
+// Clears the conversation history and cancels any pending automatic reset.
+function resetConversation() {
+    conversationHistory = [];
+    console.log("Conversation has been reset.");
+    clearTimeout(resetTimer);
+}
+// Restarts the countdown after which resetConversation() runs automatically.
+function setResetTimer() {
+    clearTimeout(resetTimer);
+    resetTimer = setTimeout(resetConversation, 300000); // Reset after 5 minutes
+}
+// Wire the record and reset controls to their handlers.
+recordButton.addEventListener('click', toggleRecording);
+resetButton.addEventListener('click', resetConversation);
+
+function toggleRecording() {
+    if (!isRecording) {
+        startRecording();
+    } else {
+        stopRecording();
+    }
+}
+
+async function startRecording() {
+    if (!navigator.mediaDevices) {
+        alert('MediaDevices API not supported!');
+        return;
+    }
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    mediaRecorder = new MediaRecorder(stream);
+    audioChunks = [];
+    mediaRecorder.ondataavailable = (event) => {
+        audioChunks.push(event.data);
+    };
+    mediaRecorder.start();
+    recordButton.textContent = 'Stop Recording';
+    isRecording = true;
+}
+
+function stopRecording() {
+    mediaRecorder.stop();
+    mediaRecorder.onstop = async () => {
+        document.getElementById("loader").style.display = "block";
+        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+        const transcript = await sendAudioToWhisper(audioBlob);
+        console.log("Transcript:", transcript)
+        const responseText = await sendTextToChatGPT(transcript);
+        console.log("Response:", responseText)
+
+        const ttsAudio = await getTextToSpeechAudio(responseText);
+        playAudioResponse(ttsAudio);
+
+        recordButton.textContent = 'Record';
+        isRecording = false;
+        document.getElementById("loader").style.display = "none";
+    };
+}
+// Form submit handler: stores the entered API key in localStorage under "key".
+function submitKey(event) {
+    event.preventDefault();
+    localStorage.setItem("key", document.getElementById("apiKey").value);
+    document.getElementById("apiKey").blur();
+}
+
+document.getElementById("key").addEventListener("submit", submitKey);
+
+
+storeKey = localStorage.getItem("key");
+if (storeKey) {
+  document.getElementById("apiKey").value = storeKey;
+} else {
+  document.getElementById("apiKey").value = null;
+}
+
+
+async function sendAudioToWhisper(audioBlob) {
+    const formData = new FormData();
+    formData.append('file', audioBlob);
+    formData.append('model', getWhisperModel());
+    API_KEY = localStorage.getItem("key");
+
+    const response = await fetch('/v1/audio/transcriptions', {
+        method: 'POST',
+        headers: {
+            'Authorization': `Bearer ${API_KEY}`
+        },
+        body: formData
+    });
+
+    const result = await response.json();
+    console.log("Whisper result:", result)
+    return result.text;
+}
+
+async function sendTextToChatGPT(text) {
+    conversationHistory.push({ role: "user", content: text });
+    API_KEY = localStorage.getItem("key");
+
+    const response = await fetch('/v1/chat/completions', {
+        method: 'POST',
+        headers: {
+            'Authorization': `Bearer ${API_KEY}`,
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify({
+            model: getModel(),
+            messages: conversationHistory
+        })
+    });
+
+    const result = await response.json();
+    const responseText = result.choices[0].message.content;
+    conversationHistory.push({ role: "assistant", content: responseText });
+
+    setResetTimer();
+
+    return responseText;
+}
+
+async function getTextToSpeechAudio(text) {
+    API_KEY = localStorage.getItem("key");
+
+    const response = await fetch('/v1/audio/speech', {
+        
+        method: 'POST',
+        headers: {
+            'Authorization': `Bearer ${API_KEY}`,
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify({ 
+          //  "backend": "string",
+            input: text,
+            model: getTTSModel(),
+           // "voice": "string"
+         })
+    });
+
+    const audioBlob = await response.blob();
+    return audioBlob;  // Return the blob directly
+}
+
+function playAudioResponse(audioBlob) {
+    const audioUrl = URL.createObjectURL(audioBlob);
+    audioPlayback.src = audioUrl;
+    audioPlayback.hidden = false;
+    audioPlayback.play();
+}
+
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index be238479f8b3..caa1f3b77c9f 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -20,6 +20,7 @@
                 <a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
                 <a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
                 <a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                 <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
             </div>
         </div>
@@ -32,6 +33,7 @@
                 <a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
                 <a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
                 <a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                 <a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
             </div>
         </div>
diff --git a/core/http/views/talk.html b/core/http/views/talk.html
new file mode 100644
index 000000000000..08caaaa66008
--- /dev/null
+++ b/core/http/views/talk.html
@@ -0,0 +1,131 @@
+<!--
+
+Part of this page is based on the OpenAI Chatbot example by David Härer:
+https://github.com/david-haerer/chatapi
+
+MIT License Copyright (c) 2023 David Härer
+            Copyright (c) 2024 Ettore Di Giacinto
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+-->
+<!doctype html>
+<html lang="en">
+  {{template "views/partials/head" .}}
+  <script defer src="/static/talk.js"></script>
+  <style>
+    body {
+        overflow: hidden; 
+    }
+  </style>
+  <body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
+    <div class="flex flex-col min-h-screen">
+
+    {{template "views/partials/navbar"}}
+    <div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
+     <!-- Chat Header -->
+      <div class="border-b border-gray-700 p-4"  x-data="{ component: 'menu' }">
+
+        <div class="flex items-center justify-center">
+
+          <div x-show="component === 'menu'" id="menu">
+        
+            <button @click="component = 'key'" title="Update API key"
+            class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
+            >Set API Key🔑</button>
+            
+          </div>
+      
+        <form x-show="component === 'key'" id="key">
+          <input
+            type="password"
+            id="apiKey"
+            name="apiKey"
+            class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
+            placeholder="API Key"
+            x-model.lazy="key"
+          />
+          <button @click="component = 'menu'" type="submit" title="Save API key">
+            <i class="fa-solid fa-arrow-right"></i>
+          </button>
+        </form>
+        </div>
+      </div>
+
+    <div class="flex items-center justify-center">
+    <div class="w-full p-4  max-w-md border-t border-gray-700 ">
+      <div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
+      <div id="loader" class="my-2 loader" style="display: none;"></div>
+ 
+      <div class="mb-4" >
+        <label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
+        <select id="modelSelect"
+        class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
+        >
+          <option value="" disabled class="text-gray-400" >Select a model</option>
+
+          {{ range .ModelsConfig }}
+          <option value="{{.Name}}"  class="bg-gray-700 text-white">{{.Name}}</option>
+          {{ end }}
+        </select>
+      </div>
+
+      <div class="mb-4" >
+        <label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
+          <select id="whisperModelSelect"
+          class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
+
+          >
+            <option value="" disabled class="text-gray-400" >Select a model</option>
+
+            {{ range .ModelsConfig }}
+            <option value="{{.Name}}"  class="bg-gray-700 text-white">{{.Name}}</option>
+            {{ end }}
+          </select>
+      </div>
+  
+  
+      <div class="mb-4" >
+        <label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
+        <select id="ttsModelSelect"
+        class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
+        >
+          <option value="" disabled class="text-gray-400" >Select a model</option>
+          {{ range .ModelsConfig }}
+          <option value="{{.Name}}"  class="bg-gray-700 text-white">{{.Name}}</option>
+          {{ end }}
+        </select>
+      </div>
+
+
+      <button id="recordButton"
+        class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
+      ><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2"></i>Record</button>
+      <a id="resetButton"
+      class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
+      href="#"
+      >Reset conversation</a>
+      <audio id="audioPlayback" controls hidden></audio>
+
+        </div>
+      </div>
+      </div>
+    </div>
+  </body>
+</html>