diff --git a/src/ai-engine/stt.client.py b/src/ai-engine/stt.client.py
new file mode 100644
index 0000000..e040beb
--- /dev/null
+++ b/src/ai-engine/stt.client.py
@@ -0,0 +1,16 @@
+import asyncio
+import websockets
+
+async def main():
+    """Connect to the local STT backend and print live transcripts."""
+    uri = "ws://localhost:8000/ws"
+    async with websockets.connect(uri) as ws:
+        try:
+            # Stream messages until the server closes the connection.
+            async for text in ws:
+                print("Live:", text)
+        except websockets.ConnectionClosed:
+            print("Server closed the connection")
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/ai-engine/stt_backend.py b/src/ai-engine/stt_backend.py
new file mode 100644
index 0000000..3a64fbe
--- /dev/null
+++ b/src/ai-engine/stt_backend.py
@@ -0,0 +1,68 @@
+import asyncio
+import numpy as np
+import sounddevice as sd
+import whisper
+from collections import deque
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+
+app = FastAPI()
+model = whisper.load_model("base")
+
+SAMPLE_RATE = 16000                     # Whisper expects 16 kHz mono audio
+queue = asyncio.Queue()
+buffer = deque(maxlen=SAMPLE_RATE * 2)  # rolling window of the last 2 s of samples
+loop = None                             # set at startup; used by the PortAudio callback thread
+
+def audio_callback(indata, frames, time, status):
+    """PortAudio callback (runs in a non-asyncio thread): forward chunks to the loop."""
+    if status:
+        print("Audio status:", status)
+    # Guard: the callback could fire before the startup hook has set `loop`.
+    if loop is not None:
+        loop.call_soon_threadsafe(queue.put_nowait, indata.copy())
+
+async def audio_collector():
+    """Drain the audio queue and keep the rolling sample buffer up to date."""
+    print("Microphone started...")
+    while True:
+        chunk = await queue.get()
+        # chunk is (frames, 1); extend with channel 0 so the deque holds scalars.
+        buffer.extend(chunk[:, 0])
+
+@app.on_event("startup")
+async def start_mic():
+    """Open the microphone stream and start the collector task."""
+    global loop
+    loop = asyncio.get_running_loop()
+
+    stream = sd.InputStream(
+        samplerate=SAMPLE_RATE,
+        channels=1,
+        dtype="float32",  # Whisper requires float32 PCM
+        callback=audio_callback,
+    )
+    stream.start()
+
+    asyncio.create_task(audio_collector())
+
+@app.websocket("/ws")
+async def websocket_endpoint(ws: WebSocket):
+    """Push a fresh transcription of the rolling buffer every 0.5 s."""
+    await ws.accept()
+    print("Client connected")
+
+    try:
+        while True:
+            await asyncio.sleep(0.5)
+
+            # Wait until at least 1 s of audio has accumulated.
+            if len(buffer) < SAMPLE_RATE:
+                continue
+
+            audio = np.asarray(buffer, dtype=np.float32)
+            # Whisper inference is blocking; run it off the event loop.
+            result = await loop.run_in_executor(
+                None, lambda: model.transcribe(audio, fp16=False)
+            )
+
+            text = result["text"].strip()
+            if text:
+                await ws.send_text(text)
+    except WebSocketDisconnect:
+        print("Client disconnected")