Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

openai compatible worker #314

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1720b78
LiveKit Pipeline Agent (#4)
benxu3 Nov 26, 2024
de2a7cb
add token in qr
benxu3 Nov 26, 2024
76f0847
fix dependency issues
benxu3 Nov 26, 2024
37198d8
Merge branch 'main' of https://github.com/benxu3/01
benxu3 Nov 26, 2024
2f53be0
update livekit server and profile docs
benxu3 Dec 9, 2024
f6c13a1
send multimodal message on startup of multimodal agent
benxu3 Dec 9, 2024
07672f4
add voice assistant state communication and clear chat context
benxu3 Dec 9, 2024
178ffc8
update logging with debug env variable
benxu3 Dec 9, 2024
c2de04a
update profiles to be compatible with new interpreter
benxu3 Dec 9, 2024
bba33db
update server to use new interpreter
benxu3 Dec 9, 2024
ba2813d
upgrade interpreter and livekit agents
benxu3 Dec 9, 2024
cedda96
use participant token in meet_url
benxu3 Dec 9, 2024
8f6d5fd
remove assistant fnc
benxu3 Dec 9, 2024
4e77a57
remove duplicate fnc_ctx declaration
benxu3 Dec 9, 2024
84e05db
add local setup docs
benxu3 Dec 30, 2024
095b704
add basic interrupt logic
benxu3 Dec 30, 2024
6084e25
refactor logging outside logic
benxu3 Dec 30, 2024
bd6f530
replace hosted livekit meet with local meet link
benxu3 Dec 30, 2024
6110e70
add local stt & tts, add anticipation logic, remove video context acc…
benxu3 Dec 30, 2024
4c271b1
remove separate transcriptions
benxu3 Dec 30, 2024
f68f83c
update local and default profile
benxu3 Dec 30, 2024
3f6ba52
add meet flag and better error handling
benxu3 Dec 30, 2024
7207add
run worker in dev mode
benxu3 Dec 30, 2024
0c6a2cd
fix error on local tts docs
benxu3 Dec 31, 2024
f989731
move tts and stt to 9001 and 9002
benxu3 Dec 31, 2024
ab8055e
draft main cli
benxu3 Jan 1, 2025
a2f86af
make request based on updated chat ctx in anticipation
benxu3 Jan 1, 2025
ce52aa6
fix cli bug in main
benxu3 Jan 1, 2025
16fb2b3
remove test.py
benxu3 Jan 1, 2025
8c89960
revert anticipation to default
benxu3 Jan 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add voice assistant state communication and clear chat context
  • Loading branch information
benxu3 committed Dec 9, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 07672f498be0aa2a3222a262fbac76e99c3c97d3
54 changes: 51 additions & 3 deletions software/source/server/livekit/worker.py
Original file line number Diff line number Diff line change
@@ -24,9 +24,12 @@

# File that log_message() appends to (relative to the working directory).
LOG_FILE_PATH = "worker.txt"
# File logging is opt-in: on only when the DEBUG environment variable is
# "true" in any letter case; unset or any other value leaves it off.
DEBUG = os.environ.get("DEBUG", "false").lower() == "true"

def log_message(message: str) -> None:
    """Append a timestamped message to the log file when debugging is on.

    Does nothing unless the module-level DEBUG flag is truthy. Otherwise one
    line of the form "<YYYY-MM-DD HH:MM:SS> - <message>" is appended to
    LOG_FILE_PATH.

    Args:
        message: Text to record after the timestamp.
    """
    if not DEBUG:
        return
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Pin the encoding: logged content (e.g. chat text) may contain non-ASCII
    # characters that the platform's default encoding cannot represent.
    with open(LOG_FILE_PATH, 'a', encoding='utf-8') as log_file:
        log_file.write(f"{timestamp} - {message}\n")
@@ -101,8 +104,6 @@ async def publish_static_image():

tts_provider = os.getenv('01_TTS', '').lower()
stt_provider = os.getenv('01_STT', '').lower()
tts_provider='elevenlabs'
stt_provider='deepgram'

# Add plugins here
if tts_provider == 'openai':
@@ -170,6 +171,8 @@ def _before_llm_cb(

else:
async def process_query():
log_message(f"[before_llm_cb] processing query in VAD with chat_ctx: {chat_ctx}")

if remote_video_processor and not video_muted:
video_frame = await remote_video_processor.get_current_frame()
if video_frame:
@@ -185,7 +188,7 @@ async def process_query():
return process_query()

############################################################
# on_message_received implementation
# on_message_received helper
############################################################
async def _on_message_received(msg: str):
nonlocal push_to_talk
@@ -318,9 +321,39 @@ def on_track_unmuted(participant: rtc.RemoteParticipant, publication: rtc.TrackP
video_muted = False
log_message(f"Track unmuted: {publication.kind}")


############################################################
# on data received callback
############################################################
async def _publish_clear_chat():
    """Publish "{CLEAR_CHAT}" on the chat_context topic, then speak the start message."""
    await ctx.room.local_participant.publish_data(
        payload="{CLEAR_CHAT}",
        topic="chat_context",
    )
    log_message("sent {CLEAR_CHAT} to chat_context for client to clear")
    # NOTE(review): this reads start_message off the assistant object, while
    # the startup greeting passes a local `start_message` to say() -- confirm
    # the attribute actually exists on the assistant.
    await assistant.say(assistant.start_message)


# Strong references to in-flight clear-chat tasks: the event loop keeps only
# weak references, so an otherwise unreferenced task may be collected mid-run.
_clear_chat_tasks = set()


@ctx.room.on("data_received")
def on_data_received(data: rtc.DataPacket):
    """Log an incoming data packet and reset the chat on a clear request.

    Every packet is logged. When the packet's topic is "chat_context" and its
    payload is "{CLEAR_CHAT}", the assistant's chat messages are cleared, a
    system prompt about image handling is appended, and
    _publish_clear_chat() is scheduled as a task.
    """
    # Decoding happens before the topic check, so payloads on any topic reach
    # this line; replace undecodable bytes rather than raising in the callback.
    decoded_data = data.data.decode(errors="replace")
    log_message(f"received data from {data.topic}: {decoded_data}")
    if data.topic != "chat_context" or decoded_data != "{CLEAR_CHAT}":
        return

    assistant.chat_ctx.messages.clear()
    assistant.chat_ctx.append(
        role="system",
        text=(
            "Only take into context the user's image if their message is relevant or pertaining to the image. Otherwise just keep in context that the image is present but do not acknowledge or mention it in your response."
        ),
    )
    log_message("cleared chat_ctx")
    log_message(f"chat_ctx is now {assistant.chat_ctx}")

    # Keep the task referenced until it finishes, then drop the reference.
    clear_task = asyncio.create_task(_publish_clear_chat())
    _clear_chat_tasks.add(clear_task)
    clear_task.add_done_callback(_clear_chat_tasks.discard)


############################################################
# Start the voice assistant with the LiveKit room
############################################################

assistant = VoicePipelineAgent(
vad=silero.VAD.load(),
stt=stt,
@@ -336,6 +369,21 @@ def on_track_unmuted(participant: rtc.RemoteParticipant, publication: rtc.TrackP
# Greets the user with an initial message
await assistant.say(start_message, allow_interruptions=True)

############################################################
# publish agent speaking-state changes to the room
############################################################
# Strong references to in-flight publish tasks: the event loop keeps only weak
# references, so an otherwise unreferenced task may be collected mid-run.
_agent_state_tasks = set()


def _publish_agent_state(state_payload: str):
    """Schedule publishing state_payload on the "agent_state" data topic."""
    publish_task = asyncio.create_task(
        ctx.room.local_participant.publish_data(payload=state_payload, topic="agent_state")
    )
    # Keep the task referenced until it finishes, then drop the reference.
    _agent_state_tasks.add(publish_task)
    publish_task.add_done_callback(_agent_state_tasks.discard)


@assistant.on("agent_started_speaking")
def on_agent_started_speaking():
    """Publish "{AGENT_STARTED_SPEAKING}" on the agent_state topic and log it."""
    _publish_agent_state("{AGENT_STARTED_SPEAKING}")
    log_message("Agent started speaking")


@assistant.on("agent_stopped_speaking")
def on_agent_stopped_speaking():
    """Publish "{AGENT_STOPPED_SPEAKING}" on the agent_state topic and log it."""
    _publish_agent_state("{AGENT_STOPPED_SPEAKING}")
    log_message("Agent stopped speaking")


def main(livekit_url: str):
# Workers have to be run as CLIs right now.