Big updates

younesbram · May 24, 2024 · 1092e2b · 1092e2b
1 parent 3c42da5
commit 1092e2b
Show file tree

Hide file tree

Showing 3 changed files with 217 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+.streamlit/
+.env
diff --git a/app.py b/app.py
@@ -1,41 +1,119 @@
+import os
+from dotenv import load_dotenv
 import streamlit as st
 import openai
 from PIL import Image
 import requests
 from io import BytesIO
-
-# Replace with your OpenAI API key
-openai.api_key = st.secrets["OPENAI_API_KEY"]
+import base64
+from groq import Groq
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Retrieve API keys and the per-character ElevenLabs voice IDs from
+# Streamlit secrets (st.secrets), not from process environment variables.
+openai_api_key = st.secrets["OPENAI_API_KEY"]
+elevenlabs_api_key = st.secrets["EV_API_KEY"]
+groq_api_key = st.secrets["GROQ_API_KEY"]
+jerry_voice = st.secrets["JERRY_VOICE"]
+kramer_voice = st.secrets["KRAMER_VOICE"]
+george_voice = st.secrets["GEORGE_VOICE"]
+larry_david_voice = st.secrets["LARRY_DAVID_VOICE"]
+elaine_voice = st.secrets["ELAINE_VOICE"]
+newman_voice = st.secrets["NEWMAN_VOICE"]
+leon_voice = st.secrets["LEON_VOICE"]
+jeff_voice = st.secrets["JEFF_VOICE"]
+
+# Fail fast on an empty key; the message names the real source of the value
+# so whoever deploys the app knows where to look.
+if not groq_api_key:
+    raise ValueError("GROQ_API_KEY is empty or not set in Streamlit secrets")
 
 st.set_page_config(
-    page_title="AI Comedy",
+    page_title="AI Skit Generator",
     page_icon="😂",
-    layout="wide",
     initial_sidebar_state="expanded",
     menu_items={
         "Get Help": "https://www.github.com/younesbram/aicomedy",
         "Report a bug": "https://www.younes.ca/contact",
-        "About": "# AI Comedy\nAn app that uses NLP to generate hilarious skits!",
+        "About": "# AI Skit Generator\nAn app that uses AI to generate hilarious skits!",
     },
 )
 
-def generate_joke(topic, characters):
-    # A faked few-shot conversation to prime the model into becoming a sarcastic comedian selected earlier
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system",
-                "content": f"You are a extremely funny and extremely sarcastic comedian writer tasked with preserving {', '.join(characters)} jokes and delivering the same style punchlines in your short skits. You will respond in a script that includes {', '.join(characters)}"},
-            {"role": "user",
-                "content": f"the topic is: {topic}. only respond as previous instructions and reply only with character names that I gave you followed by their script(make the responses deeply affected by the character's portrayed personality on their respective shows). Do not add any extra characters."},
-        ],
-        temperature=0.66666666666666666666666420,
+client = Groq(api_key=groq_api_key)
+
+# Make sure every session-state slot read later in the script exists,
+# defaulting each one to None on the first run of a session.
+for state_key in ('script', 'intro_audio', 'outro_audio'):
+    if state_key not in st.session_state:
+        st.session_state[state_key] = None
+
+def generate_joke_with_groq(topic, characters):
+    """Generate a skit script from the Groq chat-completions API.
+
+    Args:
+        topic: Topic text interpolated into the user prompt.
+        characters: Iterable of character names; joined with ", " into both
+            the system and the user prompt.
+
+    Returns:
+        The full generated script as a single string (empty if the stream
+        yields no content).
+    """
+    # The prompts insist on dialogue only: the script is later split into
+    # "character: text" lines and each line is sent to text-to-speech.
+    messages = [
+        {"role": "system",
+            "content": f"You never give emotions in the script or pauses or descriptors or laughs. NO (pauses) NO (smirks) NO (laughs) NO ANY OF THAT ONLY THE TEXT!!!! You are an extremely funny and sarcastic comedian writer that knows Larry David and Jerry Seinfeld's writing styles, tasked with preserving {', '.join(characters)} jokes and delivering the same style punchlines in your short skits. You will respond in a script that includes {', '.join(characters)}."},
+        {"role": "user",
+            "content": f"The topic is: {topic}. Only respond as previous instructions and be extremely funny, like genius comedy. Do not add any extra characters. Do not add any descriptors like (pauses) or (excitedly) NO MATTER WHAT!  because I will be programmatically generating voice clips from the script. So anything like (sarcastically) or ANYTHING like that will destroy our whole moat and program. Take this seriously. INCLUDE EVERY CHARACTER SELECTED . {', '.join(characters)}"},
+    ]
+
+    # stream=True makes the call return an iterator of incremental chunks.
+    stream = client.chat.completions.create(
+        messages=messages,
+        model="llama3-8b-8192",
+        temperature=0.420,
+        max_tokens=1024,
+        top_p=1,
+        stop=None,
+        stream=True,
     )
 
-    # Get the generated text from the response
-    generated_text = response['choices'][0]['message']['content']
+    # Accumulate the streamed deltas; chunks whose delta has no content are
+    # skipped. The running text is mirrored into session state as it grows.
+    generated_text = ""
+    for chunk in stream:
+        if chunk.choices[0].delta.content:
+            generated_text += chunk.choices[0].delta.content
+            st.session_state['script'] = generated_text
     return generated_text
 
+def generate_joke(topic, characters, use_gpt4=False, image_data=None):
+    """Generate a skit script, using GPT-4o when an image is supplied.
+
+    Args:
+        topic: Topic text interpolated into the user prompt.
+        characters: Names of the characters that must appear in the script.
+        use_gpt4: When true and ``image_data`` is non-empty, call the OpenAI
+            chat-completions endpoint with the image as extra context.
+        image_data: Raw bytes of the uploaded image, or ``None``.
+
+    Returns:
+        The generated script text. Returns ``""`` when the OpenAI request
+        fails, times out, or answers with an unexpected payload. When no
+        image is used the call is delegated to ``generate_joke_with_groq``.
+    """
+    if not (use_gpt4 and image_data):
+        return generate_joke_with_groq(topic, characters)
+
+    st.info("Generating script with GPT-4 Vision. This might take a few moments...  API costs aren't free, you know! Consider following my [@didntdrinkwater](https://twitter.com/didntdrinkwater) and GitHub: [@younesbram](https://www.github.com/younesbram) as a form of compensation. 😂  ")
+    img_str = base64.b64encode(image_data).decode('utf-8')
+    # PNG uploads are accepted as well as JPEG, so label the data URL by the
+    # file signature instead of always claiming JPEG.
+    png_signature = b"\x89PNG\r\n\x1a\n"
+    is_png = bytes(image_data[:len(png_signature)]) == png_signature
+    mime_type = "image/png" if is_png else "image/jpeg"
+    messages = [
+        {"role": "system",
+            "content": f"You are an extremely funny and sarcastic comedian writer that knows Larry David and Jerry Seinfeld's writing styles, tasked with preserving {', '.join(characters)} jokes and delivering the same style punchlines in your short skits. You will respond in a script that includes {', '.join(characters)}."},
+        {"role": "user",
+            "content": f"The topic is: {topic}. Only respond as previous instructions and be extremely funny, like genius comedy. Reply only with character names that I gave you followed by their script (make the responses deeply affected by the character's portrayed personality on their respective shows). Do not add any extra characters or descriptors or anything like sarcastically or laughs or whatever, ONLY the text to be voiced.."},
+        {"role": "user",
+            "content": [
+                {"type": "text", "text": "Please use the following image as context."},
+                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_str}"}}
+            ]}
+    ]
+
+    data = {
+        "model": "gpt-4o",
+        "messages": messages,
+        "temperature": 0.66666666666666666666666420,
+    }
+
+    api_url = "https://api.openai.com/v1/chat/completions"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {openai_api_key}"
+    }
+
+    # Without a timeout a stalled connection would hang the app forever.
+    # Network failures and non-JSON bodies are reported instead of crashing.
+    try:
+        response = requests.post(api_url, headers=headers, json=data, timeout=120)
+        response_data = response.json()
+    except (requests.RequestException, ValueError) as exc:
+        st.error(f"Failed to generate script: {exc}")
+        return ""
+
+    # Error payloads lack "choices" (KeyError); an empty list or a
+    # non-dict body would raise IndexError / TypeError instead.
+    try:
+        generated_text = response_data['choices'][0]['message']['content']
+    except (KeyError, IndexError, TypeError):
+        st.error(f"Failed to generate script: {response_data}")
+        return ""
+
+    st.session_state['script'] = generated_text
+    st.write("Generated script with image context:")
+    st.write(generated_text)
+    return generated_text
 
 def create_video_html(video_path_webm, video_path_mp4, width=None, height=None):
     width_attribute = f'width="{width}"' if width else ""
@@ -52,7 +130,7 @@ def create_video_html(video_path_webm, video_path_mp4, width=None, height=None):
         }}
         @media only screen and (max-width: 480px) {{
             .video-container video {{
-                width: 50%; /* Change this value to the desired width for smaller screens */
+                width: 50%;
                 height: auto;
             }}
         }}
@@ -65,7 +143,6 @@ def create_video_html(video_path_webm, video_path_mp4, width=None, height=None):
     </div>
     """
 
-
 def load_image(url=None, path=None):
     if url:
         response = requests.get(url)
@@ -74,11 +151,82 @@ def load_image(url=None, path=None):
         img = Image.open(path)
     return img
 
-
-st.title("Seinfeld gpt-3.5 Joke Generator")
-
+def generate_voice(character_name, text):
+    """Synthesize one line of dialogue with a character's ElevenLabs voice.
+
+    Args:
+        character_name: Speaker label taken from the script. Matching is
+            case-insensitive and tolerates surrounding whitespace, Markdown
+            emphasis markers, and spaces or hyphens in place of underscores
+            (so "Larry David" resolves to the "larry_david" voice).
+        text: The words to be spoken.
+
+    Returns:
+        The audio bytes returned by the API, or ``None`` when the character
+        has no configured voice, the text is blank, or the request fails.
+    """
+    voices = {
+        "jerry": jerry_voice,
+        "kramer": kramer_voice,
+        "george": george_voice,
+        "elaine": elaine_voice,
+        "newman": newman_voice,
+        "larry_david": larry_david_voice,
+        "leon": leon_voice,
+        "jeff": jeff_voice,
+    }
+    # Script labels come from an LLM and rarely match the keys verbatim
+    # ("Larry David", "**Jerry**"), so normalise before the lookup.
+    label = character_name.strip().strip("*_").lower()
+    label = "_".join(label.replace("-", " ").split())
+    voice_id = voices.get(label)
+
+    # Nothing to synthesize: unknown speaker, or no words to speak (avoids
+    # spending a paid API call on an empty line).
+    if not voice_id or not text or not text.strip():
+        return None
+
+    headers = {
+        "Accept": "audio/mpeg",
+        "Content-Type": "application/json",
+        "xi-api-key": elevenlabs_api_key
+    }
+    data = {
+        "text": text,
+        "model_id": "eleven_multilingual_v2",
+        "voice_settings": {
+            "stability": 0.5,
+            "similarity_boost": 0.6
+        }
+    }
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+
+    # A timeout keeps one stalled request from freezing the whole run, and
+    # network errors are reported the same way as HTTP errors.
+    try:
+        response = requests.post(url, json=data, headers=headers, timeout=60)
+    except requests.RequestException as exc:
+        st.error(f"Failed to generate audio for {character_name}: {exc}")
+        return None
+
+    if response.status_code == 200:
+        return response.content
+
+    st.error(f"Failed to generate audio for {character_name}: {response.text}")
+    return None
+
+def stitch_audio_segments(audio_segments):
+    """Concatenate audio clips into ``combined_audio.mp3`` and return its path.
+
+    Args:
+        audio_segments: Iterable of ``bytes`` objects, in playback order.
+
+    Returns:
+        The relative path ``"combined_audio.mp3"``. The file is created in
+        the current working directory and overwritten on every call.
+    """
+    # Segments are joined byte-for-byte, without re-encoding. A single join
+    # avoids re-copying the growing buffer on every append.
+    combined_audio = b"".join(audio_segments)
+    with open("combined_audio.mp3", "wb") as f:
+        f.write(combined_audio)
+    return "combined_audio.mp3"
+
+def generate_audio_script(script):
+    """Voice every "character: text" line of a script and stitch the clips.
+
+    Args:
+        script: The generated script; one line of dialogue per text line.
+
+    Returns:
+        Path of the stitched audio file, or ``None`` if no line produced
+        any audio.
+    """
+    script_lines = script.split("\n")
+    total_lines = len(script_lines)
+    clips = []
+
+    st.info("API costs aren't free, you know! Consider following my [@didntdrinkwater](https://twitter.com/didntdrinkwater) and GitHub: [@younesbram](https://www.github.com/younesbram) as a form of compensation. 😂")
+    progress_bar = st.progress(0)
+
+    for done, raw_line in enumerate(script_lines, start=1):
+        st.write(f"Processing line: {raw_line}")
+        # Only the first colon separates the speaker from the spoken words;
+        # lines without a colon are not dialogue and are skipped.
+        speaker, separator, spoken = raw_line.partition(":")
+        if separator:
+            speaker = speaker.strip()
+            spoken = spoken.strip()
+            st.write(f"Generating voice for {speaker} with text: {spoken}")
+            clip = generate_voice(speaker, spoken)
+            if clip:
+                clips.append(clip)
+            else:
+                st.write(f"Failed to generate audio segment for {speaker}")
+        # Progress advances for every line, voiced or not.
+        progress_bar.progress(done / total_lines)
+
+    return stitch_audio_segments(clips) if clips else None
+
+st.title("AI Skit Generator")
+
+# Topic input and optional image upload
 topic = st.text_input("Enter a topic:")
+uploaded_image = st.file_uploader("Upload an image (optional)", type=["jpg", "jpeg", "png"])
 
+# Character selection
 seinfeld_characters = ["jerry", "kramer", "george", "elaine", "newman"]
 curb_characters = ["larry_david", "leon", "jeff"]
 characters = {
@@ -168,7 +316,6 @@ def load_image(url=None, path=None):
             st.markdown(video_html, unsafe_allow_html=True)
             char_info["selected"] = st.checkbox(char_info["name"])
 
-
 selected_characters = [char_info["name"]
                        for char_info in characters.values() if char_info["selected"]]
 if st.button("Generate script"):
@@ -177,7 +324,7 @@ def load_image(url=None, path=None):
     num_curb_chars = sum(char_key in curb_characters for char_key,
                          char_info in characters.items() if char_info["selected"])
 
-    if topic and len(selected_characters) > 1:
+    if (topic or uploaded_image) and len(selected_characters) > 1:
 
         # Determine which show's intro and outro to play based on the counts of selected characters
         if num_seinfeld_chars > num_curb_chars:
@@ -187,48 +334,60 @@ def load_image(url=None, path=None):
             intro_audio = open('sounds/introcurb.mp3', 'rb').read()
             outro_audio = open('sounds/outrocurb.mp3', 'rb').read()
 
+        # Store intro and outro audio in session state
+        st.session_state['intro_audio'] = intro_audio
+        st.session_state['outro_audio'] = outro_audio
+
         # Play the intro audio while the user waits
         st.audio(intro_audio, format="audio/mp3")
 
         # Add a spinner with a message while generating the script
-        with st.spinner("Click the audio for a classic intro based on the characters you selected. Generating script..."):
-            generated_script = generate_joke(topic, selected_characters)
+        with st.spinner("Generating script... This might take a few moments..."):
+            image_data = None
+            use_gpt4 = False
+            if uploaded_image:
+                image_data = uploaded_image.read()
+                use_gpt4 = True
+            generated_script = generate_joke(topic, selected_characters, use_gpt4, image_data)
 
+        st.session_state['script'] = generated_script
         st.write(generated_script)
 
-        # Display the laugh videos
-        # Set the desired height in pixels for laugh videos
-        laugh_video_height = 166.666666666666666666666666666666666666666666666666420666666666666666666666666666666666
-        # Create a container for the laugh videos
-        laugh_videos_container = st.container()
+# Add button to generate audio if script is generated
+if st.session_state.get('script'):
+    if st.button("Generate Audio"):
+        with st.spinner("Generating audio..."):
+            audio_file_path = generate_audio_script(st.session_state['script'])
+            if audio_file_path:
+                st.audio(audio_file_path, format="audio/mp3")
+            else:
+                st.error("Failed to generate audio.")
+
+        st.info("Follow me on my Twitter: [@didntdrinkwater](https://twitter.com/didntdrinkwater) and GitHub: [@younesbram](https://www.github.com/younesbram)")
 
-        # Create columns for each laugh video
-        num_laugh_videos = 3  # TODO : Make the unchecked characters the ones laughing
+        # Display the laugh videos after generating the audio
+        laugh_videos_container = st.container()
+        laugh_video_height = 166.67
+        num_laugh_videos = 3
         laugh_videos_cols = laugh_videos_container.columns(num_laugh_videos)
-
-        # Initialize column index
         col_index = 0
-
         laugh_video_characters = ["kramer", "george", "larry_david"]
         for char_key in laugh_video_characters:
             char_info = characters[char_key]
             if "laugh_video_webm" in char_info and "laugh_video_mp4" in char_info:
                 laugh_video_webm = char_info["laugh_video_webm"]
                 laugh_video_mp4 = char_info["laugh_video_mp4"]
 
-            # Add laugh video to the corresponding column
-            with laugh_videos_cols[col_index]:
-                laugh_video_html = create_video_html(
-                    laugh_video_webm, laugh_video_mp4, width=220, height=laugh_video_height)
-                st.markdown(laugh_video_html, unsafe_allow_html=True)
-
-            # Increment the column index
-            col_index += 1
+                with laugh_videos_cols[col_index]:
+                    laugh_video_html = create_video_html(
+                        laugh_video_webm, laugh_video_mp4, width=220, height=laugh_video_height)
+                    st.markdown(laugh_video_html, unsafe_allow_html=True)
 
-        st.markdown(
-            "Follow me on my Twitter: [@didntdrinkwater](https://twitter.com/didntdrinkwater) and GitHub: [@younesbram](https://www.github.com/younesbram)")
-        # Play the outro audio
-        st.audio(outro_audio, format="audio/mp3")
+                col_index += 1
 
+st.markdown(
+    "Follow me on my Twitter: [@didntdrinkwater](https://twitter.com/didntdrinkwater) and GitHub: [@younesbram](https://www.github.com/younesbram)")
+if st.session_state.get('outro_audio'):
+    st.audio(st.session_state['outro_audio'], format="audio/mp3")
 else:
-    st.write("Please provide a topic and select at least two characters.")
+    st.write("Please provide a topic or upload an image and select at least two characters.")
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,6 @@
-openai
 streamlit
-audioread
-bokeh
+openai
+Pillow
+requests
+groq
+python-dotenv