import streamlit as st
from transformers import pipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
import requests
from PIL import Image
import base64

# Convert an image to text using an image captioning model
def img2text(image):
    pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = pipe(image)[0]["generated_text"]
    return text
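
# Hedged sketch, not part of the original app: img2text (and describe_image below)
# rebuilds the captioning pipeline, and so re-loads the BLIP weights, on every call.
# A Streamlit version that provides st.cache_resource (1.18+) can keep one pipeline
# alive across reruns; get_caption_pipe is a hypothetical helper name.
@st.cache_resource
def get_caption_pipe():
    # Loaded once per session and reused on subsequent reruns
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")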

# Generate a story from a scenario or input text
def generate_story(scenario, llm):
    template = """You are a storyteller. You get a scenario as input text and generate a short story out of it.
    Context: {scenario}
    Story:
    """
    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    chain = LLMChain(prompt=prompt, llm=llm)
    full_output = chain.predict(scenario=scenario)
    # The model output may echo the prompt, so keep only the text after the "Story:" marker
    story_start = full_output.find("Story:")
    if story_start != -1:
        story = full_output[story_start + len("Story:"):].strip()
    else:
        story = full_output.strip()
    return story

# Convert text to speech via the Hugging Face Inference API
def text2speech(text, api_token):
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {api_token}"}
    payload = {"inputs": text}
    response = requests.post(API_URL, headers=headers, json=payload)
    if response.status_code != 200:
        st.error(f"Error in text-to-speech API: {response.content}")
        return None
    return response.content
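
# Hedged sketch, not part of the original app: the hosted Inference API answers
# with a non-200 status while a cold model is still loading, so a first
# text-to-speech call often fails. text2speech_with_retry is a hypothetical
# wrapper that waits and retries a few times before giving up; note that
# text2speech surfaces each failed attempt via st.error.
def text2speech_with_retry(text, api_token, retries=3, wait_seconds=10):
    import time
    for _ in range(retries):
        audio = text2speech(text, api_token)
        if audio is not None:
            return audio
        time.sleep(wait_seconds)  # give the model time to finish loading
    return None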

# Text enhancement function (rewriting sentences like in a novel)
def enhance_text(input_text, llm):
    template = """You are a professional novelist. Rewrite the following text to make it more vivid, descriptive, and novel-like.
    Text: {input_text}
    Enhanced Text:
    """
    prompt = PromptTemplate(template=template, input_variables=["input_text"])
    chain = LLMChain(prompt=prompt, llm=llm)
    enhanced_text = chain.predict(input_text=input_text)
    # Keep only the text after the "Enhanced Text:" marker
    enhance_start = enhanced_text.find("Enhanced Text:")
    if enhance_start != -1:
        enhance = enhanced_text[enhance_start + len("Enhanced Text:"):].strip()
    else:
        enhance = enhanced_text.strip()
    return enhance

# Describe an image like in a novel
def describe_image(image, llm):
    # Generate a base caption for the image with the BLIP captioning model
    pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    captions = pipe(image)[0]["generated_text"]
    # Use the LLM to expand the caption into a detailed, novel-like description
    template = """You are a novelist. Given the following image description, rewrite it into a short novel-like description.
    Basic Description: {captions}
    Novel-like Description:
    """
    prompt = PromptTemplate(template=template, input_variables=["captions"])
    chain = LLMChain(prompt=prompt, llm=llm)
    novel_description = chain.predict(captions=captions)
    # Keep only the text after the marker; fall back to the raw caption otherwise
    descript_start = novel_description.find("Novel-like Description:")
    if descript_start != -1:
        description = novel_description[descript_start + len("Novel-like Description:"):].strip()
    else:
        description = captions.strip()
    return description
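
# Hedged sketch, not part of the original app: describe_image feeds a single BLIP
# caption to the LLM. The transformers image-to-text pipeline forwards
# generate_kwargs to model.generate, so beam search with several return sequences
# could surface alternative captions for a richer basic description:
#
#     outputs = pipe(image, generate_kwargs={"num_beams": 5, "num_return_sequences": 3})
#     captions = " ".join(o["generated_text"] for o in outputs)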

# Streamlit app
def main():
    st.set_page_config(page_title="TaleCraft", page_icon="✒️")
    st.title("✒️ TaleCraft")
    st.markdown("## Your Creative Writing Assistant")
    with st.expander("💡 About This Application"):
        st.markdown("""
        **Created by - Tribuvana Kartikeya G ([email protected])**

        This application serves as a creative assistant for novelists and content writers,
        offering tools that transform images and text into compelling narratives.
        Whether you are generating stories from visuals, describing scenes in vivid detail,
        or enhancing your own writing, it provides versatile features to support your
        novel-writing journey. Explore the options in the sidebar to begin crafting
        your next masterpiece.
        """)

    # Sidebar options
    st.sidebar.title("Choose an Option")
    option = st.sidebar.selectbox("Select a function", ("Story Generation", "Image Description", "Text Enhancement"))

    # API token input
    api_token = st.text_input("Enter your Hugging Face API Token:", type="password")
    if not api_token:
        st.warning("Please enter your Hugging Face API Token to proceed.")
        return

    # Hugging Face Hub model setup
    repo_id = "tiiuae/falcon-7b-instruct"
    try:
        llm = HuggingFaceHub(huggingfacehub_api_token=api_token,
                             repo_id=repo_id,
                             verbose=False,
                             model_kwargs={"temperature": 0.5, "max_new_tokens": 500})  # Adjust tokens and temperature as needed
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        st.error("Please check if your API token is correct.")
        return
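
    # Hedged note, not part of the original app: newer LangChain releases deprecate
    # HuggingFaceHub in favor of HuggingFaceEndpoint. An equivalent setup would be:
    #     from langchain_community.llms import HuggingFaceEndpoint
    #     llm = HuggingFaceEndpoint(repo_id=repo_id, huggingfacehub_api_token=api_token,
    #                               temperature=0.5, max_new_tokens=500)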

    if option == "Story Generation":
        st.header("Story Generation")
        # Option to either input text or use an image
        input_choice = st.selectbox("Choose input type:", ["Text Input", "Image Upload"])
        if input_choice == "Text Input":
            input_text = st.text_area("Enter the scenario or text:")
            if st.button('Generate Story'):
                if input_text:
                    story = generate_story(input_text, llm)
                    st.write("Generated Story:", story)
                    audio_bytes = text2speech(story, api_token)
                    if audio_bytes:
                        # Save audio to a file
                        with open("temp_audio.wav", "wb") as f:
                            f.write(audio_bytes)
                        # Read the file and encode to base64
                        with open("temp_audio.wav", "rb") as f:
                            audio_base64 = base64.b64encode(f.read()).decode()
                        # Display audio player
                        st.markdown(f'<audio controls autoplay><source src="data:audio/wav;base64,{audio_base64}" type="audio/wav"></audio>', unsafe_allow_html=True)
                        # Provide download link
                        st.download_button(
                            label="Download Audio",
                            data=audio_bytes,
                            file_name="story_audio.wav",
                            mime="audio/wav"
                        )
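                        # Hedged alternative, not part of the original app: Streamlit's
                        # built-in audio widget could replace the temp file and base64
                        # markup above (here and in the matching block below) with:
                        #     st.audio(audio_bytes, format="audio/wav")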
                else:
                    st.warning("Please enter some text.")
        elif input_choice == "Image Upload":
            uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
            if uploaded_file is not None:
                # Display the uploaded image
                image = Image.open(uploaded_file)
                st.image(image, caption='Uploaded Image', use_column_width=True)
                if st.button('Generate Story'):
                    # Image to text
                    scenario = img2text(image)
                    st.write("Image Caption:", scenario)
                    # Generate story
                    story = generate_story(scenario, llm)
                    st.write("Generated Story:", story)
                    audio_bytes = text2speech(story, api_token)
                    if audio_bytes:
                        # Save audio to a file
                        with open("temp_audio.wav", "wb") as f:
                            f.write(audio_bytes)
                        # Read the file and encode to base64
                        with open("temp_audio.wav", "rb") as f:
                            audio_base64 = base64.b64encode(f.read()).decode()
                        # Display audio player
                        st.markdown(f'<audio controls autoplay><source src="data:audio/wav;base64,{audio_base64}" type="audio/wav"></audio>', unsafe_allow_html=True)
                        # Provide download link
                        st.download_button(
                            label="Download Audio",
                            data=audio_bytes,
                            file_name="story_audio.wav",
                            mime="audio/wav"
                        )
    elif option == "Image Description":
        st.header("Image Description")
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, caption='Uploaded Image', use_column_width=True)
            if st.button('Describe Image'):
                description = describe_image(image, llm)
                st.write(description)
    elif option == "Text Enhancement":
        st.header("Text Enhancement")
        input_text = st.text_area("Enter the text you want to enhance:")
        if st.button("Enhance Text"):
            if input_text:
                enhanced_text = enhance_text(input_text, llm)
                st.write(enhanced_text)
            else:
                st.warning("Please enter some text.")

if __name__ == "__main__":
    main()