diff --git a/.gitignore b/.gitignore
index 1ac4c6e..f81b2e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,7 +32,6 @@ uploads/_1_1_1
 uploads/_1_1
 uploads/_1
 uploads/_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
-TreeHugger_Exam_ans.csv
 Test_Results/results_test_12-29-23_log.txt
 Test_Results/results_test_12-29-23.csv
 Test_Results/results_test_1-6-24_log.txt
@@ -40,3 +39,7 @@ Test_Results/results_test_1-6-24.csv
 Test_Results/results_test_1-3-24_log.txt
 Test_Results/results_test_1-3-24.csv
 Test_Results/results_test_1-2-24_log.txt
+
+venv/
+__pycache__/
+__pycache__/*.pyc
\ No newline at end of file
diff --git a/src/app.py b/src/app.py
new file mode 100644
index 0000000..ac05e7f
--- /dev/null
+++ b/src/app.py
@@ -0,0 +1,68 @@
+"""Streamlit entry point for the euGenio chat assistant."""
+
+# __import__('pysqlite3')
+# import sys
+# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+# import sqlite3
+
+import streamlit as st
+from components.frontend.chat import Chat_UI
+from components.frontend.sidebar import Sidebar
+from components.backend.pipeline.pipeline import Pipeline
+import os
+import uuid
+
+
+st.set_page_config(layout='wide')
+
+
+@st.cache_resource
+def initalize():
+    """Build the pipeline and the UI components that share it.
+
+    Cached with ``st.cache_resource`` so the same objects are reused across
+    Streamlit reruns instead of being rebuilt on every interaction.
+
+    Returns:
+        tuple: ``(pipeline, sidebar, chat)``.
+    """
+    pipeline = Pipeline()
+    return pipeline, Sidebar(pipeline), Chat_UI(pipeline)
+
+
+class UI:
+    """Top-level page: seeds session state and renders sidebar and chat."""
+
+    def __init__(self):
+        self._pipeline, self.sidebar, self.chat = initalize()
+
+        # A UI is built on every script run, so only fill in missing values;
+        # unconditional assignment would reset the document counter and
+        # issue a new user id on each rerun.
+        if 'documents' not in st.session_state:
+            st.session_state['documents'] = [0]
+        if 'user_id' not in st.session_state:
+            st.session_state['user_id'] = str(uuid.uuid4())
+        if 'messages' not in st.session_state:
+            st.session_state['messages'] = []
+
+        # Never commit credentials: take the key from the environment. When
+        # it is absent 'api_key' stays unset, which leaves the sidebar's key
+        # input enabled so the user can supply one.
+        env_key = os.environ.get('OPENAI_API_KEY')
+        if env_key and 'api_key' not in st.session_state:
+            st.session_state['api_key'] = env_key
+
+    def render(self):
+        """Draw the sidebar, then the chat area."""
+        self.sidebar()
+        self.chat()
+
+
+def main():
+    """Create the UI and render the page once."""
+    UI().render()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/assets/eugenie.png b/src/assets/eugenie.png
new file mode 100644
index 0000000..7fc9ffc
Binary files /dev/null and b/src/assets/eugenie.png differ
diff --git a/src/components/backend/pipeline/llm.py b/src/components/backend/pipeline/llm.py
new file mode 100644
index 0000000..b3b5eba
--- /dev/null
+++ b/src/components/backend/pipeline/llm.py
@@ -0,0 +1,21 @@
+from components.backend.tools.python_interpreter import PythonInterpreter
+from components.backend.tools.arxiv_search import ArxivSearch
+from components.backend.tools.calculator import Calculator
+from components.backend.tools.web_search import WebSearch
+from langchain_openai import ChatOpenAI
+
+import os, re, json
+
+
+class LLM:
+    """Holder for the chat model shared by the agent and its tools."""
+
+    def __init__(self, temperature=0.0001, model_name='gpt-4'):
+        """Create the underlying ``ChatOpenAI`` client.
+
+        Args:
+            temperature: Sampling temperature passed to the model.
+            model_name: OpenAI chat model name; defaults to the previously
+                hard-coded ``'gpt-4'``.
+        """
+        self.llm = ChatOpenAI(model_name=model_name, temperature=temperature)
diff --git a/src/components/backend/pipeline/pipeline.py b/src/components/backend/pipeline/pipeline.py
new file mode 100644
index 0000000..6ab026e
--- /dev/null
+++ b/src/components/backend/pipeline/pipeline.py
@@ -0,0 +1,71 @@
+from components.backend.pipeline.vectorstore import VectorStore
+from components.backend.pipeline.llm import LLM
+
+import os, io
+
+from components.backend.tools.python_interpreter import PythonInterpreter
+from components.backend.tools.arxiv_search import ArxivSearch
+from components.backend.tools.calculator import Calculator
+from components.backend.tools.web_search import WebSearch
+
+from langchain.agents import initialize_agent
+
+# Credentials come from the process environment and must never be committed.
+# The keys that used to be assigned here are exposed in version-control
+# history and should be revoked and rotated.
+REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_CSE_ID")
+
+
+def _check_environment():
+    """Raise ``EnvironmentError`` naming each required variable that is unset."""
+    missing = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
+    if missing:
+        raise EnvironmentError(
+            "Missing required environment variables: " + ", ".join(missing)
+        )
+
+
+class Pipeline:
+    """Conversational agent wired to the Python, arXiv, math and web tools."""
+
+    def __init__(self, max_iterations=5):
+        """Build the LLM, vector store, tools and agent.
+
+        Args:
+            max_iterations: Upper bound on agent steps, forwarded to
+                ``initialize_agent``.
+
+        Raises:
+            EnvironmentError: If a required API credential is not set.
+        """
+        _check_environment()
+        self.llm = LLM()
+        self.vectorstore = VectorStore()
+        self.tools = [
+            PythonInterpreter(llm=self.llm.llm).initialize(),
+            ArxivSearch().initialize(),
+            Calculator(llm=self.llm.llm).initialize(),
+            WebSearch(llm=self.llm.llm, vectorstore_public=self.vectorstore.vectorstore).initialize(),
+        ]
+
+        self.agent = initialize_agent(
+            self.tools,
+            self.llm.llm,
+            agent="chat-conversational-react-description",
+            verbose=True,
+            handle_parsing_errors=True,
+            max_iterations=max_iterations,
+        )
+
+    def run(self, query, chat_history):
+        """Run the agent on one user query.
+
+        Args:
+            query: The user's question; surrounding whitespace is stripped.
+            chat_history: Prior messages passed to the agent as
+                ``chat_history``.
+
+        Returns:
+            Whatever ``self.agent.invoke`` returns.
+        """
+        return self.agent.invoke({'input': query.strip(), 'chat_history': chat_history})
diff --git a/src/components/backend/pipeline/vectorstore.py b/src/components/backend/pipeline/vectorstore.py
new file mode 100644
index 0000000..d46fc1b
--- /dev/null
+++ b/src/components/backend/pipeline/vectorstore.py
@@ -0,0 +1,25 @@
+import chromadb
+from langchain_community.vectorstores import Chroma
+from langchain_openai import OpenAIEmbeddings
+
+
+class VectorStore:
+    """Chroma collection wrapped as a LangChain vector store."""
+
+    def __init__(self, collection_name="user"):
+        """Create the client, collection, embeddings and vector store.
+
+        Args:
+            collection_name: Name of the Chroma collection to use.
+        """
+        self.chroma_client = chromadb.Client()
+        # create_collection() raises if the name is already taken;
+        # get_or_create_collection() makes construction repeatable.
+        self.chroma_client.get_or_create_collection(name=collection_name)
+        self.embeddings_model = OpenAIEmbeddings()
+
+        self.vectorstore = Chroma(
+            client=self.chroma_client,
+            collection_name=collection_name,
+            embedding_function=self.embeddings_model,
+        )
diff --git a/src/components/backend/tools/arxiv_search.py b/src/components/backend/tools/arxiv_search.py
new file mode 100644
index 0000000..c612ce2
--- /dev/null
+++ b/src/components/backend/tools/arxiv_search.py
@@ -0,0 +1,18 @@
+from langchain.utilities import ArxivAPIWrapper
+from langchain.tools import Tool
+from pydantic import BaseModel, Field
+
+
+class ArxivSearch:
+    """Factory for the agent's arXiv lookup tool."""
+
+    def __init__(self):
+        self.arxiv = ArxivAPIWrapper()
+
+    def initialize(self):
+        """Return a ``Tool`` named ``arxiv`` that runs ``ArxivAPIWrapper.run``."""
+        return Tool.from_function(
+            func=self.arxiv.run,
+            name="arxiv",
+            description="useful for when you need to answer research based questions or find scientific documents or papers",
+        )
diff --git a/src/components/backend/tools/calculator.py b/src/components/backend/tools/calculator.py
new file mode 100644
index 0000000..277e123
--- /dev/null
+++ b/src/components/backend/tools/calculator.py
@@ -0,0 +1,19 @@
+from langchain.chains import LLMMathChain
+from langchain.tools import Tool
+from pydantic import BaseModel, Field
+
+
+class Calculator:
+    """Factory for the agent's math tool, backed by ``LLMMathChain``."""
+
+    def __init__(self, llm):
+        self.llm = llm
+
+    def initialize(self):
+        """Return a ``Tool`` named ``Calculator`` that runs an ``LLMMathChain``."""
+        math_chain = LLMMathChain.from_llm(llm=self.llm, verbose=True)
+        return Tool.from_function(
+            func=math_chain.run,
+            name="Calculator",
+            description="useful for when you need to answer questions about math",
+        )
diff --git a/src/components/backend/tools/python_interpreter.py b/src/components/backend/tools/python_interpreter.py
new file mode 100644
index 0000000..86891cf
--- /dev/null
+++ b/src/components/backend/tools/python_interpreter.py
@@ -0,0 +1,59 @@
+from langchain_core.output_parsers import StrOutputParser
+from langchain_experimental.utilities import PythonREPL
+from langchain.tools import Tool
+from langchain_core.prompts import ChatPromptTemplate
+
+
+class PythonInterpreter:
+    """Tool that asks the LLM for Python code and executes it."""
+
+    def __init__(self, llm):
+        self.llm = llm
+
+    def _sanitize_output(self, text: str):
+        """Return the code inside the first fenced block of *text*.
+
+        A ```python fence is preferred, then any ``` fence. Text without a
+        fence is returned unchanged. The previous tuple-unpacking version
+        raised ValueError unless the reply held exactly one ```python marker.
+        """
+        for fence in ("```python", "```"):
+            _, found, after = text.partition(fence)
+            if found:
+                return after.split("```")[0]
+        return text
+
+    def python_interpreter(self, query):
+        """Generate Python for *query*, execute it and return the output.
+
+        SECURITY: the model-written code is run by ``PythonREPL`` without any
+        sandboxing, so a crafted prompt can execute arbitrary code. Isolate
+        this tool before exposing the app to untrusted users.
+        """
+        template = """Write some python code to solve the user's problem.
+
+        Return only python code in Markdown format, e.g.:
+
+        ```python
+        ....
+        ```"""
+        prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{input}")])
+        chain = prompt | self.llm | StrOutputParser() | self._sanitize_output | PythonREPL().run
+        output = chain.invoke({"input": query})
+        print("Python interpreter")
+        print(output)
+        return output
+
+    def initialize(self):
+        """Return a ``Tool`` named ``python_interpreter`` wrapping this class."""
+        return Tool.from_function(
+            func=self.python_interpreter,
+            name="python_interpreter",
+            description="""The Python Code Generator Tool is a sophisticated utility designed to craft Python code solutions for a wide array of questions. When provided with a question, this tool leverages advanced algorithms to generate concise and efficient Python code snippets as answers.
+
+            Usage Instructions:
+
+            Pose a question requiring a Python code solution.
+            If existing tools are deemed insufficient for the task, instruct the Assistant to utilize the Python Code Generator Tool.
+            Expect a response in the form of a Markdown-formatted Python code block, enclosed within triple backticks.""",
+        )
diff --git a/src/components/backend/tools/web_search.py b/src/components/backend/tools/web_search.py
new file mode 100644
index 0000000..ca53a6e
--- /dev/null
+++ b/src/components/backend/tools/web_search.py
@@ -0,0 +1,34 @@
+from langchain.tools import Tool
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.retrievers.web_research import WebResearchRetriever
+from langchain.utilities import GoogleSearchAPIWrapper
+
+
+class WebSearch:
+    """Factory for the agent's web question-answering tool."""
+
+    def __init__(self, llm, vectorstore_public):
+        """Set up Google search and the web-research retriever.
+
+        Args:
+            llm: Chat model used by the retriever and the QA chain.
+            vectorstore_public: Vector store handed to
+                ``WebResearchRetriever.from_llm``.
+        """
+        self.llm = llm
+        self.search = GoogleSearchAPIWrapper()
+        self.web_retriever = WebResearchRetriever.from_llm(
+            vectorstore=vectorstore_public,
+            llm=self.llm,
+            search=self.search,
+            num_search_results=3
+        )
+
+    def initialize(self):
+        """Return a ``Tool`` named ``web_QA`` backed by a retrieval QA chain."""
+        qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm=self.llm, retriever=self.web_retriever)
+        return Tool.from_function(
+            func=qa_chain,
+            name="web_QA",
+            description="web_QA is a web searching tool for the LLM agent, triggered when the similarity score from in-context QA is too low. It dynamically integrates the LLM and a web retriever to broaden knowledge through targeted web searches, enhancing the agent's responsiveness and adaptability to diverse user queries",
+        )
diff --git a/src/components/frontend/chat.py b/src/components/frontend/chat.py
new file mode 100644
index 0000000..26aa6f6
--- /dev/null
+++ b/src/components/frontend/chat.py
@@ -0,0 +1,215 @@
+import streamlit as st
+import os, re, json
+import base64
+import extra_streamlit_components as stx
+from annotated_text import annotated_text
+import datetime
+from langchain_core.messages import AIMessage, HumanMessage
+
+
+@st.cache_resource(experimental_allow_widgets=True)
+def get_manager():
+    """Return the cached ``stx.CookieManager`` component."""
+    return stx.CookieManager()
+
+
+class CookieManager:
+    """Wrapper that reads and writes one named browser cookie."""
+
+    def __init__(self, cookie_name = 'messages'):
+        self.manager = get_manager()
+        self.cookie_name = cookie_name
+
+    def __call__(self):
+        """Call ``get_all`` on the underlying manager and discard the result."""
+        _ = self.manager.get_all()
+
+    def get(self):
+        """Return the value stored under this manager's cookie name."""
+        return self.manager.get(cookie=self.cookie_name)
+
+    def set(self, value):
+        """Store *value* under this manager's cookie name."""
+        self.manager.set(self.cookie_name, value)
+
+    def delete(self):
+        """Delete the cookie, ignoring a cookie that was never stored."""
+        try:
+            self.manager.delete(cookie=self.cookie_name)
+        except KeyError:
+            # The underlying manager raises KeyError for a cookie it does not
+            # hold; there is nothing to delete in that case.
+            pass
+
+
+class Chat_UI:
+    """Chat panel: shows the conversation and sends queries to the pipeline."""
+
+    def __init__(self, pipeline):
+        self.pipeline = pipeline
+        self.cookie_manager = CookieManager()
+
+    def render(self):
+        """Render the chat panel; equivalent to calling the instance."""
+        # This class has no ``chat`` attribute, so the former ``self.chat()``
+        # raised AttributeError. Rendering is implemented by ``__call__``.
+        self()
+
+    def initiate_memory(self):
+        """Load stored history into session state, or start with a greeting."""
+        history = self.get_messages()
+
+        print('History: ', history)
+        if not history:
+            st.session_state['messages'] = [{"role": "assistant", "content": "Hello! The name's euGenio. I'm here to help you with your pipeline. Ask me a question!"}]
+        else:
+            st.session_state['messages'] = history
+
+    def append(self, message:dict):
+        """Add *message* to the conversation held in session state."""
+        st.session_state['messages'].append(message)
+
+    def __call__(self):
+        """Draw the stored conversation followed by the input controls."""
+        self.cookie_manager()
+        # Instantiates the chat history
+        self.initiate_memory()
+        self.load_memory()
+
+        # Loads the text tab
+        self.load_chatbox()
+
+    def load_chatbox(self):
+        """Draw the question input and the delete-history button."""
+        st.text_input("*Got a question?*", help='Try to specify keywords and intent in your question!', key="text", on_change=self.handle_query)
+
+        if st.button('Delete History', use_container_width=True, type='primary'):
+            self.delete_messages()
+
+    def load_memory(self):
+        """Replay every stored message as a chat bubble."""
+        for message in st.session_state['messages'] or []:
+            role = message["role"]
+            content = message["content"]
+
+            with st.chat_message(role):
+                # Assistant replies are stored as the pipeline's result dict.
+                if isinstance(content, dict) and role == 'assistant':
+                    with st.expander("Thought Process!", expanded=True):
+                        st.json(content)
+                else:
+                    st.markdown(content)
+
+    @staticmethod
+    def _history_text(content):
+        """Return the text of a stored message for use as chat history."""
+        if isinstance(content, dict):
+            # Pass the agent's answer, not the repr of the whole result dict.
+            return str(content.get('output', content))
+        return str(content)
+
+    def format_history(self):
+        """Convert the stored conversation to LangChain message objects.
+
+        The first stored message is skipped. User turns become
+        ``HumanMessage``; every other turn becomes ``AIMessage``.
+        """
+        messages = st.session_state['messages']
+        if not messages:
+            return []
+
+        formatted = []
+        for message in messages[1:]:
+            text = self._history_text(message['content'])
+            if message['role'] == 'user':
+                formatted.append(HumanMessage(content=text))
+            else:
+                formatted.append(AIMessage(content=text))
+        return formatted
+
+    def handle_query(self):
+        """Input callback: run the query and record both sides of the turn."""
+        text = st.session_state["text"]
+        st.session_state["text"] = ""
+
+        # Emptying the box also triggers on_change; never send a blank query.
+        if not text or not text.strip():
+            return
+
+        # Snapshot the history before recording the question; otherwise the
+        # agent receives it twice, as the input and as the last history item.
+        chat_history = self.format_history()
+
+        user_message = {"role": "user", "content": text}
+        self.append(user_message)
+
+        with st.chat_message("user"):
+            st.markdown(text)
+
+        with st.chat_message("assistant"):
+            with st.spinner('Thinking...'):
+                results = self.pipeline.run(query=text, chat_history=chat_history)
+
+            st.markdown(results['output'])
+
+        assistant_message = {"role": "assistant", "content": {key: value for key, value in results.items() if key != 'chat_history'}}
+
+        self.append(assistant_message)
+        self.store_messages(user_message, assistant_message)
+
+    def store_messages(self, user_message, assistant_message):
+        """Persist the latest exchange in the cookie.
+
+        With an existing cookie the two messages are appended unless either
+        is already present; otherwise the whole session conversation is
+        written.
+        """
+        past = self.cookie_manager.get()
+
+        if not past:
+            self.cookie_manager.set(st.session_state.messages)
+        elif user_message not in past and assistant_message not in past:
+            past.append(user_message)
+            past.append(assistant_message)
+            self.cookie_manager.set(past)
+
+    def get_messages(self):
+        """Return the conversation stored in the cookie."""
+        return self.cookie_manager.get()
+
+    def delete_messages(self):
+        """Delete the stored conversation, then re-initialise session state."""
+        self.cookie_manager.delete()
+        self.initiate_memory()
+
+    def _annotated_parser(self, text):
+        """Split *text* into plain parts and ``(label, 'important')`` tuples.
+
+        Every ``[bracketed]`` span becomes a tuple; the text between spans is
+        kept as plain strings (possibly empty).
+        """
+        pattern = r'\[(.*?)\]'
+
+        annotated_parts = []
+        last_end = 0
+
+        for match in re.finditer(pattern, text):
+            start, end = match.span()
+
+            annotated_parts.append(text[last_end:start])
+
+            bracketed_text = match.group(1)
+            annotated_parts.append((bracketed_text, 'important'))
+
+            last_end = end
+
+        annotated_parts.append(text[last_end:])
+
+        return tuple(annotated_parts)
+
+
+class CookieTester:
+    """Placeholder holding a single ``cookie`` attribute."""
+
+    def __init__(self):
+        self.cookie = None
diff --git a/src/components/frontend/sidebar.py b/src/components/frontend/sidebar.py
new file mode 100644
index 0000000..026a507
--- /dev/null
+++ b/src/components/frontend/sidebar.py
@@ -0,0 +1,97 @@
+import streamlit as st
+import base64, os
+import time
+import io
+
+# Markdown shown for each entry of the "Tools" selector, in display order.
+TOOL_DESCRIPTIONS = {
+    'Chroma-DB': """
+        ## Chroma-DB
+        Chroma HTTP.Client object class can be used to retrieve documents with metadata based on a corresponding query embedding
+        """,
+    'Web-Search': """
+        ## Web-Search
+        A module which can search the web and just return the results
+        """,
+    'arXiv-Search': """
+        ## arXiv-Search
+        A module which can search arXiv's research repository with abstracts, papers, and authors.
+        """,
+    'Calculator-App': """
+        ## Calculator-App
+        A module to which can you send in a formula in the form of a string
+        """,
+    'Python-Interpreter': """
+        ## Python-Interpreter
+        A module to which can you send in code as a string with delimiters, and get output back
+        """,
+}
+
+
+class Sidebar:
+    """Sidebar with the API-key input, file uploader and tool overview."""
+
+    def __init__(self, pipeline):
+        self.pipeline = pipeline
+
+    def __call__(self):
+        """Render every sidebar section."""
+        with st.sidebar:
+            st.markdown(
+                """
+
+                """, unsafe_allow_html=True
+            )
+
+            st.image('assets/eugenie.png', width=250)
+
+            # The input is locked once a key is present in session state.
+            disabled = 'api_key' in st.session_state
+            key = st.sidebar.text_input('', placeholder ='Input your OpenAI API Key: ', type='password', label_visibility='hidden', key='api_key_input', disabled=disabled)
+            if key:
+                st.session_state['api_key'] = key
+                st.sidebar.success('API Key Successfully Added!')
+            st.sidebar.divider()
+
+            self._upload_widget()
+            self._show_tools()
+
+    def _upload_widget(self):
+        """Draw the file uploader and hand selected PDFs to the pipeline."""
+        upload_expander = st.sidebar.expander("File Uploader", expanded=True)
+        with upload_expander:
+            pdf_docs = st.file_uploader('Select Files to Upload', accept_multiple_files=True, type=['pdf', 'txt', 'png', 'jpg'])
+            if not st.button('Start Upload'):
+                return
+
+            # Files are ingested by calling the pipeline object directly. A
+            # pipeline that is not callable would raise TypeError on the
+            # first PDF, so report that instead of crashing the page.
+            if not callable(self.pipeline):
+                st.error('Document upload is not supported by the current pipeline.')
+                return
+
+            for pdf in pdf_docs or []:
+                my_bar = st.progress(0, text='Checking File...')
+
+                # Only PDFs are processed; other accepted types are skipped.
+                if pdf.type == "application/pdf":
+                    my_bar.progress(10, text='Reading File...')
+                    text_blocks = self.pipeline('add', {'pdf': pdf.read()})
+                    my_bar.progress(50, text='Processing File...')
+                    self.pipeline('process', {'text_blocks': text_blocks, 'filename': pdf.name})
+                    my_bar.progress(100, text="Finalizing...")
+                    st.success('File Successfully Processed!')
+                    st.session_state['documents'][0] += 1
+
+                my_bar.empty()
+
+    def _show_tools(self):
+        """Draw the tool selector and the description of the chosen tool."""
+        tools = st.sidebar.expander("Tools", expanded=True)
+        with tools:
+            options = st.selectbox(
+                'List of available tools: ',
+                tuple(TOOL_DESCRIPTIONS))
+
+            st.markdown(TOOL_DESCRIPTIONS[options])