import json

import pandas as pd
import streamlit as st
from PIL import Image
from streamlit_lottie import st_lottie

# LLM / agent machinery (generate_response in my_functions builds on these).
from langchain.chat_models import ChatOpenAI
from langchain.agents import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType

# Project helpers.
from my_functions import load_csv, generate_response, other_inputs
import my_functions as mf

# Imported for the planned plotting/mapping pages (see the to-do list below);
# not used directly on this page.
from tempfile import NamedTemporaryFile
import statistics
import plotly
import matplotlib
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression
import bokeh
import altair
from streamlit_folium import folium_static
# Page Configuration
st.set_page_config(
    page_title="Robo Data Analyzer",
    page_icon="🤖",
)
# Page Setup
# Image in the sidebar.
with st.sidebar.container():
    image = Image.open(r"images/pictures/ahead_transparent_edit2.png")
    st.image(image, use_column_width=True)
# Load a downloaded Lottie animation from a local JSON file.
def import_json(path):
    with open(path, "r", encoding="utf8", errors="ignore") as file:
        data = json.load(file)
    return data
# Documentation: https://platform.openai.com/docs/api-reference/fine-tunes/create?lang=python
# To-do list:
# - Investigate how to get richer output from the LLM
# - Create a plotting page for charts
# - Create a mapping page
# - Create an illustration page

# This list feeds the other_inputs function; a sketch of that helper follows just below.
question_list = [
    "How many rows are in this dataset?",
    "How many columns are in this dataset?",
    "Other",
]
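
# A minimal sketch of what my_functions.other_inputs is assumed to do: offer
# the canned questions plus a free-text "Other" option, collect the user's
# OpenAI key, and hand the chosen query to the LLM. The widget layout and the
# generate_response signature here are assumptions for illustration, not the
# actual implementation in my_functions.py.
def _sketch_other_inputs(question_list, df):
    question = st.selectbox("Select an example question:", question_list)
    if question == "Other":
        question = st.text_input("Enter your own question:")
    openai_api_key = st.text_input("OpenAI API key:", type="password")
    if question and openai_api_key.startswith("sk-"):
        # Hypothetical call; the real signature lives in my_functions.py.
        st.write(generate_response(openai_api_key, df, question))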
st.title("Robo-Analytical Assistant 🤖🧮")
data_oracle = import_json(r"lottie_files/robo_oracle.json")
st_lottie(data_oracle, height=400, key="oracle")
st.subheader("Project Introduction")
st.write("""This project shows how Large Language Models (LLMs) can jumpstart data analytics by answering questions about a dataset. Please note that this page does not produce visualizations; use the dedicated pages to create charts and maps.
We use OpenAI to power the LLM, so you will need an OpenAI API key. You can generate one on OpenAI's platform at this [link](https://platform.openai.com/account/api-keys).
""")
with st.expander("Pros and Cons of Using LLMs for Analytics❕❗"):
    st.write("There are pros and cons to using Generative Pre-trained Transformers (GPT) to help with data analytics. (GPT is a specific type of LLM created by OpenAI.) One pro is that certain questions can be answered without writing a single line of code. Another benefit is that stakeholders can derive additional insight from datasets even if they lack advanced technology skills or deep analytical acumen.")
    st.write("There are also downsides to using LLMs for data analytics. One con is that there are some rudimentary questions LLMs cannot answer, and they occasionally give erroneous answers. Despite these and other drawbacks, utilizing LLMs and other forms of artificial intelligence (AI) can improve the lives of analysts and stakeholders alike.")
    st.write("""Another aspect to consider is data security. One positive of the OpenAI API is that the model is not trained on submitted information. However, at the time of writing OpenAI retains queries and other submitted data for a period to make sure its GPT capabilities are not used in ways that breach its Terms of Service or applicable laws.
For these reasons, industries with very sensitive datasets may be best served by developing their own LLMs rather than forgoing these innovations altogether.
""")
# Collect a CSV from the user here.
input_file = st.file_uploader("Browse for a CSV file:", type=["csv"])

# This Boolean stops the generate_response call from breaking: the LLM
# generator only becomes ready once the dataframe is both loaded and configured.
generator_ready = False

# Loading the file for analytics.
# Note: a Streamlit uploaded file is a stream that is exhausted after its
# first read, so we rewind it and keep a working copy of the dataframe
# (df2 here) to prevent errors on reuse.
if input_file is not None:
    dataframe = load_csv(input_file)
    # NOTE: testing whether the next two lines can be removed.
    # Rewind the upload buffer so it can be read a second time.
    input_file.seek(0)
    df = pd.read_csv(input_file, low_memory=False)
    df2 = df.copy()
    generator_ready = True
else:
    st.info("Please upload a file on the above line to get started.")
# NOTE: the loading logic is embedded in the if statement above and runs once
# a file is uploaded. Uploading flips the Boolean to True, letting the code
# below proceed without throwing an error.
if generator_ready:
    other_inputs(question_list, df2)
    # other_inputs(question_list, f.name)
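
# For reference, a minimal sketch of what my_functions.generate_response is
# assumed to look like, based on the langchain imports at the top of this
# file. The model name, agent type, and signature are assumptions; the real
# implementation lives in my_functions.py.
def _sketch_generate_response(openai_api_key, df, query):
    # Back the agent with an OpenAI chat model.
    llm = ChatOpenAI(
        temperature=0,
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_api_key,
    )
    # Build an agent that can write and execute pandas code over the dataframe.
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        verbose=False,
    )
    # Run the natural-language question against the dataframe and return text.
    return agent.run(query)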