From 5e2eb15f60d89d1683ce769da185fa184ca7b114 Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Sun, 29 Sep 2024 17:16:44 +0900 Subject: [PATCH 01/44] preparare doundation --- examples/japanese-easy-demo/pyproject.toml | 1 + examples/use-rdb-resource/.env.template | 0 examples/use-rdb-resource/.gitignore | 1 + examples/use-rdb-resource/db_connector.py | 38 +++++++++++++++ examples/use-rdb-resource/db_index.py | 29 ++++++++++++ examples/use-rdb-resource/main.py | 39 +++++++++++++++ openssa/core/resource/db.py | 55 +++++++++++++++++++++- 7 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 examples/use-rdb-resource/.env.template create mode 100644 examples/use-rdb-resource/.gitignore create mode 100644 examples/use-rdb-resource/db_connector.py create mode 100644 examples/use-rdb-resource/db_index.py create mode 100644 examples/use-rdb-resource/main.py diff --git a/examples/japanese-easy-demo/pyproject.toml b/examples/japanese-easy-demo/pyproject.toml index f5dfe97ad..bd1860427 100644 --- a/examples/japanese-easy-demo/pyproject.toml +++ b/examples/japanese-easy-demo/pyproject.toml @@ -8,6 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.12,<3.13" openssa = "^0.24.3.12" +python-dotenv = "^1.0.1" [build-system] diff --git a/examples/use-rdb-resource/.env.template b/examples/use-rdb-resource/.env.template new file mode 100644 index 000000000..e69de29bb diff --git a/examples/use-rdb-resource/.gitignore b/examples/use-rdb-resource/.gitignore new file mode 100644 index 000000000..ac8a8fa20 --- /dev/null +++ b/examples/use-rdb-resource/.gitignore @@ -0,0 +1 @@ +db.config.yaml diff --git a/examples/use-rdb-resource/db_connector.py b/examples/use-rdb-resource/db_connector.py new file mode 100644 index 000000000..827d75ea6 --- /dev/null +++ b/examples/use-rdb-resource/db_connector.py @@ -0,0 +1,38 @@ +import yaml +from sqlalchemy import create_engine, text +from sqlalchemy.orm import sessionmaker + +class MySQLDatabase: + def __init__(self, config_path): + self.config_path = config_path + self.config = self.load_config() + self.engine = self.create_engine() + self.Session = sessionmaker(bind=self.engine) + + def load_config(self): + with open(self.config_path, 'r') as file: + return yaml.safe_load(file)['database']['mysql'] + + def create_engine(self): + username = self.config['username'] + password = self.config['password'] + host = self.config['host'] + port = self.config['port'] + database = self.config['database'] + connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' + return create_engine(connection_string) + + def get_session(self): + return self.Session() + + def get_events(self): + session = self.get_session() + result = session.execute(text("SELECT * FROM users")) + return result + +if __name__ == '__main__': + config_path = 'db_config.yaml' + db = MySQLDatabase(config_path) + result = db.get_events() + for row in result: + print(type(row), str(row)) diff --git a/examples/use-rdb-resource/db_index.py b/examples/use-rdb-resource/db_index.py new file mode 100644 index 000000000..1e58e6634 --- /dev/null +++ b/examples/use-rdb-resource/db_index.py @@ -0,0 +1,29 @@ +from db_connector import MySQLDatabase +from llama_index.core.readers.file.base import SimpleDirectoryReader +from llama_index.core.indices.vector_store.base import VectorStoreIndex +from llama_index.core.schema import BaseNode, Document, IndexNode, TransformComponent +from llama_index.embeddings.openai.base import OpenAIEmbedding, OpenAIEmbeddingMode, OpenAIEmbeddingModelType +from multiprocessing import cpu_count + +def create_index_from_data(table_data): + documents = [Document(text=str(record)) for record in table_data] + embed_model = OpenAIEmbedding(mode=OpenAIEmbeddingMode.SIMILARITY_MODE, model=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE, + embed_batch_size=100, dimensions=3072, additional_kwargs=None, + api_key=None, api_base=None, api_version=None, + max_retries=10, timeout=60, + reuse_client=True, callback_manager=None, default_headers=None, http_client=None, + num_workers=cpu_count()) + + index = VectorStoreIndex.from_documents(documents, embed_model=embed_model) + + return index + +if __name__ == '__main__': + config_path = 'db_config.yaml' + db = MySQLDatabase(config_path) + + table_data = db.get_events() + + index = create_index_from_data(table_data) + + index.storage_context.persist() diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py new file mode 100644 index 000000000..e51748e3c --- /dev/null +++ b/examples/use-rdb-resource/main.py @@ -0,0 +1,39 @@ +from pathlib import Path +from dotenv import load_dotenv +from openssa import DANA, FileResource + +load_dotenv() + +# いらなそう +LOCAL_CACHE_DOCS_DIR_PATH: Path = Path(__file__).parent / '.data' + + +def get_or_create_agent() -> DANA: + return DANA( + # ここを変更(リソース) + resources={FileResource(path=LOCAL_CACHE_DOCS_DIR_PATH)} + ) + + +def solve(question) -> str: + agent = get_or_create_agent() + try: + return agent.solve(problem=question) + except Exception as err: # pylint: disable=broad-exception-caught + return f'ERROR: {err}' + + +if __name__ == '__main__': + QUESTION = ( + # ここを変更(プロンプト) + 'Please tell me three dishes you recommend.' + 'Please limit the total salt content of the three dishes to less than 21.5g.' + 'Also, please make sure that the total amount of vegetables in the three dishes is at least 700g.' + 'Please answer in Japanese.' + ) + answer = solve(QUESTION) + +# 出力をわかりやすくしたいかも + print('--------------------------------') + print(answer) + print('--------------------------------') diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index fd1388418..d4f3ca076 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -2,15 +2,68 @@ ============================================== [future work] Database Informational Resources ============================================== -""" +This module contains `DbResource` class, +which enables querying information from relational databases. +""" from __future__ import annotations +from dataclasses import dataclass, field +from typing import Any, Optional +import mysql.connector # MySQL Connector for Python + from .abstract import AbstractResource from ._global import global_register +from ._prompts import RESOURCE_QA_PROMPT_TEMPLATE @global_register +@dataclass class DbResource(AbstractResource): """Database Informational Resource.""" + + # Database connection parameters + host: str + user: str + password: str + database: str + + # SQL query to fetch data + query: str + + def __post_init__(self): + """Post-initialize database resource.""" + self.connection = mysql.connector.connect( + host=self.host, + user=self.user, + password=self.password, + database=self.database + ) + self.cursor = self.connection.cursor() + + @cached_property + def unique_name(self) -> str: + """Return globally-unique name of Resource.""" + return f"DBResource_{self.host}_{self.database}" + + @cached_property + def name(self) -> str: + """Return potentially non-unique, but informationally helpful name of Resource.""" + return f"Database at {self.host}/{self.database}" + + def fetch_data(self) -> list[tuple[Any]]: + """Fetch data from the database using the provided query.""" + self.cursor.execute(self.query) + return self.cursor.fetchall() + + def answer(self, question: str, n_words: int = 1000) -> str: + """Answer question from database-stored Informational Resource.""" + data = self.fetch_data() + # Here you can implement a more sophisticated way to generate answers from the data + # For simplicity, we will just return the fetched data as a string + return str(data) + + def __del__(self): + """Ensure the database connection is closed when the object is deleted.""" + self.connection.close() From e25cfe922a05d41cd18ac0d7d0eb30cd0d6a46eb Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Sun, 29 Sep 2024 20:18:58 +0900 Subject: [PATCH 02/44] working with mock data --- examples/use-rdb-resource/.env.template | 1 + examples/use-rdb-resource/.gitignore | 2 +- examples/use-rdb-resource/README.md | 0 examples/use-rdb-resource/db_connector.py | 2 +- examples/use-rdb-resource/db_index.py | 29 ---------- examples/use-rdb-resource/main.py | 14 ++--- examples/use-rdb-resource/pyproject.toml | 15 +++++ openssa/__init__.py | 1 + openssa/core/resource/db.py | 67 +++++++++++++++-------- 9 files changed, 69 insertions(+), 62 deletions(-) create mode 100644 examples/use-rdb-resource/README.md delete mode 100644 examples/use-rdb-resource/db_index.py create mode 100644 examples/use-rdb-resource/pyproject.toml diff --git a/examples/use-rdb-resource/.env.template b/examples/use-rdb-resource/.env.template index e69de29bb..e570b8b55 100644 --- a/examples/use-rdb-resource/.env.template +++ b/examples/use-rdb-resource/.env.template @@ -0,0 +1 @@ +OPENAI_API_KEY= diff --git a/examples/use-rdb-resource/.gitignore b/examples/use-rdb-resource/.gitignore index ac8a8fa20..677def8a8 100644 --- a/examples/use-rdb-resource/.gitignore +++ b/examples/use-rdb-resource/.gitignore @@ -1 +1 @@ -db.config.yaml +db_config.yaml diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/examples/use-rdb-resource/db_connector.py b/examples/use-rdb-resource/db_connector.py index 827d75ea6..8a2b32c4d 100644 --- a/examples/use-rdb-resource/db_connector.py +++ b/examples/use-rdb-resource/db_connector.py @@ -27,7 +27,7 @@ def get_session(self): def get_events(self): session = self.get_session() - result = session.execute(text("SELECT * FROM users")) + result = session.execute(text("SELECT * FROM items")) # TODO: use vanna ai later for the query return result if __name__ == '__main__': diff --git a/examples/use-rdb-resource/db_index.py b/examples/use-rdb-resource/db_index.py deleted file mode 100644 index 1e58e6634..000000000 --- a/examples/use-rdb-resource/db_index.py +++ /dev/null @@ -1,29 +0,0 @@ -from db_connector import MySQLDatabase -from llama_index.core.readers.file.base import SimpleDirectoryReader -from llama_index.core.indices.vector_store.base import VectorStoreIndex -from llama_index.core.schema import BaseNode, Document, IndexNode, TransformComponent -from llama_index.embeddings.openai.base import OpenAIEmbedding, OpenAIEmbeddingMode, OpenAIEmbeddingModelType -from multiprocessing import cpu_count - -def create_index_from_data(table_data): - documents = [Document(text=str(record)) for record in table_data] - embed_model = OpenAIEmbedding(mode=OpenAIEmbeddingMode.SIMILARITY_MODE, model=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE, - embed_batch_size=100, dimensions=3072, additional_kwargs=None, - api_key=None, api_base=None, api_version=None, - max_retries=10, timeout=60, - reuse_client=True, callback_manager=None, default_headers=None, http_client=None, - num_workers=cpu_count()) - - index = VectorStoreIndex.from_documents(documents, embed_model=embed_model) - - return index - -if __name__ == '__main__': - config_path = 'db_config.yaml' - db = MySQLDatabase(config_path) - - table_data = db.get_events() - - index = create_index_from_data(table_data) - - index.storage_context.persist() diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index e51748e3c..8c200beee 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,6 +1,6 @@ from pathlib import Path from dotenv import load_dotenv -from openssa import DANA, FileResource +from openssa import DANA, DbResource load_dotenv() @@ -10,8 +10,8 @@ def get_or_create_agent() -> DANA: return DANA( - # ここを変更(リソース) - resources={FileResource(path=LOCAL_CACHE_DOCS_DIR_PATH)} + # TODO: For Argument Use SQL first and use prompt later + resources=[DbResource(config_path="db_config.yaml", query="SELECT * FROM items")] ) @@ -25,15 +25,13 @@ def solve(question) -> str: if __name__ == '__main__': QUESTION = ( - # ここを変更(プロンプト) - 'Please tell me three dishes you recommend.' - 'Please limit the total salt content of the three dishes to less than 21.5g.' - 'Also, please make sure that the total amount of vegetables in the three dishes is at least 700g.' + # TODO:ここを変更(プロンプト) + 'Please tell me cheapest item from items table.' 'Please answer in Japanese.' ) answer = solve(QUESTION) -# 出力をわかりやすくしたいかも +# TODO: 出力情報の追加 (作成して使用されたSQLなど) print('--------------------------------') print(answer) print('--------------------------------') diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml new file mode 100644 index 000000000..52df4ce54 --- /dev/null +++ b/examples/use-rdb-resource/pyproject.toml @@ -0,0 +1,15 @@ +[tool.poetry] +name = "use-rdb-resource" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +pymysql = "^1.1.1" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/openssa/__init__.py b/openssa/__init__.py index 1b0958471..3f7a4f02f 100644 --- a/openssa/__init__.py +++ b/openssa/__init__.py @@ -21,6 +21,7 @@ from .core.reasoning.ooda import OodaReasoner from .core.resource.file import FileResource +from .core.resource.db import DbResource from .core.task import Task diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index d4f3ca076..709c132d7 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -11,51 +11,71 @@ from dataclasses import dataclass, field from typing import Any, Optional -import mysql.connector # MySQL Connector for Python +import yaml +from sqlalchemy import create_engine, text +from sqlalchemy.orm import sessionmaker from .abstract import AbstractResource from ._global import global_register from ._prompts import RESOURCE_QA_PROMPT_TEMPLATE +class MySQLDatabase: + def __init__(self, config_path): + self.config_path = config_path + self.config = self.load_config() + self.engine = self.create_engine() + self.Session = sessionmaker(bind=self.engine) + + def load_config(self): + with open(self.config_path, 'r') as file: + return yaml.safe_load(file)['database']['mysql'] + + def create_engine(self): + username = self.config['username'] + password = self.config['password'] + host = self.config['host'] + port = self.config['port'] + database = self.config['database'] + connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' + return create_engine(connection_string) + + def get_session(self): + return self.Session() + + def get_events(self): + session = self.get_session() + result = session.execute(text("SELECT * FROM items")) # TODO: use vanna ai later for the query + return result + + @global_register @dataclass class DbResource(AbstractResource): """Database Informational Resource.""" - # Database connection parameters - host: str - user: str - password: str - database: str - - # SQL query to fetch data + config_path: str query: str def __post_init__(self): """Post-initialize database resource.""" - self.connection = mysql.connector.connect( - host=self.host, - user=self.user, - password=self.password, - database=self.database - ) - self.cursor = self.connection.cursor() - - @cached_property + self.db = MySQLDatabase(self.config_path) + + @property def unique_name(self) -> str: """Return globally-unique name of Resource.""" - return f"DBResource_{self.host}_{self.database}" + return f"DBResource_{self.db.config['host']}_{self.db.config['database']}" - @cached_property + @property def name(self) -> str: """Return potentially non-unique, but informationally helpful name of Resource.""" - return f"Database at {self.host}/{self.database}" + return f"Database at {self.db.config['host']}/{self.db.config['database']}" def fetch_data(self) -> list[tuple[Any]]: """Fetch data from the database using the provided query.""" - self.cursor.execute(self.query) - return self.cursor.fetchall() + session = self.db.get_session() + result = session.execute(text(self.query)) + return result.fetchall() def answer(self, question: str, n_words: int = 1000) -> str: """Answer question from database-stored Informational Resource.""" @@ -66,4 +86,5 @@ def answer(self, question: str, n_words: int = 1000) -> str: def __del__(self): """Ensure the database connection is closed when the object is deleted.""" - self.connection.close() + if hasattr(self, 'db'): + self.db.Session.close_all() From 35b5eae55a3a622ab0824a1050866e9a70045c1b Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Tue, 1 Oct 2024 11:35:43 +0900 Subject: [PATCH 03/44] use vannna to make sql and pass to the dana agent through dbresource --- examples/use-rdb-resource/.env.template | 2 +- examples/use-rdb-resource/.gitignore | 2 ++ examples/use-rdb-resource/db_connector.py | 38 --------------------- examples/use-rdb-resource/main.py | 40 +++++++++++++++++++---- examples/use-rdb-resource/pyproject.toml | 3 ++ 5 files changed, 40 insertions(+), 45 deletions(-) delete mode 100644 examples/use-rdb-resource/db_connector.py diff --git a/examples/use-rdb-resource/.env.template b/examples/use-rdb-resource/.env.template index e570b8b55..2ba29ae4d 100644 --- a/examples/use-rdb-resource/.env.template +++ b/examples/use-rdb-resource/.env.template @@ -1 +1 @@ -OPENAI_API_KEY= +OPENAI_API_KEY={your openai API key} diff --git a/examples/use-rdb-resource/.gitignore b/examples/use-rdb-resource/.gitignore index 677def8a8..366fe455b 100644 --- a/examples/use-rdb-resource/.gitignore +++ b/examples/use-rdb-resource/.gitignore @@ -1 +1,3 @@ db_config.yaml +Chinook.sqlite +chroma.sqlite3 diff --git a/examples/use-rdb-resource/db_connector.py b/examples/use-rdb-resource/db_connector.py deleted file mode 100644 index 8a2b32c4d..000000000 --- a/examples/use-rdb-resource/db_connector.py +++ /dev/null @@ -1,38 +0,0 @@ -import yaml -from sqlalchemy import create_engine, text -from sqlalchemy.orm import sessionmaker - -class MySQLDatabase: - def __init__(self, config_path): - self.config_path = config_path - self.config = self.load_config() - self.engine = self.create_engine() - self.Session = sessionmaker(bind=self.engine) - - def load_config(self): - with open(self.config_path, 'r') as file: - return yaml.safe_load(file)['database']['mysql'] - - def create_engine(self): - username = self.config['username'] - password = self.config['password'] - host = self.config['host'] - port = self.config['port'] - database = self.config['database'] - connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' - return create_engine(connection_string) - - def get_session(self): - return self.Session() - - def get_events(self): - session = self.get_session() - result = session.execute(text("SELECT * FROM items")) # TODO: use vanna ai later for the query - return result - -if __name__ == '__main__': - config_path = 'db_config.yaml' - db = MySQLDatabase(config_path) - result = db.get_events() - for row in result: - print(type(row), str(row)) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 8c200beee..a8d569816 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,17 +1,44 @@ -from pathlib import Path from dotenv import load_dotenv +import os from openssa import DANA, DbResource +from vanna.openai import OpenAI_Chat +from vanna.chromadb import ChromaDB_VectorStore +import yaml load_dotenv() -# いらなそう -LOCAL_CACHE_DOCS_DIR_PATH: Path = Path(__file__).parent / '.data' +def load_yaml(file_path): + with open(file_path, 'r') as file: + return yaml.safe_load(file) + + +config = load_yaml('db_config.yaml') +db_host = config['database']['mysql']['host'] +db_database = config['database']['mysql']['database'] +db_user = config['database']['mysql']['username'] +db_password = config['database']['mysql']['password'] +db_port = config['database']['mysql']['port'] + + +class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): + def __init__(self, config=None): + ChromaDB_VectorStore.__init__(self, config=config) + OpenAI_Chat.__init__(self, config=config) + + +openai_api_key = os.getenv('OPENAI_API_KEY') + +vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) +vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) + +question = "Please tell me cheapest item from items table." +query = vn_openai.generate_sql(question) def get_or_create_agent() -> DANA: return DANA( # TODO: For Argument Use SQL first and use prompt later - resources=[DbResource(config_path="db_config.yaml", query="SELECT * FROM items")] + resources=[DbResource(config_path="db_config.yaml", query=query)] ) @@ -27,11 +54,12 @@ def solve(question) -> str: QUESTION = ( # TODO:ここを変更(プロンプト) 'Please tell me cheapest item from items table.' - 'Please answer in Japanese.' ) answer = solve(QUESTION) -# TODO: 出力情報の追加 (作成して使用されたSQLなど) + # TODO: 出力情報の追加 (vanna aiで作成して使用されたSQLなど) print('--------------------------------') print(answer) print('--------------------------------') + print(query) + print('--------------------------------') diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml index 52df4ce54..d78662073 100644 --- a/examples/use-rdb-resource/pyproject.toml +++ b/examples/use-rdb-resource/pyproject.toml @@ -8,6 +8,9 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.12" pymysql = "^1.1.1" +kaleido = "0.2.1" +vanna = "^0.7.3" +chromadb = "^0.5.11" [build-system] From 001f6d512c29eb3574ba213129d39a4ff70dd245 Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Tue, 1 Oct 2024 19:38:40 +0900 Subject: [PATCH 04/44] make file for making example table and data --- .../make_example_table_data.py | 118 ++++++++++++++++++ examples/use-rdb-resource/pyproject.toml | 1 + openssa/core/resource/db.py | 5 - 3 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 examples/use-rdb-resource/make_example_table_data.py diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py new file mode 100644 index 000000000..32f8a560e --- /dev/null +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -0,0 +1,118 @@ +from sqlalchemy import Column, Integer, String, Date, create_engine, inspect +from sqlalchemy.orm import sessionmaker, declarative_base +from faker import Faker +import random +import yaml + +# ベースモデルを定義 +Base = declarative_base() + +# sales_data テーブルの定義 +class SalesData(Base): + __tablename__ = 'sales_data' + + sale_id = Column(Integer, primary_key=True, autoincrement=True) + product_id = Column(Integer) + product_name = Column(String(255)) + sale_date = Column(Date) + region = Column(String(255)) + +# データベースクラス +class MySQLDatabase: + def __init__(self, config_path): + self.config_path = config_path + self.config = self.load_config() + self.engine = self.create_engine() + self.Session = sessionmaker(bind=self.engine) + + def load_config(self): + with open(self.config_path, 'r') as file: + return yaml.safe_load(file)['database']['mysql'] + + def create_engine(self): + username = self.config['username'] + password = self.config['password'] + host = self.config['host'] + port = self.config['port'] + database = self.config['database'] + connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' + return create_engine(connection_string) + + def create_tables(self): + Base.metadata.create_all(self.engine) + + def drop_table(self, table_class): + inspector = inspect(self.engine) + if inspector.has_table(table_class.__tablename__): + table_class.__table__.drop(self.engine) + + def get_session(self): + return self.Session() + + +# Fakeデータ生成用のFakerを初期化 +fake = Faker() + +# ランダムシードを設定(例: 42) +seed_value = 42 +random.seed(seed_value) +Faker.seed(seed_value) + +# 製品リストを作成 +products = [ + {"id": 101, "name": "Smartwatch", "price": 150.00}, + {"id": 102, "name": "Laptop", "price": 1200.00}, + {"id": 103, "name": "Smartphone", "price": 800.00}, + {"id": 104, "name": "Tablet", "price": 400.00}, + {"id": 105, "name": "Headphones", "price": 100.00} +] + +# 地域リストを作成 +regions = ["North America", "Europe", "Asia", "South America", "Africa"] + +# データを生成して挿入 +def generate_sales_data(session, num_records): + sales_data_list = [] + for _ in range(num_records): + # ランダムに製品、価格、地域を選択 + product = random.choice(products) + region = random.choice(regions) + + # ランダムな販売日を生成(過去1年の範囲) + sale_date = fake.date_between(start_date='-1y', end_date='today') + + # SalesData インスタンスを作成 + sales_data = SalesData( + product_id=product["id"], + product_name=product["name"], + sale_date=sale_date, + region=region + ) + sales_data_list.append(sales_data) + + # 一括挿入 + session.bulk_save_objects(sales_data_list) + session.commit() + + +# MySQLDatabaseクラスを使用して、テーブル作成とデータ挿入を実行 +if __name__ == "__main__": + # 設定ファイルのパス + config_path = 'db_config.yaml' + + # MySQLDatabaseインスタンスを初期化 + db = MySQLDatabase(config_path) + + # 既存のsales_dataテーブルを削除 + db.drop_table(SalesData) + + # テーブルを作成 + db.create_tables() + + # セッションを取得 + session = db.get_session() + + # 20000件のデータを生成 + generate_sales_data(session, 20000) + + print("20000件のデータがsales_dataテーブルに作成されました。") diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml index d78662073..144e98f47 100644 --- a/examples/use-rdb-resource/pyproject.toml +++ b/examples/use-rdb-resource/pyproject.toml @@ -11,6 +11,7 @@ pymysql = "^1.1.1" kaleido = "0.2.1" vanna = "^0.7.3" chromadb = "^0.5.11" +faker = "^30.1.0" [build-system] diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 709c132d7..af1aede33 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -43,11 +43,6 @@ def create_engine(self): def get_session(self): return self.Session() - def get_events(self): - session = self.get_session() - result = session.execute(text("SELECT * FROM items")) # TODO: use vanna ai later for the query - return result - @global_register @dataclass From 3c047f4c459f628918cc88b2df2c4faaa6bb89dd Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Wed, 2 Oct 2024 18:49:06 +0900 Subject: [PATCH 05/44] train vanna for generating SQL from prompt --- examples/use-rdb-resource/.gitignore | 1 + examples/use-rdb-resource/main.py | 24 +++++++------- .../make_example_table_data.py | 32 ++++++++++++++++++- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/examples/use-rdb-resource/.gitignore b/examples/use-rdb-resource/.gitignore index 366fe455b..ef2003889 100644 --- a/examples/use-rdb-resource/.gitignore +++ b/examples/use-rdb-resource/.gitignore @@ -1,3 +1,4 @@ db_config.yaml Chinook.sqlite chroma.sqlite3 +*.bin diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index a8d569816..72706698a 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -20,6 +20,7 @@ def load_yaml(file_path): db_password = config['database']['mysql']['password'] db_port = config['database']['mysql']['port'] +openai_api_key = os.getenv('OPENAI_API_KEY') class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): def __init__(self, config=None): @@ -27,23 +28,21 @@ def __init__(self, config=None): OpenAI_Chat.__init__(self, config=config) -openai_api_key = os.getenv('OPENAI_API_KEY') - -vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) -vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) +def generate_sql_from_prompt(question) -> str: + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) + return vn_openai.generate_sql(question) -question = "Please tell me cheapest item from items table." -query = vn_openai.generate_sql(question) -def get_or_create_agent() -> DANA: +def get_or_create_agent(query) -> DANA: return DANA( # TODO: For Argument Use SQL first and use prompt later resources=[DbResource(config_path="db_config.yaml", query=query)] ) -def solve(question) -> str: - agent = get_or_create_agent() +def solve(question, query) -> str: + agent = get_or_create_agent(query) try: return agent.solve(problem=question) except Exception as err: # pylint: disable=broad-exception-caught @@ -53,9 +52,12 @@ def solve(question) -> str: if __name__ == '__main__': QUESTION = ( # TODO:ここを変更(プロンプト) - 'Please tell me cheapest item from items table.' + # "Please tell me cheapest item from items table." + "What is the best-selling product in the last year from sales_data table?" ) - answer = solve(QUESTION) + + query = generate_sql_from_prompt(QUESTION) + answer = solve(QUESTION, query) # TODO: 出力情報の追加 (vanna aiで作成して使用されたSQLなど) print('--------------------------------') diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py index 32f8a560e..9fdd10831 100644 --- a/examples/use-rdb-resource/make_example_table_data.py +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -4,6 +4,14 @@ import random import yaml +# TODO: vannaは一つのファイルにまとめた方が便利かも(main.pyのやつと合わせて) +from vanna.openai import OpenAI_Chat +from vanna.chromadb import ChromaDB_VectorStore +from dotenv import load_dotenv +import os + +load_dotenv() + # ベースモデルを定義 Base = declarative_base() @@ -95,6 +103,26 @@ def generate_sales_data(session, num_records): session.commit() +class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): + def __init__(self, config=None): + ChromaDB_VectorStore.__init__(self, config=config) + OpenAI_Chat.__init__(self, config=config) + + +def train_vanna_for_sales_data(): + openai_api_key = os.getenv('OPENAI_API_KEY') + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.train(ddl=""" + CREATE TABLE sales_data ( + sale_id INT PRIMARY KEY AUTO_INCREMENT, + product_id INT, + product_name VARCHAR(255), + sale_date DATE, + region VARCHAR(255) + ) + """) + + # MySQLDatabaseクラスを使用して、テーブル作成とデータ挿入を実行 if __name__ == "__main__": # 設定ファイルのパス @@ -114,5 +142,7 @@ def generate_sales_data(session, num_records): # 20000件のデータを生成 generate_sales_data(session, 20000) - print("20000件のデータがsales_dataテーブルに作成されました。") + + train_vanna_for_sales_data() + print("vannaをsales_dataに合わせて訓練しました。") From 0d63e0613fcec60a475dae1d8632c397cae0e4eb Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Thu, 3 Oct 2024 14:42:50 +0900 Subject: [PATCH 06/44] resource data is taple and it seems that openssa has difficulty to understand the context of the data --- examples/use-rdb-resource/.env.template | 8 +- examples/use-rdb-resource/README.md | 7 ++ examples/use-rdb-resource/main.py | 45 +-------- .../make_example_table_data.py | 93 +++++-------------- examples/use-rdb-resource/myvanna.py | 32 +++++++ openssa/core/resource/db.py | 28 +++--- 6 files changed, 85 insertions(+), 128 deletions(-) create mode 100644 examples/use-rdb-resource/myvanna.py diff --git a/examples/use-rdb-resource/.env.template b/examples/use-rdb-resource/.env.template index 2ba29ae4d..09adbdcb7 100644 --- a/examples/use-rdb-resource/.env.template +++ b/examples/use-rdb-resource/.env.template @@ -1 +1,7 @@ -OPENAI_API_KEY={your openai API key} +OPENAI_API_KEY=your openai API key + +DB_USERNAME=your_username +DB_PASSWORD=your_password +DB_HOST=your_host +DB_PORT=your_port +DB_NAME=your_database_name diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md index e69de29bb..aa056ac6a 100644 --- a/examples/use-rdb-resource/README.md +++ b/examples/use-rdb-resource/README.md @@ -0,0 +1,7 @@ +# Access Database resources and inference by DANA + +## Commands for set-up and run + +## What is this folder doing? + +## Check Result diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 72706698a..26ba635a7 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,65 +1,30 @@ -from dotenv import load_dotenv -import os +from myvanna import generate_sql_from_prompt, MyVanna from openssa import DANA, DbResource -from vanna.openai import OpenAI_Chat -from vanna.chromadb import ChromaDB_VectorStore -import yaml +from dotenv import load_dotenv +# .envファイルの読み込み load_dotenv() - -def load_yaml(file_path): - with open(file_path, 'r') as file: - return yaml.safe_load(file) - - -config = load_yaml('db_config.yaml') -db_host = config['database']['mysql']['host'] -db_database = config['database']['mysql']['database'] -db_user = config['database']['mysql']['username'] -db_password = config['database']['mysql']['password'] -db_port = config['database']['mysql']['port'] - -openai_api_key = os.getenv('OPENAI_API_KEY') - -class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): - def __init__(self, config=None): - ChromaDB_VectorStore.__init__(self, config=config) - OpenAI_Chat.__init__(self, config=config) - - -def generate_sql_from_prompt(question) -> str: - vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) - vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) - return vn_openai.generate_sql(question) - - def get_or_create_agent(query) -> DANA: return DANA( - # TODO: For Argument Use SQL first and use prompt later - resources=[DbResource(config_path="db_config.yaml", query=query)] + resources=[DbResource(query=query)] ) - def solve(question, query) -> str: agent = get_or_create_agent(query) try: return agent.solve(problem=question) - except Exception as err: # pylint: disable=broad-exception-caught + except Exception as err: return f'ERROR: {err}' - if __name__ == '__main__': QUESTION = ( - # TODO:ここを変更(プロンプト) - # "Please tell me cheapest item from items table." "What is the best-selling product in the last year from sales_data table?" ) query = generate_sql_from_prompt(QUESTION) answer = solve(QUESTION, query) - # TODO: 出力情報の追加 (vanna aiで作成して使用されたSQLなど) print('--------------------------------') print(answer) print('--------------------------------') diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py index 9fdd10831..224f5ad5f 100644 --- a/examples/use-rdb-resource/make_example_table_data.py +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -1,51 +1,41 @@ -from sqlalchemy import Column, Integer, String, Date, create_engine, inspect +from sqlalchemy import Column, Integer, String, Date, inspect, create_engine from sqlalchemy.orm import sessionmaker, declarative_base from faker import Faker import random -import yaml - -# TODO: vannaは一つのファイルにまとめた方が便利かも(main.pyのやつと合わせて) -from vanna.openai import OpenAI_Chat -from vanna.chromadb import ChromaDB_VectorStore from dotenv import load_dotenv import os +from myvanna import train_vanna_for_sales_data +# .envファイルの読み込み load_dotenv() -# ベースモデルを定義 Base = declarative_base() -# sales_data テーブルの定義 class SalesData(Base): __tablename__ = 'sales_data' - sale_id = Column(Integer, primary_key=True, autoincrement=True) product_id = Column(Integer) product_name = Column(String(255)) sale_date = Column(Date) region = Column(String(255)) -# データベースクラス class MySQLDatabase: - def __init__(self, config_path): - self.config_path = config_path - self.config = self.load_config() + def __init__(self): self.engine = self.create_engine() self.Session = sessionmaker(bind=self.engine) - def load_config(self): - with open(self.config_path, 'r') as file: - return yaml.safe_load(file)['database']['mysql'] - def create_engine(self): - username = self.config['username'] - password = self.config['password'] - host = self.config['host'] - port = self.config['port'] - database = self.config['database'] + username = os.getenv('DB_USERNAME') + password = os.getenv('DB_PASSWORD') + host = os.getenv('DB_HOST') + port = os.getenv('DB_PORT') + database = os.getenv('DB_NAME') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) + def get_session(self): + return self.Session() + def create_tables(self): Base.metadata.create_all(self.engine) @@ -54,19 +44,12 @@ def drop_table(self, table_class): if inspector.has_table(table_class.__tablename__): table_class.__table__.drop(self.engine) - def get_session(self): - return self.Session() - - -# Fakeデータ生成用のFakerを初期化 +# データ生成 fake = Faker() - -# ランダムシードを設定(例: 42) seed_value = 42 random.seed(seed_value) Faker.seed(seed_value) -# 製品リストを作成 products = [ {"id": 101, "name": "Smartwatch", "price": 150.00}, {"id": 102, "name": "Laptop", "price": 1200.00}, @@ -75,21 +58,14 @@ def get_session(self): {"id": 105, "name": "Headphones", "price": 100.00} ] -# 地域リストを作成 regions = ["North America", "Europe", "Asia", "South America", "Africa"] -# データを生成して挿入 def generate_sales_data(session, num_records): sales_data_list = [] for _ in range(num_records): - # ランダムに製品、価格、地域を選択 product = random.choice(products) region = random.choice(regions) - - # ランダムな販売日を生成(過去1年の範囲) sale_date = fake.date_between(start_date='-1y', end_date='today') - - # SalesData インスタンスを作成 sales_data = SalesData( product_id=product["id"], product_name=product["name"], @@ -97,22 +73,21 @@ def generate_sales_data(session, num_records): region=region ) sales_data_list.append(sales_data) - - # 一括挿入 session.bulk_save_objects(sales_data_list) session.commit() +if __name__ == "__main__": + db = MySQLDatabase() -class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): - def __init__(self, config=None): - ChromaDB_VectorStore.__init__(self, config=config) - OpenAI_Chat.__init__(self, config=config) + db.drop_table(SalesData) + db.create_tables() + session = db.get_session() -def train_vanna_for_sales_data(): - openai_api_key = os.getenv('OPENAI_API_KEY') - vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) - vn_openai.train(ddl=""" + generate_sales_data(session, 20000) + print("20000件のデータがsales_dataテーブルに作成されました。") + + train_vanna_for_sales_data(""" CREATE TABLE sales_data ( sale_id INT PRIMARY KEY AUTO_INCREMENT, product_id INT, @@ -121,28 +96,4 @@ def train_vanna_for_sales_data(): region VARCHAR(255) ) """) - - -# MySQLDatabaseクラスを使用して、テーブル作成とデータ挿入を実行 -if __name__ == "__main__": - # 設定ファイルのパス - config_path = 'db_config.yaml' - - # MySQLDatabaseインスタンスを初期化 - db = MySQLDatabase(config_path) - - # 既存のsales_dataテーブルを削除 - db.drop_table(SalesData) - - # テーブルを作成 - db.create_tables() - - # セッションを取得 - session = db.get_session() - - # 20000件のデータを生成 - generate_sales_data(session, 20000) - print("20000件のデータがsales_dataテーブルに作成されました。") - - train_vanna_for_sales_data() print("vannaをsales_dataに合わせて訓練しました。") diff --git a/examples/use-rdb-resource/myvanna.py b/examples/use-rdb-resource/myvanna.py new file mode 100644 index 000000000..9f9c90d39 --- /dev/null +++ b/examples/use-rdb-resource/myvanna.py @@ -0,0 +1,32 @@ +from dotenv import load_dotenv +import os +from vanna.openai import OpenAI_Chat +from vanna.chromadb import ChromaDB_VectorStore + +# .envファイルの読み込み +load_dotenv() + +# 環境変数から接続情報を取得 +db_user = os.getenv('DB_USERNAME') +db_password = os.getenv('DB_PASSWORD') +db_host = os.getenv('DB_HOST') +db_port = int(os.getenv('DB_PORT')) +db_database = os.getenv('DB_NAME') +openai_api_key = os.getenv('OPENAI_API_KEY') + +# MyVannaクラス定義 +class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): + def __init__(self, config=None): + ChromaDB_VectorStore.__init__(self, config=config) + OpenAI_Chat.__init__(self, config=config) + +# sales_dataに基づいてVannaを訓練する関数 +def train_vanna_for_sales_data(ddl): + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.train(ddl=ddl) + +# プロンプトからSQLを生成する関数 +def generate_sql_from_prompt(question) -> str: + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) + return vn_openai.generate_sql(question) diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index af1aede33..0aeb5e848 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -11,7 +11,7 @@ from dataclasses import dataclass, field from typing import Any, Optional -import yaml +import os from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker @@ -21,22 +21,20 @@ class MySQLDatabase: - def __init__(self, config_path): - self.config_path = config_path - self.config = self.load_config() + def __init__(self): self.engine = self.create_engine() self.Session = sessionmaker(bind=self.engine) - - def load_config(self): - with open(self.config_path, 'r') as file: - return yaml.safe_load(file)['database']['mysql'] + self.config = { + 'host': os.getenv('DB_HOST'), + 'database': os.getenv('DB_NAME') + } def create_engine(self): - username = self.config['username'] - password = self.config['password'] - host = self.config['host'] - port = self.config['port'] - database = self.config['database'] + username = os.getenv('DB_USERNAME') + password = os.getenv('DB_PASSWORD') + host = os.getenv('DB_HOST') + port = os.getenv('DB_PORT') + database = os.getenv('DB_NAME') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) @@ -48,13 +46,11 @@ def get_session(self): @dataclass class DbResource(AbstractResource): """Database Informational Resource.""" - - config_path: str query: str def __post_init__(self): """Post-initialize database resource.""" - self.db = MySQLDatabase(self.config_path) + self.db = MySQLDatabase() @property def unique_name(self) -> str: From ffae6257798f4f7b37f9c384ff28f0cce4b323d6 Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Thu, 3 Oct 2024 15:22:39 +0900 Subject: [PATCH 07/44] make myvanna file to clean code --- examples/use-rdb-resource/main.py | 3 +-- examples/use-rdb-resource/make_example_table_data.py | 4 ---- examples/use-rdb-resource/myvanna.py | 7 ++----- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 26ba635a7..272dcc2b5 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -2,7 +2,6 @@ from openssa import DANA, DbResource from dotenv import load_dotenv -# .envファイルの読み込み load_dotenv() def get_or_create_agent(query) -> DANA: @@ -19,7 +18,7 @@ def solve(question, query) -> str: if __name__ == '__main__': QUESTION = ( - "What is the best-selling product in the last year from sales_data table?" + "Can you list the products in order of sales volume from highest to lowest?" ) query = generate_sql_from_prompt(QUESTION) diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py index 224f5ad5f..f5faa1bc3 100644 --- a/examples/use-rdb-resource/make_example_table_data.py +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -6,7 +6,6 @@ import os from myvanna import train_vanna_for_sales_data -# .envファイルの読み込み load_dotenv() Base = declarative_base() @@ -44,7 +43,6 @@ def drop_table(self, table_class): if inspector.has_table(table_class.__tablename__): table_class.__table__.drop(self.engine) -# データ生成 fake = Faker() seed_value = 42 random.seed(seed_value) @@ -85,7 +83,6 @@ def generate_sales_data(session, num_records): session = db.get_session() generate_sales_data(session, 20000) - print("20000件のデータがsales_dataテーブルに作成されました。") train_vanna_for_sales_data(""" CREATE TABLE sales_data ( @@ -96,4 +93,3 @@ def generate_sales_data(session, num_records): region VARCHAR(255) ) """) - print("vannaをsales_dataに合わせて訓練しました。") diff --git a/examples/use-rdb-resource/myvanna.py b/examples/use-rdb-resource/myvanna.py index 9f9c90d39..0e544d350 100644 --- a/examples/use-rdb-resource/myvanna.py +++ b/examples/use-rdb-resource/myvanna.py @@ -3,10 +3,8 @@ from vanna.openai import OpenAI_Chat from vanna.chromadb import ChromaDB_VectorStore -# .envファイルの読み込み load_dotenv() -# 環境変数から接続情報を取得 db_user = os.getenv('DB_USERNAME') db_password = os.getenv('DB_PASSWORD') db_host = os.getenv('DB_HOST') @@ -14,18 +12,17 @@ db_database = os.getenv('DB_NAME') openai_api_key = os.getenv('OPENAI_API_KEY') -# MyVannaクラス定義 class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): def __init__(self, config=None): ChromaDB_VectorStore.__init__(self, config=config) OpenAI_Chat.__init__(self, config=config) -# sales_dataに基づいてVannaを訓練する関数 + def train_vanna_for_sales_data(ddl): vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) vn_openai.train(ddl=ddl) -# プロンプトからSQLを生成する関数 + def generate_sql_from_prompt(question) -> str: vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) From 47ecfe8d8dcc57245226ff37ca15af6754b7720e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=AD=E5=B1=B1=E4=BA=9C=E8=98=AD?= Date: Fri, 11 Oct 2024 18:03:00 +0900 Subject: [PATCH 08/44] TypeError: Can't instantiate abstract class DbResource without an implementation for abstract method 'answer' --- examples/use-rdb-resource/README.md | 24 +++++++++++++++++++++--- examples/use-rdb-resource/main.py | 3 ++- examples/use-rdb-resource/pyproject.toml | 5 ++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md index aa056ac6a..b4e335c60 100644 --- a/examples/use-rdb-resource/README.md +++ b/examples/use-rdb-resource/README.md @@ -1,7 +1,25 @@ # Access Database resources and inference by DANA -## Commands for set-up and run +## Setting-up +- What you need + - commands (if you are mac user, you can install those things by Homebrew) + - mysql + - Also, create or use existing database for this example. + - poetry + - API Key + - Use your own OpenAI API key + +- Setting up Commands + - `cd examples/use-rdb-resource` + - `poetry install` + - `cp .env.template .env` + - update .env data with your environment data + - `poetry run python make_example_table_data.py` + - if this command doesn't work, then run `poetry env use 3.12` + - change python version to resolve dependensies version + - + + +## What is this example doing? -## What is this folder doing? -## Check Result diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 272dcc2b5..be56cf354 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,5 +1,6 @@ from myvanna import generate_sql_from_prompt, MyVanna -from openssa import DANA, DbResource +from openssa import DANA +from openssa.core.resource.db import DbResource from dotenv import load_dotenv load_dotenv() diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml index 144e98f47..cd5c4683d 100644 --- a/examples/use-rdb-resource/pyproject.toml +++ b/examples/use-rdb-resource/pyproject.toml @@ -6,12 +6,15 @@ authors = ["Your Name "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.12" +python = ">=3.12,<3.13" pymysql = "^1.1.1" kaleido = "0.2.1" vanna = "^0.7.3" chromadb = "^0.5.11" faker = "^30.1.0" +sqlalchemy = "^2.0.35" +openai = "^1.51.2" +openssa = "^0.24.10.10" [build-system] From 86c08400f3440b811f556887e1128f9b64a5769c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=AD=E5=B1=B1=E4=BA=9C=E8=98=AD?= Date: Fri, 18 Oct 2024 16:38:23 +0900 Subject: [PATCH 09/44] change how to import from openssa to try to make it work (but it didn't) --- examples/use-rdb-resource/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index be56cf354..425439a3c 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,5 +1,6 @@ from myvanna import generate_sql_from_prompt, MyVanna -from openssa import DANA +# from openssa import DANA, DbResource +from openssa.core.agent.dana import DANA #, FileResource from openssa.core.resource.db import DbResource from dotenv import load_dotenv From 4e9f648018e2b26ea05ea9042faeefe091daf89d Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Tue, 22 Oct 2024 09:00:37 +0900 Subject: [PATCH 10/44] nailcare --- examples/use-rdb-resource/README.md | 13 ++++++------- tests/core/resource/test_db_resource.py | 0 tests/core/resource/test_webpage_resource.py | 1 - 3 files changed, 6 insertions(+), 8 deletions(-) create mode 100644 tests/core/resource/test_db_resource.py diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md index b4e335c60..e073b6238 100644 --- a/examples/use-rdb-resource/README.md +++ b/examples/use-rdb-resource/README.md @@ -1,10 +1,15 @@ # Access Database resources and inference by DANA +## What is this example doing? + +- Use DbResource file to get data from RDB(MySQL) Using SQL made by Vanna, and answer question by DANA. + ## Setting-up + - What you need - commands (if you are mac user, you can install those things by Homebrew) - mysql - - Also, create or use existing database for this example. + - Also, create or use existing database for this example. - poetry - API Key - Use your own OpenAI API key @@ -17,9 +22,3 @@ - `poetry run python make_example_table_data.py` - if this command doesn't work, then run `poetry env use 3.12` - change python version to resolve dependensies version - - - - -## What is this example doing? - - diff --git a/tests/core/resource/test_db_resource.py b/tests/core/resource/test_db_resource.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/core/resource/test_webpage_resource.py b/tests/core/resource/test_webpage_resource.py index 9ee7c4319..ffd8002b1 100644 --- a/tests/core/resource/test_webpage_resource.py +++ b/tests/core/resource/test_webpage_resource.py @@ -16,4 +16,3 @@ def test_webpage_resource(): _ = webpage1.answer(test_question) _ = webpage1.get_summary() # assert isinstance(answer, str) - # assert isinstance(summary, str) From 27d9c378898b1b00a05ca8d50c37cc68d66b7695 Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Tue, 22 Oct 2024 09:17:48 +0900 Subject: [PATCH 11/44] nailcare --- examples/use-rdb-resource/main.py | 2 +- openssa/core/resource/db.py | 5 ++--- tests/core/resource/test_db_resource.py | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 425439a3c..81ab173e1 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,4 +1,4 @@ -from myvanna import generate_sql_from_prompt, MyVanna +from myvanna import generate_sql_from_prompt # from openssa import DANA, DbResource from openssa.core.agent.dana import DANA #, FileResource from openssa.core.resource.db import DbResource diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 0aeb5e848..182b0c3e2 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -9,15 +9,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from typing import Any, Optional +from dataclasses import dataclass +from typing import Any import os from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from .abstract import AbstractResource from ._global import global_register -from ._prompts import RESOURCE_QA_PROMPT_TEMPLATE class MySQLDatabase: diff --git a/tests/core/resource/test_db_resource.py b/tests/core/resource/test_db_resource.py index e69de29bb..a508ea191 100644 --- a/tests/core/resource/test_db_resource.py +++ b/tests/core/resource/test_db_resource.py @@ -0,0 +1,18 @@ +from dotenv import load_dotenv +from openssa.core.resource.db import DbResource + +load_dotenv() + + +def test_db_resource(): + test_url = "http://paulgraham.com/worked.html" + test_question = "What did the author do growing up?" + + webpage1 = WebPageResource(test_url) + # print(f"unique name = {webpage1.name}") + # print(f"unique name = {webpage1.unique_name}") + # print(f"answer = {webpage1.answer(test_question)}") + # print(f"summary = {webpage1.get_summary()}") + _ = webpage1.answer(test_question) + _ = webpage1.get_summary() + # assert isinstance(answer, str) From 39b218b86dee00e4c623c984f4aa11e9960011d2 Mon Sep 17 00:00:00 2001 From: aran-nakayama Date: Tue, 22 Oct 2024 10:57:08 +0900 Subject: [PATCH 12/44] save tmp (need to be fixed) --- chroma.sqlite3 | Bin 0 -> 167936 bytes examples/use-rdb-resource/README.md | 2 ++ examples/use-rdb-resource/main.py | 1 + examples/use-rdb-resource/pyproject.toml | 1 + openssa/core/resource/db.py | 21 +++++++++++-------- tests/core/resource/test_db_resource.py | 25 +++++++++++++++-------- 6 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 chroma.sqlite3 diff --git a/chroma.sqlite3 b/chroma.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..8b32ac1c40edff06ced7114790e84294744b5a98 GIT binary patch literal 167936 zcmeI5Uu+vme&^ZxLrRoode-CNcxW?i^J2SZ~m}YlT z;+i65HswG22LWZz;P!xfTHt^L$W!hD1W2%0%7@}(`QyQ;eDSD#Uhi;LYqby54oinFgd{2S zJB)=wp%wZ`(9d=H8Ka+X($56_I3LM5IN|R~XzBY8CnhzVV zzn%Kw)JEh|_}Af|PM!-t2^S~iiNB8i{`fzQ{cQYiq(@_o&@ZIoq(8?KG73HVY9#*d zyHYpbQL7DppgyS>{9&c8dq-DSw@bxxiIs~Vu9uh>&K9DKJ_g@A;LQ$Q!pfzO%k19v z+D38v6ZY-WCyRChaXxK>Gf8yA&kg+Ge?Qx3tZ+ zOLt1!rOnmS4s(+m3w1qV73TqeY8BKei(Sf2TL|(SwC%nU|l^ds^K zYL~vJ5(L?}S-6!X7;n|MU=6YEtF^h>;d+JISb1%uv{No_+;b}tyIs0d++8oT)!l7U zSh?aRu`b@-T)V$pVhdgkUGy|cq7d5jm6K|rE4Sf8YMguTayY(vZJ0jn+~^YL=yLbc zTaoz9n^Ko*sM*M^s9+~}wWhoU08j0B0+ zA#baldaD_%x6#D=@o@b9&0z)U$yzkTfS{4+t!8MVe z;X8aYP^>v$%$!%h`V0t@Wp>o8KcqIb*JxGMM&qezQ=005)ylkr`jS$4nXm>`)!=4w zo!B%DK_Jiyoiu&b(r*KtM5YN~Tboqt)=Oj#SBpEV#oMJtrnQ>2`d;OGMypvdI&EL$ zcPgNecF~o{s(IN<;rQB(VeEZ)Dk-4OV5|hir_}L2X6!_fgAj#Q~plld>IZ|A_?%+kq$W4V26@i66{{<2&*&VH{iPJaAnr=RSST zA74EeiOaI|?13p??-jl0vjK^FV{Or$V5gJeZ=iPEiMkv9(Ei*{Y*uZx8iStgzK3J* z*<(G;$zXnC*wdrC`gSBfKQA4Zy32HQ?HV2VAsS9u-wr^VbAnGYrB+LLdf00>&2X~R zgCjg6Vm&;3og*(wbdFp@98Hv_!|_kj!)lOsnOkKlK3nAmMvvvGNIadEo=YZz0oM%( znht;&kif1`z8I%Y4J_KLS*rrlJKeeRSR+$uwFGI9QhxBcC$5w57!8`Kr<>&=-~ zX;2O{R2;;8P*QsS?vI+*SCQEQ6?oApN}^8^GT>jm;2WFAW505>{G&#PPOP-5l{!jn>di;3#v{(m!Ll$aDr@q` z*2PS1>QvUlry{(wZ}mf2i`wZDTkEho^-$@UJ*YPtcE;xI#cfryhr`t->xfx8er@&l zwN?U3wy}?VZH99-pUD-L)7fe*mDMx3WJXQaxSmSqa*C4cRwJb6yT|WadY%+|&dP0m zK(u7j_y-lcZN^v#%twPoi`Ck#12c5ht+=F1Qpt`rVM3I|jDQghl*V6WZ*OnibLy;_ z@xX!`Y(ToIlFsF|LMm4+)T(N>ww$Y_@>O2bHAT;)^wht(5*7{dCw66?5QWUh_LxC- zM`ym02A&;jcTDqM`gm=pykn1fd~-#8V|mk%I)f;4sId}_o^VH30I%E8cTeA-{{`m< ztYVrnS%+_e9yJT&kxH{g@tQa-{;GmWeHTCLGvjDiEP%Y z+390Cuc>4#oq@P$aBdOSHg`(fWws?Idhe|lS4(0}Xv?d=Vl4BW;`%O4|1QkiH>y}w zWInMX7zzv6lLO%HFc$C*xCiJDff@3|f{iM|WW20>H9#f=gp9s6QdqRC95K<1GbY8ok%XOgM3mRFP{ z&t%f+?6T60OorpqM^CLMz_^e&?SIm{2z>T&gnoy6nKKo+5*4;Mr6;rbjGiXTtSoCO zHOcd}Tq>(Ar&aaYT^iU)AAAUvJq-1Bz@5?@5Or7`>41(31Uvx^q8~ZpzK1V@T zXM5l{T1*i{6TOzK@3B4jmC`FP+hd~g8Ph>CJ1f!DvXa)Q$Cyo~li94goE19EWD3c2 zo)?yt)U%s34xTQ3x?^clC{pp&DPx!}eHHgKbq*U#d0OiklsQjiap3?Jd0Bg%D@#kh z80Tp-Q1$=ZQz-qa3P|cqm#jq7g>*Kh=Tli#S2QJ;%x7wzOq z6unTT&Rw>URg!8U#S7WvkEDn^z4|=fR|2c*c@hn)d0q-jW?NYMjoom8U+>A6m8hQ1 zXR~^Z#sjG;FYrP-snjxhzNXcxG`5|Zj?(W~EEkIXuh=hR-;Ldit;TY(U&Q`n>}Rq6 z5c|W}zm0WcN3r@z!(haL00@8p2!H?xfB*=900@8p2!Oy@C2(%yvLuA*l@npn1o(Ox#U>9kf*16gJ*?`E}T_8pwBEIBIr*kD|Q#s?l*fwL;{4%=zHS;yvx2xS$8*@Lx`~O!!u@C_S zKmY_l00ck)1V8`;KmY_l00h2d0^nvHw5z|L<)O*#BR6 zWpK7=@7r74bnat1R&flUwdeRymw?_*^3LYu*#F=3(~SN9vHyRM=Q8a7|2h12|Ms0P zp1A*iTFn%4SpWY5BYq?W0T2KI5C8!X009sH0T2KI5CDPKlmOQMzov?V*dPD`AOHd& z00JNY0w4eaAOHd&@B#$z{{IDFAt?xe00@8p2!H?xfB*=900@8p2)w2QeDD8fUeh8# zY!Cne5C8!X009sH0T2KI5C8!X0D)5o;Qjw8fDjD=AOHd&00JNY0w4eaAOHd&00L)# zfbaeP87K{k00JNY0w4eaAOHd&00JNY0w4eaATWr4c>gcO{vkyF@c{u4009sH0T2KI z5C8!X009sH0T4J71TIKp37yx}qeiDs`BQyTt{dZOai1q*DGygAi{!eD_(I0$300ck)1V8`;KmY_l00hn|f#ZAQk==`--RILe zWjW2WIW8+I{mNvjs=Qpw7UaCPtmO1GUsm;k@vz~%H9YB!;t%Hbo5tgc)@s)3dq-`x zQ*SlrZm}QC8HcLI>1eKznpP9aK8|ZLaDD z1!X8?Lqz51pv{f?_j#qJYMoYF1TLj0w5oQr{fdY&Zz1ksJZ);`O^p3kL$`uOc%}37 z(3I(0n>!z^`bA!Ac6hs~Hs*fhPqx}>G`K16ki^IL#%!e7YFbxw)v8=u&g*g}S0kj! zT20pST2@Ufwe)g2uj@d1I?~^jBD?dVmhxJX=af`UPV2dXoXP72Syi%%tkhD;bUMR1 z5!YMV(E)FE%zB6VdRl!I@BhbRpM_#SjeUmi0Gw4rk8*(k2!H?xfB*=900@8p2!H?x zycz@+#&@Mrrr^d!!ciuc#>D#nQKn0*@&92c_Q$dR8asS7Xg~@O009sH0T2KI5C8!X z009sH0T6iI2`o-r+?7ULNi%LH9&wGxSaxE5clw1_KZy1J;mN-Yo&WjlrRevj{!8Rv z(+NHx00JNY00`Ebhiz_^S?S}oo$?Oj2UV`?_2yp1xxP|wb}En5#t~;*n*-w6!UO(v zk-4c7?4xg$wo6_BTiY>n*xX%TUx~h0-nB+c9VK^PIwyZ$&=-+_7NiOqrvvaObY$>{ z1%;?p)45zSt!A>Er!+m8%4RfPOY>SGmn~#@;rRXYq|}{f+*aziP-dIc6qnB&dS9P*GoQyiEG8B6>B*y+ud2)yvu4GBfH?2bCEf>et(|I zzSmX{<`XN?wauN-r+|-!Q>;O6#T7G8+)b?rd*u`0nRax|Qhd z?X7!5i?9-cUrKhXx7$2? z{A8K!++U|Q!t@q5ShLk(#?j$ntKDI$*(R8_&$S2aalNz8RASj?w1skmFEZ6&Dx<~6 z#6MORP*cQ4bFrbKz!nw%h>75_HZ)->>NI6+Sw(aPt z*almOGWyhYQ9DK6?B4d;MsfQS_U+Oqi)Ng`_rxOL3Z*HRJ}$fV$clAq#gKS|KNK+^ zu5W$VbETJQ)t&b_ZNPp=|sE!K>;QoCl|VQU5Z zjVnD&)~!$BX<2nAE?~>OK)-QuLalOA=6vq(`SvwPj<4=4evRaU$R5)h|r+*2JHE zqzpsl%ILbfsVMznSB@{fV=DUD!yr1tO>4{yMs=3`21Rd;Zw{}89vUA~EDQtRm+%J+4c7PQw8$^hCZ~Hz)7M!}0q!hZUqJYtay0 zmoLKQN+fz~>F0?Gg_D zg!?__vjI*b(*y|Dez(a>8+ip>E$*xqZZDjXf-S39m*8Ta>wL2t`gEN zx)M=t3yK zxR-q8IC~)yzkXeMzHC+mH^M!5BTxz4;4h^DnEs49ruwbJ6SZt58Kz_NE{-tZD_5j$%%p6G+J9mJ!gk;Zd;=vjE9M8Y;rNa`Oc>{B?>t^zE9X9a&L3Yr z7m3TV^z4BtU+)#Y=d%Hcdt+_UonXf=oxg$FaVP3-xHC7~8=tTxR-+x&liL7ae%1|q z9iX7kI?Kx&8O&b~dwO(N-;TuR=cVIPcbSf^U85sEM8hfT+W}~EPVh;l)M^RB0bUeKl*9Cm$I#$9-V;?TJWyWku@VHF^0uFZB34;uY&TH6(a4z->q(&yF~KWnW#l zD*vE|VKi}^95+qE^NRyT>&8s&R&dnG!|g%h1*nxgV5fOUbi|m0pEGOiL7&S$LjB9jc`S+-XO<=eZ4u;DhcxSvm_TR=9q#sXxC-U#Z{~-NX{CoWXys{E^uilKrH#VfM z_43i3N~}1`CMpJ3+uDBb^yiTHL|}Ss{;*K|8(J@WXZE>2YQ2Omh?Ys2c5^(w;y;u;hy;%lcW$4Ym) ziJ19XBwj2^-FJscGTb)UF*iHTrSpF^e15p^!jv>`SYQM_pQ=)DGIgx)<(UiaD)rFBy| zvbq6&Nz>O)d_AQ;p~tRxKNncO_uE=)xOy!d|H2FCs0Z}BFL&)>_-5GbR&_6bBSNNV zs=MlIa-uc47RTzscq;FSuoZvWI#9bO#CCVqHt(`p$H*@Dn-e|lS`T=WIw9}RQ){Ld zj0f|H^p%tJ>`FwhA-&^sSZip$mUoHJ8Ljww@nwUr@fwgXxU}$9T``%xH0tuZm$7OO z3oAdz!peW1rH=Ui|0_Eq0Kq^21V8`;KmY_l00ck)1V8`;K;Wej!1({Ap~Q_q00ck) z1V8`;KmY_l00ck)1VG@GC4lk&E2|m^1_B@e0w4eaAOHd&00JNY0w4eaFO7gR{*V3N zmv&3s2n0X?1V8`;KmY_l00ck)1V8`;K;RW1Ff)Edvb_RK%#u7~vHu;4{f2(w0|Fob z0w4eaAOHd&00JNY0w4eaAaE84^zHxey#GH7g+m!Y00ck)1V8`;KmY_l00ck)1V8`; z1``nP|3eoB#~~C1KmY_l00ck)1V8`;KmY_l00cnbY!JZs|7_?Hr2qjC009sH0T2KI z5C8!X009sHfs+V`_5TuW{)helPNIZ35C8!X009sH0T2KI5C8!X009tq{RsH?|NH+> CWF*M| literal 0 HcmV?d00001 diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md index e073b6238..d8bdd515f 100644 --- a/examples/use-rdb-resource/README.md +++ b/examples/use-rdb-resource/README.md @@ -22,3 +22,5 @@ - `poetry run python make_example_table_data.py` - if this command doesn't work, then run `poetry env use 3.12` - change python version to resolve dependensies version + - `poetry run python main.py` + - run main file to answer question by DANA using DbResource, and see the result in the terminal. diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 81ab173e1..b1ec3151f 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -24,6 +24,7 @@ def solve(question, query) -> str: ) query = generate_sql_from_prompt(QUESTION) + print(query) answer = solve(QUESTION, query) print('--------------------------------') diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml index cd5c4683d..58c170130 100644 --- a/examples/use-rdb-resource/pyproject.toml +++ b/examples/use-rdb-resource/pyproject.toml @@ -15,6 +15,7 @@ faker = "^30.1.0" sqlalchemy = "^2.0.35" openai = "^1.51.2" openssa = "^0.24.10.10" +plotly = "^5.24.1" [build-system] diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 182b0c3e2..3b26bc38f 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -14,6 +14,7 @@ import os from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker +from llama_index.core import SummaryIndex, Document from .abstract import AbstractResource from ._global import global_register @@ -70,11 +71,15 @@ def fetch_data(self) -> list[tuple[Any]]: def answer(self, question: str, n_words: int = 1000) -> str: """Answer question from database-stored Informational Resource.""" data = self.fetch_data() - # Here you can implement a more sophisticated way to generate answers from the data - # For simplicity, we will just return the fetched data as a string - return str(data) - - def __del__(self): - """Ensure the database connection is closed when the object is deleted.""" - if hasattr(self, 'db'): - self.db.Session.close_all() + print(data) + # データベースから取得したデータをドキュメントに変換 + documents = [Document(text=str(row[0]), metadata={'id': row[1]}) for row in data] + # print(documents) + index = SummaryIndex.from_documents(documents) + # print(index) + # set Logging to DEBUG for more detailed outputs + query_engine = index.as_query_engine() + # print(query_engine) + response = query_engine.query(question) + # print(response) + return response diff --git a/tests/core/resource/test_db_resource.py b/tests/core/resource/test_db_resource.py index a508ea191..d965f9c0e 100644 --- a/tests/core/resource/test_db_resource.py +++ b/tests/core/resource/test_db_resource.py @@ -3,16 +3,23 @@ load_dotenv() +#TODO: Fix hallucination +# Given Data: [(1, 'Laptop', 100000), (2, 'Smartphone', 60000), (3, 'Headphones', 8000), (4, 'Keyboard', 3000), (5, 'Mouse', 2000), (6, 'Monitor', 25000), (7, 'Tablet', 50000), (8, 'Smartwatch', 20000), (9, 'Camera', 45000), (10, 'Speaker', 15000)] +# Answer: The item that is the most expensive from the given data is the Camera. +# The Answer Should be: Laptop(100000). +# How to fix it? Make query by vanna or change the process in llama_index? def test_db_resource(): - test_url = "http://paulgraham.com/worked.html" - test_question = "What did the author do growing up?" + test_query = "SELECT * FROM items" + test_question = "Which item is the most expensive from given data?" - webpage1 = WebPageResource(test_url) - # print(f"unique name = {webpage1.name}") - # print(f"unique name = {webpage1.unique_name}") - # print(f"answer = {webpage1.answer(test_question)}") - # print(f"summary = {webpage1.get_summary()}") - _ = webpage1.answer(test_question) - _ = webpage1.get_summary() + rdb1 = DbResource(query=test_query) + # print(f"unique name = {rdb1.name}") + # print(f"unique name = {rdb1.unique_name}") + # print(f"answer = {rdb1.answer(test_question)}") + # print(f"summary = {rdb1.get_summary()}") + _ = rdb1.answer(test_question) # assert isinstance(answer, str) + print(_) + +test_db_resource() From 7227879d89c6cc8303bda84e22443635d5b194df Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:23:19 -0700 Subject: [PATCH 13/44] style: reformat code --- .../use-rdb-resource/make_example_table_data.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py index f5faa1bc3..5ee74223c 100644 --- a/examples/use-rdb-resource/make_example_table_data.py +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -1,15 +1,18 @@ -from sqlalchemy import Column, Integer, String, Date, inspect, create_engine -from sqlalchemy.orm import sessionmaker, declarative_base -from faker import Faker +import os import random + from dotenv import load_dotenv -import os +from faker import Faker +from sqlalchemy import Column, Integer, String, Date, inspect, create_engine +from sqlalchemy.orm import sessionmaker, declarative_base + from myvanna import train_vanna_for_sales_data load_dotenv() Base = declarative_base() + class SalesData(Base): __tablename__ = 'sales_data' sale_id = Column(Integer, primary_key=True, autoincrement=True) @@ -18,6 +21,7 @@ class SalesData(Base): sale_date = Column(Date) region = Column(String(255)) + class MySQLDatabase: def __init__(self): self.engine = self.create_engine() @@ -43,6 +47,7 @@ def drop_table(self, table_class): if inspector.has_table(table_class.__tablename__): table_class.__table__.drop(self.engine) + fake = Faker() seed_value = 42 random.seed(seed_value) @@ -58,6 +63,7 @@ def drop_table(self, table_class): regions = ["North America", "Europe", "Asia", "South America", "Africa"] + def generate_sales_data(session, num_records): sales_data_list = [] for _ in range(num_records): @@ -74,6 +80,7 @@ def generate_sales_data(session, num_records): session.bulk_save_objects(sales_data_list) session.commit() + if __name__ == "__main__": db = MySQLDatabase() From f27c9ed47c7a77aa422fa0bc036835f44703e486 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:25:11 -0700 Subject: [PATCH 14/44] refactor: replace random.choice by secrets.choice to avoid security risks --- examples/use-rdb-resource/make_example_table_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py index 5ee74223c..8fb6e7c97 100644 --- a/examples/use-rdb-resource/make_example_table_data.py +++ b/examples/use-rdb-resource/make_example_table_data.py @@ -1,5 +1,6 @@ import os import random +import secrets from dotenv import load_dotenv from faker import Faker @@ -67,8 +68,8 @@ def drop_table(self, table_class): def generate_sales_data(session, num_records): sales_data_list = [] for _ in range(num_records): - product = random.choice(products) - region = random.choice(regions) + product = secrets.choice(products) + region = secrets.choice(regions) sale_date = fake.date_between(start_date='-1y', end_date='today') sales_data = SalesData( product_id=product["id"], From 82432fb0fd5a2b904073ca359ccbfd91afa5817c Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:41:56 -0700 Subject: [PATCH 15/44] style: add empty lines and space --- tests/core/resource/test_db_resource.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/core/resource/test_db_resource.py b/tests/core/resource/test_db_resource.py index d965f9c0e..48d7e9c82 100644 --- a/tests/core/resource/test_db_resource.py +++ b/tests/core/resource/test_db_resource.py @@ -3,7 +3,8 @@ load_dotenv() -#TODO: Fix hallucination + +# TODO: Fix hallucination # Given Data: [(1, 'Laptop', 100000), (2, 'Smartphone', 60000), (3, 'Headphones', 8000), (4, 'Keyboard', 3000), (5, 'Mouse', 2000), (6, 'Monitor', 25000), (7, 'Tablet', 50000), (8, 'Smartwatch', 20000), (9, 'Camera', 45000), (10, 'Speaker', 15000)] # Answer: The item that is the most expensive from the given data is the Camera. # The Answer Should be: Laptop(100000). @@ -22,4 +23,5 @@ def test_db_resource(): # assert isinstance(answer, str) print(_) + test_db_resource() From 2ec3e3a1a3edc0b809d18b9f4399a01df2f60aff Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:45:46 -0700 Subject: [PATCH 16/44] style: add empty lines --- examples/use-rdb-resource/main.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index b1ec3151f..54e6aabaf 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,16 +1,19 @@ -from myvanna import generate_sql_from_prompt +from dotenv import load_dotenv # from openssa import DANA, DbResource -from openssa.core.agent.dana import DANA #, FileResource +from openssa.core.agent.dana import DANA # , FileResource from openssa.core.resource.db import DbResource -from dotenv import load_dotenv + +from myvanna import generate_sql_from_prompt load_dotenv() + def get_or_create_agent(query) -> DANA: return DANA( resources=[DbResource(query=query)] ) + def solve(question, query) -> str: agent = get_or_create_agent(query) try: @@ -18,6 +21,7 @@ def solve(question, query) -> str: except Exception as err: return f'ERROR: {err}' + if __name__ == '__main__': QUESTION = ( "Can you list the products in order of sales volume from highest to lowest?" From 5a931525df529d93f0071d5c427127c69ee6bfb2 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:53:51 -0700 Subject: [PATCH 17/44] style: rearrange imports --- examples/use-rdb-resource/myvanna.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/use-rdb-resource/myvanna.py b/examples/use-rdb-resource/myvanna.py index 0e544d350..9d3b3ce7e 100644 --- a/examples/use-rdb-resource/myvanna.py +++ b/examples/use-rdb-resource/myvanna.py @@ -1,7 +1,8 @@ -from dotenv import load_dotenv import os -from vanna.openai import OpenAI_Chat + +from dotenv import load_dotenv from vanna.chromadb import ChromaDB_VectorStore +from vanna.openai import OpenAI_Chat load_dotenv() @@ -12,6 +13,7 @@ db_database = os.getenv('DB_NAME') openai_api_key = os.getenv('OPENAI_API_KEY') + class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): def __init__(self, config=None): ChromaDB_VectorStore.__init__(self, config=config) From 7f9f0444d7f0aacd4ff541d3e1ae6a4a30617e5c Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 17:55:43 -0700 Subject: [PATCH 18/44] refactor: replace Exception by ValueError & RuntimeError --- examples/use-rdb-resource/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 54e6aabaf..7d4ecc111 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -18,7 +18,9 @@ def solve(question, query) -> str: agent = get_or_create_agent(query) try: return agent.solve(problem=question) - except Exception as err: + except ValueError as err: + return f'ERROR: {err}' + except RuntimeError as err: return f'ERROR: {err}' From 08a217072a5217b6bd4fc754fe41c92a86db9b99 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 18:10:26 -0700 Subject: [PATCH 19/44] style: import 3rd party before 1st party --- examples/use-rdb-resource/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py index 7d4ecc111..5d6096c8b 100644 --- a/examples/use-rdb-resource/main.py +++ b/examples/use-rdb-resource/main.py @@ -1,10 +1,11 @@ from dotenv import load_dotenv + +from myvanna import generate_sql_from_prompt + # from openssa import DANA, DbResource from openssa.core.agent.dana import DANA # , FileResource from openssa.core.resource.db import DbResource -from myvanna import generate_sql_from_prompt - load_dotenv() From 4c51982086bb527e899bcfe15ef6f60a5d2ae493 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 18:46:10 -0700 Subject: [PATCH 20/44] fix: add default db port --- openssa/core/resource/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 65853776f..4eeb8143c 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -33,7 +33,7 @@ def create_engine(self): username = os.getenv('DB_USERNAME') password = os.getenv('DB_PASSWORD') host = os.getenv('DB_HOST') - port = os.getenv('DB_PORT') + port = os.getenv('DB_PORT', 3306) database = os.getenv('DB_NAME') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) From 654b8f7bd79c08fb18a48d6db22985996c72fd57 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 18:48:56 -0700 Subject: [PATCH 21/44] fix: add default db port as string --- openssa/core/resource/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 4eeb8143c..cee9f369b 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -33,7 +33,7 @@ def create_engine(self): username = os.getenv('DB_USERNAME') password = os.getenv('DB_PASSWORD') host = os.getenv('DB_HOST') - port = os.getenv('DB_PORT', 3306) + port = os.getenv('DB_PORT', '3306') database = os.getenv('DB_NAME') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) From c255aa99b01ccf6ac43bf4c8bc1db9aa88081c75 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 18:55:49 -0700 Subject: [PATCH 22/44] build: add pymysql --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4de7a7233..822130408 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ streamlit-mic-recorder = {version = ">=0.0.8", optional = true} langchainhub = ">=0.1" faiss-cpu = ">=1.8" +pymysql = "^1.1.1" [tool.poetry.extras] contrib = [ From 9bf2495c96ff256df417a0e9bc245f340dbb3622 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 19:03:32 -0700 Subject: [PATCH 23/44] fix: set default values for DB connection --- openssa/core/resource/db.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index cee9f369b..860e8a060 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -30,11 +30,11 @@ def __init__(self): } def create_engine(self): - username = os.getenv('DB_USERNAME') - password = os.getenv('DB_PASSWORD') - host = os.getenv('DB_HOST') + username = os.getenv('DB_USERNAME', 'root') + password = os.getenv('DB_PASSWORD', '') + host = os.getenv('DB_HOST', 'localhost') port = os.getenv('DB_PORT', '3306') - database = os.getenv('DB_NAME') + database = os.getenv('DB_NAME', 'test') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) From 2030be43e2020da7059eaca82118b4c08b71af01 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 19:18:08 -0700 Subject: [PATCH 24/44] ci: add MySQL to Mac --- .../workflows/install-lint-test-on-mac.yml | 28 ++++++++++++++++++- openssa/core/resource/db.py | 6 ++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/install-lint-test-on-mac.yml b/.github/workflows/install-lint-test-on-mac.yml index 515a447b9..ad04f55c8 100644 --- a/.github/workflows/install-lint-test-on-mac.yml +++ b/.github/workflows/install-lint-test-on-mac.yml @@ -15,10 +15,25 @@ jobs: strategy: matrix: - python-version: # github.com/actions/python-versions/releases + python-version: # github.com/actions/python-versions/releases - 3.12 # - 3.13 + services: + mysql: + image: mysql:9.1.0 + env: + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping --silent" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: - name: Checkout Repo uses: actions/checkout@v4 # github.com/actions/checkout @@ -34,6 +49,13 @@ jobs: - name: Install Package & Dependencies run: make install + - name: Wait for MySQL to be ready + run: | + until mysqladmin ping -h 127.0.0.1 --silent; do + echo "Waiting for MySQL..." + sleep 5 + done + - name: Lint Code run: make lint @@ -42,3 +64,7 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test + DB_PORT: 3306 diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 860e8a060..09ec379fc 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -30,11 +30,11 @@ def __init__(self): } def create_engine(self): - username = os.getenv('DB_USERNAME', 'root') - password = os.getenv('DB_PASSWORD', '') + username = os.getenv('DB_USERNAME') + password = os.getenv('DB_PASSWORD') host = os.getenv('DB_HOST', 'localhost') port = os.getenv('DB_PORT', '3306') - database = os.getenv('DB_NAME', 'test') + database = os.getenv('DB_NAME') connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}' return create_engine(connection_string) From 16220cd2b73d2c416df6e1bb0b4748b24cd6c755 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 19:23:54 -0700 Subject: [PATCH 25/44] ci: install Docker for pulling MySQL --- .github/workflows/install-lint-test-on-mac.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/install-lint-test-on-mac.yml b/.github/workflows/install-lint-test-on-mac.yml index ad04f55c8..b75f317bc 100644 --- a/.github/workflows/install-lint-test-on-mac.yml +++ b/.github/workflows/install-lint-test-on-mac.yml @@ -43,6 +43,16 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Install Docker + run: | + brew install --cask docker + open /Applications/Docker.app + # Wait for Docker to start + while ! docker system info; do + echo "Waiting for Docker to start..." + sleep 5 + done + - name: Install Poetry run: make get-poetry From a5a1a8ab930af847cd1005bf5726ec2142e3306f Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 19:31:41 -0700 Subject: [PATCH 26/44] ci: setup Docker Buildx --- .github/workflows/install-lint-test-on-mac.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/install-lint-test-on-mac.yml b/.github/workflows/install-lint-test-on-mac.yml index b75f317bc..be3f9f56a 100644 --- a/.github/workflows/install-lint-test-on-mac.yml +++ b/.github/workflows/install-lint-test-on-mac.yml @@ -43,15 +43,8 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install Docker - run: | - brew install --cask docker - open /Applications/Docker.app - # Wait for Docker to start - while ! docker system info; do - echo "Waiting for Docker to start..." - sleep 5 - done + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 - name: Install Poetry run: make get-poetry @@ -77,4 +70,3 @@ jobs: MYSQL_USER: root MYSQL_PASSWORD: password MYSQL_DATABASE: test - DB_PORT: 3306 From 4921af533658b7deb1f20f901b61c17d5ae0d89c Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:08:39 -0700 Subject: [PATCH 27/44] ci: setup MySQL on Ubuntu --- .../workflows/install-lint-test-on-ubuntu.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 880056400..e4bcaf227 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -19,6 +19,21 @@ jobs: - 3.12 # - 3.13 + services: + mysql: + image: mysql:latest + env: + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping --silent" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: - name: Checkout Repo uses: actions/checkout@v4 # github.com/actions/checkout @@ -42,3 +57,6 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test From 9fbf2cea86dac4a667cb1d06329a9829a367815e Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:10:53 -0700 Subject: [PATCH 28/44] ci: wait for MySQL to be ready --- .github/workflows/install-lint-test-on-ubuntu.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index e4bcaf227..d67c86571 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -49,6 +49,13 @@ jobs: - name: Install Package & Dependencies run: make install + - name: Wait for MySQL to be ready + run: | + until mysqladmin ping -h 127.0.0.1 --silent; do + echo "Waiting for MySQL..." + sleep 5 + done + - name: Lint Code run: make lint From e9ee6629d8f2835aa72cbfc8cde86e57eb0c81bc Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:15:21 -0700 Subject: [PATCH 29/44] ci: use random root password for MySQL --- .github/workflows/install-lint-test-on-ubuntu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index d67c86571..1f2e3f842 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -23,6 +23,7 @@ jobs: mysql: image: mysql:latest env: + MYSQL_RANDOM_ROOT_PASSWORD: "yes" MYSQL_USER: root MYSQL_PASSWORD: password MYSQL_DATABASE: test From 965881c43f2dbe549d781f41327f3f06570f4f76 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:19:25 -0700 Subject: [PATCH 30/44] ci: remove MYSQL_USER="root" --- .github/workflows/install-lint-test-on-ubuntu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 1f2e3f842..18d2efe7e 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -24,7 +24,6 @@ jobs: image: mysql:latest env: MYSQL_RANDOM_ROOT_PASSWORD: "yes" - MYSQL_USER: root MYSQL_PASSWORD: password MYSQL_DATABASE: test ports: From 78f0789cffd8aa94d83b67834ca58638debbb4a0 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:22:23 -0700 Subject: [PATCH 31/44] ci: remove MYSQL_USER="root" in test env --- .github/workflows/install-lint-test-on-ubuntu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 18d2efe7e..28a509bfc 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -64,6 +64,5 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - MYSQL_USER: root MYSQL_PASSWORD: password MYSQL_DATABASE: test From e44b6abce480c64904582f652d84e914dd892c6b Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:35:16 -0700 Subject: [PATCH 32/44] ci: use empty password for MySQL --- .github/workflows/install-lint-test-on-ubuntu.yml | 8 ++++---- openssa/core/resource/db.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 28a509bfc..9ed338d18 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -23,8 +23,8 @@ jobs: mysql: image: mysql:latest env: - MYSQL_RANDOM_ROOT_PASSWORD: "yes" - MYSQL_PASSWORD: password + MYSQL_ALLOW_EMPTY_PASSWORD: "yes" + MYSQL_USER: root MYSQL_DATABASE: test ports: - 3306:3306 @@ -64,5 +64,5 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - MYSQL_PASSWORD: password - MYSQL_DATABASE: test + DB_NAME: test + DB_USERNAME: root diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 09ec379fc..ea6548a32 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -31,7 +31,7 @@ def __init__(self): def create_engine(self): username = os.getenv('DB_USERNAME') - password = os.getenv('DB_PASSWORD') + password = os.getenv('DB_PASSWORD', '') host = os.getenv('DB_HOST', 'localhost') port = os.getenv('DB_PORT', '3306') database = os.getenv('DB_NAME') From 0a0e0487d4aff5b366e2543568486d0a8a6916d9 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:38:48 -0700 Subject: [PATCH 33/44] ci: remove MYSQL_USER="root" if using MYSQL_ALLOW_EMPTY_PASSWORD --- .github/workflows/install-lint-test-on-ubuntu.yml | 2 -- openssa/core/resource/db.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 9ed338d18..0c831d51f 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -24,7 +24,6 @@ jobs: image: mysql:latest env: MYSQL_ALLOW_EMPTY_PASSWORD: "yes" - MYSQL_USER: root MYSQL_DATABASE: test ports: - 3306:3306 @@ -65,4 +64,3 @@ jobs: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} DB_NAME: test - DB_USERNAME: root diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index ea6548a32..86f62b98a 100644 --- a/openssa/core/resource/db.py +++ b/openssa/core/resource/db.py @@ -30,7 +30,7 @@ def __init__(self): } def create_engine(self): - username = os.getenv('DB_USERNAME') + username = os.getenv('DB_USERNAME', 'root') password = os.getenv('DB_PASSWORD', '') host = os.getenv('DB_HOST', 'localhost') port = os.getenv('DB_PORT', '3306') From 35063d01f2ef34b3261cd803ca5c1f560ab85593 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:49:50 -0700 Subject: [PATCH 34/44] ci: run db migrations before testing --- .github/workflows/install-lint-test-on-ubuntu.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 0c831d51f..d0d25ccfd 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -55,6 +55,12 @@ jobs: sleep 5 done + - name: Run Migrations + run: | + python -m alembic upgrade head + env: + DATABASE_URL: mysql://root@127.0.0.1/test + - name: Lint Code run: make lint From 7cdbeae3cefc8a5a9433705af063ca343ebc4741 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 20:59:57 -0700 Subject: [PATCH 35/44] build: add alembic --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 822130408..a98702865 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,7 @@ streamlit-mic-recorder = {version = ">=0.0.8", optional = true} langchainhub = ">=0.1" faiss-cpu = ">=1.8" pymysql = "^1.1.1" +alembic = "^1.13.3" [tool.poetry.extras] contrib = [ From 54135fec86e69821df2af0dc1cd4c55a33c2c36a Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 21:05:19 -0700 Subject: [PATCH 36/44] test: add alembic --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a98702865..b14b6f5bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ ruff = ">=0.6" [tool.poetry.group.test.dependencies] pytest = ">=8.3" +alembic = "^1.13.3" [tool.poetry.dependencies] python = ">=3.12,<3.14" @@ -103,7 +104,6 @@ streamlit-mic-recorder = {version = ">=0.0.8", optional = true} langchainhub = ">=0.1" faiss-cpu = ">=1.8" pymysql = "^1.1.1" -alembic = "^1.13.3" [tool.poetry.extras] contrib = [ From ff1df646a6295c769b9c482d3f11dd2548f93bfc Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 21:14:12 -0700 Subject: [PATCH 37/44] ci: install alembic --- .github/workflows/install-lint-test-on-ubuntu.yml | 3 +++ pyproject.toml | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index d0d25ccfd..ad27d6235 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -55,6 +55,9 @@ jobs: sleep 5 done + - name: Install Alembic + run: pip install alembic + - name: Run Migrations run: | python -m alembic upgrade head diff --git a/pyproject.toml b/pyproject.toml index b14b6f5bd..822130408 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,6 @@ ruff = ">=0.6" [tool.poetry.group.test.dependencies] pytest = ">=8.3" -alembic = "^1.13.3" [tool.poetry.dependencies] python = ">=3.12,<3.14" From d042cf5b52b92de3d8b31634ad649d41977d2d7f Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 21:21:59 -0700 Subject: [PATCH 38/44] ci: init alembic --- .github/workflows/install-lint-test-on-ubuntu.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index ad27d6235..40ba5853e 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -58,6 +58,9 @@ jobs: - name: Install Alembic run: pip install alembic + - name: Initialize Alembic + run: alembic init alembic + - name: Run Migrations run: | python -m alembic upgrade head From e6e6426efcfefd91caca36a25147bd9eb57a9001 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 21:30:55 -0700 Subject: [PATCH 39/44] ci: install MySQL driver --- .github/workflows/install-lint-test-on-ubuntu.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 40ba5853e..fbccbfdbd 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -55,6 +55,9 @@ jobs: sleep 5 done + - name: Install MySQL Driver + run: pip install pymysql + - name: Install Alembic run: pip install alembic From a2edf10b649df8fd47014ef618feb30485069d33 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 21:40:18 -0700 Subject: [PATCH 40/44] ci: use pymysql when running db migration --- .github/workflows/install-lint-test-on-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index fbccbfdbd..2114ae6be 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -68,7 +68,7 @@ jobs: run: | python -m alembic upgrade head env: - DATABASE_URL: mysql://root@127.0.0.1/test + DATABASE_URL: mysql+pymysql://root:@127.0.0.1/test - name: Lint Code run: make lint From cff10f686c0bb18417ba67d5263107c185248189 Mon Sep 17 00:00:00 2001 From: An Phan Date: Fri, 25 Oct 2024 22:10:14 -0700 Subject: [PATCH 41/44] ci: no password in db connection --- .github/workflows/install-lint-test-on-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 2114ae6be..906182004 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -68,7 +68,7 @@ jobs: run: | python -m alembic upgrade head env: - DATABASE_URL: mysql+pymysql://root:@127.0.0.1/test + DATABASE_URL: mysql+pymysql://root@127.0.0.1/test - name: Lint Code run: make lint From ef5aa89ab45969b526d310443ef737b61ff663a3 Mon Sep 17 00:00:00 2001 From: An Phan Date: Sat, 26 Oct 2024 20:46:23 -0700 Subject: [PATCH 42/44] ci: add alembic.ini --- .github/workflows/alembic.ini | 85 +++++++++++++++++++ .../workflows/install-lint-test-on-ubuntu.yml | 8 +- 2 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/alembic.ini diff --git a/.github/workflows/alembic.ini b/.github/workflows/alembic.ini new file mode 100644 index 000000000..d515de166 --- /dev/null +++ b/.github/workflows/alembic.ini @@ -0,0 +1,85 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = mysql+pymysql://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks=black +# black.type=console_scripts +# black.entrypoint=black +# black.options=-l 79 + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 906182004..5d0323edc 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -55,14 +55,12 @@ jobs: sleep 5 done - - name: Install MySQL Driver - run: pip install pymysql - - name: Install Alembic run: pip install alembic - - name: Initialize Alembic - run: alembic init alembic + - name: Set up Alembic Configuration + run: | + cp .github/workflows/alembic.ini . - name: Run Migrations run: | From aaba05dbcabcb868a9e807cdf61a87b89705a844 Mon Sep 17 00:00:00 2001 From: An Phan Date: Sat, 26 Oct 2024 21:17:01 -0700 Subject: [PATCH 43/44] fix: put alembic.ini in `openssa` --- .github/workflows/install-lint-test-on-ubuntu.yml | 4 ---- {.github/workflows => openssa}/alembic.ini | 0 2 files changed, 4 deletions(-) rename {.github/workflows => openssa}/alembic.ini (100%) diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 5d0323edc..250b1d2eb 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -58,10 +58,6 @@ jobs: - name: Install Alembic run: pip install alembic - - name: Set up Alembic Configuration - run: | - cp .github/workflows/alembic.ini . - - name: Run Migrations run: | python -m alembic upgrade head diff --git a/.github/workflows/alembic.ini b/openssa/alembic.ini similarity index 100% rename from .github/workflows/alembic.ini rename to openssa/alembic.ini From 3ea10b734b1506df6f75f8a43f6be93c0a87656d Mon Sep 17 00:00:00 2001 From: An Phan Date: Sat, 26 Oct 2024 21:28:02 -0700 Subject: [PATCH 44/44] fix: put alembic.ini in root --- openssa/alembic.ini => alembic.ini | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename openssa/alembic.ini => alembic.ini (100%) diff --git a/openssa/alembic.ini b/alembic.ini similarity index 100% rename from openssa/alembic.ini rename to alembic.ini