ML/LlamaIndex: Add software tests and CI configuration #707
Changes from all commits
@@ -0,0 +1,82 @@
name: LlamaIndex

on:
  pull_request:
    branches: ~
    paths:
    - '.github/workflows/ml-llamaindex.yml'
    - 'topic/machine-learning/llama-index/**'
    - '/requirements.txt'
  push:
    branches: [ main ]
    paths:
    - '.github/workflows/ml-llamaindex.yml'
    - 'topic/machine-learning/llama-index/**'
    - '/requirements.txt'

  # Allow job to be triggered manually.
  workflow_dispatch:

  # Run job each night after CrateDB nightly has been published.
  schedule:
    - cron: '0 3 * * *'

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:
  test:
    name: "
     Python: ${{ matrix.python-version }}
     CrateDB: ${{ matrix.cratedb-version }}
     on ${{ matrix.os }}"
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [
          'ubuntu-latest',
        ]
        python-version: [
          '3.8',
          '3.13',
        ]
        cratedb-version: [ 'nightly' ]

    services:
      cratedb:
        image: crate/crate:${{ matrix.cratedb-version }}
        ports:
          - 4200:4200
          - 5432:5432
        env:
          CRATE_HEAP_SIZE: 4g

    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:

      - name: Acquire sources
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
          cache: 'pip'
          cache-dependency-path: |
            requirements.txt
            topic/machine-learning/llama-index/requirements.txt
            topic/machine-learning/llama-index/requirements-dev.txt

      - name: Install utilities
        run: |
          pip install -r requirements.txt

      - name: Validate topic/machine-learning/llama-index
        run: |
          ngr test --accept-no-venv topic/machine-learning/llama-index
@@ -0,0 +1,4 @@
# OPENAI_API_KEY=sk-XJZ7pfog5Gp8Kus8D--invalid--0CJ5lyAKSefZLaV1Y9S1
OPENAI_API_TYPE=openai
CRATEDB_SQLALCHEMY_URL="crate://crate@localhost:4200/"
CRATEDB_TABLE_NAME=time_series_data
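These settings are picked up by the example program via `python-dotenv`. As a rough illustration of how they are consumed — a minimal sketch assuming the `sqlalchemy-cratedb` dialect (or `crate[sqlalchemy]`) is installed; the count query itself is only illustrative and not part of this changeset:

```python
import os

import sqlalchemy as sa
from dotenv import load_dotenv

# Load OPENAI_API_TYPE, CRATEDB_SQLALCHEMY_URL, and CRATEDB_TABLE_NAME from the .env file.
load_dotenv()

# Connect to CrateDB using the SQLAlchemy connection string defined above.
engine = sa.create_engine(os.getenv("CRATEDB_SQLALCHEMY_URL"))
with engine.connect() as connection:
    table = os.getenv("CRATEDB_TABLE_NAME")
    count = connection.execute(sa.text(f"SELECT COUNT(*) FROM {table}")).scalar()
    print(f"{table} contains {count} rows")
```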
@@ -0,0 +1,23 @@
CREATE TABLE IF NOT EXISTS time_series_data (
    timestamp TIMESTAMP,
    value DOUBLE,
    location STRING,
    sensor_id INT
);

INSERT INTO time_series_data (timestamp, value, location, sensor_id)
VALUES
    ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1),
    ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1),
    ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1),
    ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2),
    ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2),
    ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2),
    ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1),
    ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1),
    ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1),
    ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2),
    ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2),
    ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2);

REFRESH TABLE time_series_data;
Comment on lines +1 to +23

Member, Author:
The new

Contributor:
Let's keep this here as it lines up better with the format of the tutorial on Discourse.
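Independent of where the file lives, it is worth noting why the script ends with `REFRESH TABLE`: CrateDB makes newly inserted rows visible to subsequent queries only after a refresh, so provisioning scripts typically refresh before the data is queried. A rough sketch of applying such a provisioning file programmatically — the file name `provision.sql` and the statement-splitting strategy are assumptions, not part of this changeset:

```python
import sqlalchemy as sa
import sqlparse  # statement splitter, listed in requirements-dev.txt


def provision_database(url="crate://crate@localhost:4200/", path="provision.sql"):
    """Apply a provisioning SQL file statement by statement against CrateDB."""
    engine = sa.create_engine(url)
    with engine.connect() as connection:
        with open(path) as f:
            for statement in sqlparse.split(f.read()):
                statement = statement.strip().rstrip(";")
                if statement:
                    connection.execute(sa.text(statement))


if __name__ == "__main__":
    provision_database()
```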
@@ -1,59 +1,92 @@
""" Example code using Azure Open AI and llama-index. """
"""
Use an LLM to query a database in human language.
Example code using LlamaIndex with vanilla Open AI and Azure Open AI.
"""

import os
import openai
import sqlalchemy as sa

from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core.utilities.sql_wrapper import SQLDatabase
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core import Settings

if __name__ == "__main__":
    load_dotenv()

def configure_llm():
    """
    Configure LLM. Use either vanilla Open AI, or Azure Open AI.
    """

    openai.api_type = os.getenv("OPENAI_API_TYPE")
    openai.azure_endpoint = os.getenv("OPENAI_AZURE_ENDPOINT")
    openai.api_version = os.getenv("OPENAI_AZURE_API_VERSION")
    openai.api_key = os.getenv("OPENAI_API_KEY")

    llm = AzureOpenAI(
        engine=os.getenv("LLM_INSTANCE"),
        azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
        api_key = os.getenv("OPENAI_API_KEY"),
        api_version = os.getenv("OPENAI_AZURE_API_VERSION"),
        temperature=0.0
    )
    if openai.api_type == "openai":
        llm = OpenAI(
            api_key=os.getenv("OPENAI_API_KEY"),
            temperature=0.0
        )
    elif openai.api_type == "azure":
        llm = AzureOpenAI(
            engine=os.getenv("LLM_INSTANCE"),
            azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
            api_key = os.getenv("OPENAI_API_KEY"),
            api_version = os.getenv("OPENAI_AZURE_API_VERSION"),
            temperature=0.0
        )
    else:
        raise ValueError(f"Open AI API type not defined or invalid: {openai.api_type}")

    Settings.llm = llm
    Settings.embed_model = LangchainEmbedding(
        AzureOpenAIEmbeddings(
            azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
            model=os.getenv("EMBEDDING_MODEL_INSTANCE")
    if openai.api_type == "openai":
        Settings.embed_model = LangchainEmbedding(OpenAIEmbeddings())
    elif openai.api_type == "azure":
        Settings.embed_model = LangchainEmbedding(
            AzureOpenAIEmbeddings(
                azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
                model=os.getenv("EMBEDDING_MODEL_INSTANCE")
            )
        )
    )

    print("Creating SQLAlchemy engine...")
    engine_crate = sa.create_engine(os.getenv("CRATEDB_URL"))
    print("Connecting to CrateDB...")

def main():
    """
    Use an LLM to query a database in human language.
    """

    # Configure application.
    load_dotenv()
    configure_llm()

    # Configure database connection and query engine.
    print("Connecting to CrateDB")
    engine_crate = sa.create_engine(os.getenv("CRATEDB_SQLALCHEMY_URL"))
    engine_crate.connect()
    print("Creating SQLDatabase instance...")

    print("Creating LlamaIndex QueryEngine")
    sql_database = SQLDatabase(engine_crate, include_tables=[os.getenv("CRATEDB_TABLE_NAME")])
    print("Creating QueryEngine...")
    query_engine = NLSQLTableQueryEngine(
        sql_database=sql_database,
        tables=[os.getenv("CRATEDB_TABLE_NAME")],
        llm = llm
        llm=Settings.llm
    )

    print("Running query...")

    # Invoke an inquiry.
    print("Running query")
    QUERY_STR = "What is the average value for sensor 1?"
    answer = query_engine.query(QUERY_STR)
    print(answer.get_formatted_sources())
    print("Query was:", QUERY_STR)
    print("Answer was:", answer)
    print(answer.metadata)


if __name__ == "__main__":
    main()
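The final `print(answer.metadata)` is handy for debugging text-to-SQL behaviour: `NLSQLTableQueryEngine` synthesizes a SQL statement from the natural-language question, runs it against CrateDB, and lets the LLM phrase the answer. A small sketch of pulling the generated statement out of the response metadata — the `sql_query` key reflects current LlamaIndex behaviour and should be treated as an assumption:

```python
# Assumes `query_engine` has been set up as in main() above.
answer = query_engine.query("What is the average value for sensor 1?")

# The generated SQL statement is usually exposed in the response metadata;
# fall back gracefully in case a different LlamaIndex version changes the key.
generated_sql = answer.metadata.get("sql_query", "<not available>")
print("Generated SQL:", generated_sql)
print("Answer:", answer)
```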
@@ -0,0 +1,27 @@
[tool.pytest.ini_options]
minversion = "2.0"
addopts = """
  -rfEX -p pytester --strict-markers --verbosity=3 --capture=no
  --cov=. --cov-report=term-missing --cov-report=xml
  """

#log_level = "DEBUG"
#log_cli_level = "DEBUG"

testpaths = [
  "*.py",
]
xfail_strict = true
markers = [
]

[tool.coverage.run]
branch = false

[tool.coverage.report]
fail_under = 0
show_missing = true
omit = [
  "conftest.py",
  "test*.py",
]
@@ -0,0 +1,3 @@
cratedb-toolkit
pueblo[testing]
sqlparse
`CRATEDB_SQLALCHEMY_URL` is the designated environment variable name for the SQLAlchemy connection string, contrary to `CRATEDB_HTTP_URL`, which is suitable for the Python DB API or `crash`, for example. In order not to use different environment variables across our tutorials and educational material, let's adjust this to adhere to that convention.
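To illustrate the convention, a rough sketch of how the two flavours would typically be consumed — `CRATEDB_SQLALCHEMY_URL` by SQLAlchemy-based code and `CRATEDB_HTTP_URL` by the Python DB API client; the fallback values are assumptions for a local CrateDB instance and not part of this changeset:

```python
import os

import sqlalchemy as sa
from crate import client  # Python DB API driver

# SQLAlchemy-based code consumes the SQLAlchemy-style connection string.
engine = sa.create_engine(os.getenv("CRATEDB_SQLALCHEMY_URL", "crate://crate@localhost:4200/"))

# Python DB API-based code (or crash) would consume an HTTP-style URL instead.
connection = client.connect(os.getenv("CRATEDB_HTTP_URL", "http://localhost:4200/"))
cursor = connection.cursor()
cursor.execute("SELECT 1")
print(cursor.fetchone())
```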