11 changes: 8 additions & 3 deletions .github/dependabot.yml
@@ -63,14 +63,19 @@ updates:
     schedule:
       interval: "weekly"
 
-  # Frameworks.
+  # Topics.
 
-  - directory: "/framework/langchain"
+  - directory: "/topic/machine-learning/classification-automl"
     package-ecosystem: "pip"
     schedule:
       interval: "weekly"
 
-  - directory: "/framework/mlflow"
+  - directory: "/topic/machine-learning/llm-langchain"
     package-ecosystem: "pip"
     schedule:
       interval: "weekly"
+
+  - directory: "/topic/machine-learning/timeseries-basics"
+    package-ecosystem: "pip"
+    schedule:
+      interval: "weekly"
75 changes: 75 additions & 0 deletions .github/workflows/test-automl.yml
@@ -0,0 +1,75 @@
name: AutoML

on:
  pull_request:
    branches: ~
    paths:
    - '.github/workflows/test-automl.yml'
    - 'topic/machine-learning/classification-automl/**'
    - 'requirements.txt'
  push:
    branches: [ main ]
    paths:
    - '.github/workflows/test-automl.yml'
    - 'topic/machine-learning/classification-automl/**'
    - 'requirements.txt'

  # Allow job to be triggered manually.
  workflow_dispatch:

  # Run job each night after CrateDB nightly has been published.
  schedule:
    - cron: '0 3 * * *'

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:
  test:
    name: "
      Python: ${{ matrix.python-version }}
      CrateDB: ${{ matrix.cratedb-version }}
      on ${{ matrix.os }}"
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ 'ubuntu-latest' ]
        python-version: [ '3.10' ]
        cratedb-version: [ 'nightly' ]

    services:
      cratedb:
        image: crate/crate:nightly
        ports:
          - 4200:4200
          - 5432:5432

    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:

      - name: Acquire sources
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
          cache: 'pip'
          cache-dependency-path: |
            requirements.txt
            topic/machine-learning/classification-automl/requirements.txt
            topic/machine-learning/classification-automl/requirements-dev.txt

      - name: Install utilities
        run: |
          pip install -r requirements.txt

      - name: Validate topic/machine-learning/classification-automl
        run: |
          ngr test --accept-no-venv topic/machine-learning/classification-automl
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-pueblo[ngr]==0.0.2
+pueblo[ngr]==0.0.3

# Development.
# pueblo[ngr] @ git+https://github.com/pyveci/pueblo.git@develop
3 changes: 3 additions & 0 deletions topic/machine-learning/classification-automl/backlog.md
@@ -0,0 +1,3 @@
# Backlog

- Describe / program how to import `churn-dataset.csv`.
45 changes: 45 additions & 0 deletions topic/machine-learning/classification-automl/pyproject.toml
@@ -0,0 +1,45 @@
[tool.pytest.ini_options]
minversion = "2.0"
addopts = """
-rfEX -p pytester --strict-markers --verbosity=3 --capture=no
"""
# --cov=. --cov-report=term-missing --cov-report=xml
env = [
  "CRATEDB_CONNECTION_STRING=crate://crate@localhost/?schema=testdrive",
  "PYDEVD_DISABLE_FILE_VALIDATION=1",
]

#log_level = "DEBUG"
#log_cli_level = "DEBUG"

testpaths = [
  "*.py",
]
xfail_strict = true
markers = [
]

# pytest-notebook settings
nb_test_files = true
nb_coverage = true
nb_diff_replace = [
  # Compensate output of `crash`.
  '"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
]
# `vector_search.py` does not include any output(s).
nb_diff_ignore = [
  "/metadata/language_info",
  "/cells/*/execution_count",
  "/cells/*/outputs/*/execution_count",
]

[tool.coverage.run]
branch = false

[tool.coverage.report]
fail_under = 0
show_missing = true
omit = [
  "conftest.py",
  "test*.py",
]
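
The `nb_diff_replace` rule above keeps notebook regression diffs stable: `crash` prints wall-clock timings such as `(0.034 sec)` into cell outputs, and those would differ on every run. A minimal Python sketch of the same normalization, shown only to illustrate what the regex matches (the sample output string here is made up):

```python
import re

# Hypothetical sample of a `crash` cell output; the timing varies per run.
sample = "INSERT OK, 1 row affected (0.034 sec)"

# Same pattern and replacement as the `nb_diff_replace` entry above:
# any "(N.NNN sec)" timing is rewritten to the constant "(0.000 sec)".
normalized = re.sub(r"\(\d.\d+ sec\)", "(0.000 sec)", sample)
print(normalized)  # INSERT OK, 1 row affected (0.000 sec)
```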
6 changes: 6 additions & 0 deletions topic/machine-learning/classification-automl/requirements-dev.txt
@@ -0,0 +1,6 @@
coverage~=7.3
ipykernel
pytest<8
pytest-cov<5
pytest-env<2
pytest-notebook<0.9
117 changes: 117 additions & 0 deletions topic/machine-learning/classification-automl/test.py
@@ -0,0 +1,117 @@
import importlib
import io
import sys
from pathlib import Path
from unittest import mock

import pytest
from _pytest.python import Function

HERE = Path(__file__).parent


def list_files(path: Path, pattern: str):
    """
    Enumerate all files in the given directory.
    """
    files = path.glob(pattern)
    files = [item.relative_to(path) for item in files]
    return files


def list_notebooks(path: Path):
    """
    Enumerate all Jupyter Notebook files found in the given directory.
    """
    return list_files(path, "**/*.ipynb")


def list_pyfiles(path: Path):
    """
    Enumerate all regular Python files found in the given directory.
    """
    pyfiles = []
    for item in list_files(path, "**/*.py"):
        if item.suffix != ".py" or item.name in ["conftest.py"] or item.name.startswith("test"):
            continue
        pyfiles.append(item)
    return pyfiles


def str_list(things):
    """
    Convert a list of things into a list of strings.
    """
    return map(str, things)


@pytest.fixture(scope="function", autouse=True)
def db_init():
    """
    Initialize the database.
    """
    run_sql(statement="DROP TABLE IF EXISTS pycaret_churn;")


def db_provision_churn_dataset():
    """
    Provision the database.
    """
    # FIXME: `import_csv` is not defined.
    import_csv(file=HERE / "churn-dataset.csv")
    run_sql(statement="REFRESH TABLE churn_dataset;")


def run_sql(statement: str = None, file: str = None):
    """
    Run SQL from a string or a file.
    """
    import crate.crash.command
    sys.argv = ["foo", "--schema=testdrive"]  # "foo" is only a placeholder program name for crash.
    if statement:
        sys.argv += ["--command", statement]
    if file:
        sys.stdin = io.StringIO(Path(file).read_text())
    with \
            mock.patch("crate.crash.repl.SQLCompleter._populate_keywords"), \
            mock.patch("crate.crash.command.CrateShell.close"):
        try:
            crate.crash.command.main()
        except SystemExit as ex:
            if ex.code != 0:
                raise


@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
def test_notebook(request, notebook: str):
    """
    From an individual Jupyter Notebook file, collect cells as pytest
    test cases, and run them.

    Not using `NBRegressionFixture`, because it would need to be configured manually.
    """
    from _pytest._py.path import LocalPath
    from pytest_notebook.plugin import pytest_collect_file
    tests = pytest_collect_file(LocalPath(notebook), request.node)
    for test in tests.collect():
        test.runtest()


@pytest.mark.parametrize("pyfile", str_list(list_pyfiles(HERE)))
def test_file(request, pyfile: Path):
    """
    From an individual Python file, collect and wrap the `main` function into a test case.
    """

    # TODO: Make configurable.
    entrypoint_symbol = "main"

    # The `.py` file needs the database to be provisioned.
    if str(pyfile).endswith(".py"):
        db_provision_churn_dataset()

    path = Path(pyfile)
    mod = importlib.import_module(path.stem)
    fun = getattr(mod, entrypoint_symbol)
    f = Function.from_parent(request.node, name="main", callobj=fun)
    f.runtest()
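
The FIXME in `db_provision_churn_dataset()` and the backlog item above both point at the missing `import_csv` helper. A possible shape for it, purely as a hedged sketch and not part of this changeset: it assumes pandas plus the CrateDB SQLAlchemy dialect are available, and that the target table is `churn_dataset`, matching the `REFRESH TABLE` statement; note that the `db_init` fixture currently drops `pycaret_churn`, so the two table names would still need to be reconciled.

```python
# Hypothetical sketch only -- not part of this changeset.
# Assumes `pandas` and the CrateDB SQLAlchemy dialect are installed,
# and that the target table is `churn_dataset`, matching the
# `REFRESH TABLE churn_dataset;` statement above.
from pathlib import Path

import pandas as pd
import sqlalchemy as sa


def import_csv(file: Path, table: str = "churn_dataset"):
    """
    Load a CSV file and insert its records into CrateDB.
    """
    engine = sa.create_engine("crate://crate@localhost/?schema=testdrive")
    df = pd.read_csv(file)
    df.to_sql(table, engine, index=False, if_exists="append", chunksize=1000)
```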