From 5528d5a32e4dc52daa5a82890ee55ea5dadb7059 Mon Sep 17 00:00:00 2001
From: Linus Seelinger <linus.seelinger@simulation.science>
Date: Mon, 10 Nov 2025 12:53:23 +0100
Subject: [PATCH 1/4] fix: switch MLflow tests from deprecated file backend to
 sqlite

---
 tesseract_core/runtime/mpa.py   |  6 ++++--
 tests/runtime_tests/test_mpa.py | 15 ++++++---------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/tesseract_core/runtime/mpa.py b/tesseract_core/runtime/mpa.py
index bbcbe89d..70a7c984 100644
--- a/tesseract_core/runtime/mpa.py
+++ b/tesseract_core/runtime/mpa.py
@@ -146,13 +146,15 @@ def __init__(self, base_dir: Optional[str] = None) -> None:
         tracking_uri = config.mlflow_tracking_uri
 
         if not tracking_uri.startswith(("http://", "https://")):
-            # If it's a file URI, convert to local path
-            tracking_uri = tracking_uri.replace("file://", "")
+            # If it's a db file URI, convert to local path
+            tracking_uri = tracking_uri.replace("sqlite://", "")
 
             # Relative paths are resolved against the base output path
             if not Path(tracking_uri).is_absolute():
                 tracking_uri = (Path(get_config().output_path) / tracking_uri).resolve()
 
+            tracking_uri = f"sqlite://{tracking_uri}"
+
         mlflow.set_tracking_uri(tracking_uri)
 
     def _ensure_mlflow_reachable(self) -> None:
diff --git a/tests/runtime_tests/test_mpa.py b/tests/runtime_tests/test_mpa.py
index 1ca46fcc..276f2510 100644
--- a/tests/runtime_tests/test_mpa.py
+++ b/tests/runtime_tests/test_mpa.py
@@ -141,8 +141,8 @@ def test_log_artifact_missing_file():
 def test_mlflow_backend_creation(tmpdir):
     """Test that MLflowBackend is created when mlflow_tracking_uri is set."""
     pytest.importorskip("mlflow")  # Skip if MLflow is not installed
-    mlflow_dir = tmpdir / "mlflow_backend_test"
-    update_config(mlflow_tracking_uri=f"file://{mlflow_dir}")
+    mlflow_db_file = tmpdir / "mlflow.db"
+    update_config(mlflow_tracking_uri=f"sqlite:///{mlflow_db_file}")
     backend = mpa._create_backend(None)
     assert isinstance(backend, mpa.MLflowBackend)
 
@@ -150,8 +150,8 @@ def test_mlflow_backend_creation(tmpdir):
 def test_mlflow_log_calls(tmpdir):
     """Test MLflow backend logging functions with temporary directory."""
     pytest.importorskip("mlflow")  # Skip if MLflow is not installed
-    mlflow_dir = tmpdir / "mlflow_logging_test"
-    update_config(mlflow_tracking_uri=f"file://{mlflow_dir}")
+    mlflow_db_file = tmpdir / "mlflow.db"
+    update_config(mlflow_tracking_uri=f"sqlite:///{mlflow_db_file}")
 
     with start_run():
         log_parameter("model_type", "neural_network")
@@ -164,8 +164,5 @@ def test_mlflow_log_calls(tmpdir):
         artifact_file.write_text("Test content", encoding="utf-8")
         log_artifact(str(artifact_file))
 
-    # Verify MLflow directory structure was created
-    assert mlflow_dir.exists()
-    # MLflow creates experiment directories, so we should see some structure
-    mlflow_contents = list(mlflow_dir.listdir())
-    assert len(mlflow_contents) > 0
+    # Verify MLflow database file was created
+    assert mlflow_db_file.exists()

From 9db8bc76048288289130a9b267ad69c608f78441 Mon Sep 17 00:00:00 2001
From: Linus Seelinger <linus.seelinger@simulation.science>
Date: Mon, 10 Nov 2025 13:02:35 +0100
Subject: [PATCH 2/4] Fix sqlite URI prefix: use sqlite:/// instead of sqlite://

---
 tesseract_core/runtime/mpa.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tesseract_core/runtime/mpa.py b/tesseract_core/runtime/mpa.py
index 70a7c984..fe3a0b5f 100644
--- a/tesseract_core/runtime/mpa.py
+++ b/tesseract_core/runtime/mpa.py
@@ -147,13 +147,13 @@ def __init__(self, base_dir: Optional[str] = None) -> None:
 
         if not tracking_uri.startswith(("http://", "https://")):
             # If it's a db file URI, convert to local path
-            tracking_uri = tracking_uri.replace("sqlite://", "")
+            tracking_uri = tracking_uri.replace("sqlite:///", "")
 
             # Relative paths are resolved against the base output path
             if not Path(tracking_uri).is_absolute():
                 tracking_uri = (Path(get_config().output_path) / tracking_uri).resolve()
 
-            tracking_uri = f"sqlite://{tracking_uri}"
+            tracking_uri = f"sqlite:///{tracking_uri}"
 
         mlflow.set_tracking_uri(tracking_uri)
 

From f1c99c76ec25ed90c56adfda34db7276084a6ff5 Mon Sep 17 00:00:00 2001
From: Linus Seelinger <linus.seelinger@simulation.science>
Date: Mon, 10 Nov 2025 15:34:53 +0100
Subject: [PATCH 3/4] Update mlflow e2e test

---
 tests/endtoend_tests/test_endtoend.py | 41 +++------------------------
 1 file changed, 4 insertions(+), 37 deletions(-)

diff --git a/tests/endtoend_tests/test_endtoend.py b/tests/endtoend_tests/test_endtoend.py
index ca255f23..d287221e 100644
--- a/tests/endtoend_tests/test_endtoend.py
+++ b/tests/endtoend_tests/test_endtoend.py
@@ -1107,7 +1107,7 @@ def test_mpa_mlflow_backend(mpa_test_image, tmpdir):
         "tesseract",
         "run",
         "--env",
-        "TESSERACT_MLFLOW_TRACKING_URI=mlruns",
+        "TESSERACT_MLFLOW_TRACKING_URI=mlflow.db",
         mpa_test_image,
         "apply",
         '{"inputs": {}}',
@@ -1122,42 +1122,9 @@ def test_mpa_mlflow_backend(mpa_test_image, tmpdir):
     )
     assert run_res.returncode == 0, run_res.stderr
 
-    # Check for mlruns directory structure
-    mlruns_dir = Path(tmpdir) / "mlruns"
-    assert mlruns_dir.exists()
-    assert (mlruns_dir / "0").exists()  # Default experiment ID is 0
-
-    # Find run directories
-    run_dirs = [d for d in (mlruns_dir / "0").iterdir() if d.is_dir()]
-    assert len(run_dirs) == 1  # Should be only one run
-    run_dir = run_dirs[0]
-    assert run_dir.is_dir()
-    assert (run_dir / "artifacts").exists()
-    assert (run_dir / "metrics").exists()
-    assert (run_dir / "params").exists()
-
-    # Verify parameters file
-    param_file = run_dir / "params" / "test_parameter"
-    assert param_file.exists()
-    with open(param_file) as f:
-        param_value = f.read().strip()
-        assert param_value == "test_param"
-
-    # Verify metrics file
-    metrics_file = run_dir / "metrics" / "squared_step"
-    assert metrics_file.exists()
-    with open(metrics_file) as f:
-        metrics = f.readlines()
-        assert len(metrics) == 5
-        for i, metric in enumerate(metrics):
-            parts = metric.split()
-            assert len(parts) == 3
-            assert float(parts[1]) == i**2  # Check squared_step values
-            assert int(parts[2]) == i
-
-    # Verify artifacts directory and artifact file
-    artifacts_dir = run_dir / "artifacts"
-    assert artifacts_dir.exists()
+    # Check for MLflow database file
+    mlflow_db_path = Path(tmpdir) / "mlflow.db"
+    assert mlflow_db_path.exists(), "Expected MLflow database file to exist"
 
 
 def test_multi_helloworld_endtoend(

From 8e79ba1637c6b0bccb0999161078bb15be1f186d Mon Sep 17 00:00:00 2001
From: Linus Seelinger <linus.seelinger@simulation.science>
Date: Tue, 11 Nov 2025 09:51:00 +0100
Subject: [PATCH 4/4] MLflow CI: Check SQL db content

---
 tests/endtoend_tests/test_endtoend.py | 28 ++++++++++++++++++++
 tests/runtime_tests/test_mpa.py       | 38 +++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/tests/endtoend_tests/test_endtoend.py b/tests/endtoend_tests/test_endtoend.py
index d287221e..e9dc22f8 100644
--- a/tests/endtoend_tests/test_endtoend.py
+++ b/tests/endtoend_tests/test_endtoend.py
@@ -6,6 +6,7 @@
 import json
 import os
 import shutil
+import sqlite3
 import subprocess
 import uuid
 from pathlib import Path
@@ -1126,6 +1127,33 @@ def test_mpa_mlflow_backend(mpa_test_image, tmpdir):
     mlflow_db_path = Path(tmpdir) / "mlflow.db"
     assert mlflow_db_path.exists(), "Expected MLflow database file to exist"
 
+    # Query the database to verify content was logged
+    with sqlite3.connect(str(mlflow_db_path)) as conn:
+        cursor = conn.cursor()
+
+        # Check parameters were logged
+        cursor.execute("SELECT key, value FROM params")
+        params = dict(cursor.fetchall())
+        assert params["test_parameter"] == "test_param"
+        assert params["steps_config"] == "5"  # MLflow stores params as strings
+
+        # Check metrics were logged
+        cursor.execute("SELECT key, value, step FROM metrics ORDER BY step")
+        metrics = cursor.fetchall()
+        assert len(metrics) == 5
+
+        # Verify some of the squared_step values
+        squared_metrics = [m for m in metrics if m[0] == "squared_step"]
+        assert len(squared_metrics) == 5
+        assert squared_metrics[0] == ("squared_step", 0.0, 0)
+        assert squared_metrics[1] == ("squared_step", 1.0, 1)
+        assert squared_metrics[4] == ("squared_step", 16.0, 4)
+
+        # Check a run with an artifact location was recorded in the runs table
+        cursor.execute("SELECT artifact_uri FROM runs")
+        artifact_uris = [row[0] for row in cursor.fetchall()]
+        assert len(artifact_uris) > 0  # At least one run with artifacts
+
 
 def test_multi_helloworld_endtoend(
     docker_client,
diff --git a/tests/runtime_tests/test_mpa.py b/tests/runtime_tests/test_mpa.py
index 276f2510..2ff7de9d 100644
--- a/tests/runtime_tests/test_mpa.py
+++ b/tests/runtime_tests/test_mpa.py
@@ -5,6 +5,8 @@
 
 import csv
 import json
+import os
+import sqlite3
 
 import pytest
 
@@ -166,3 +168,39 @@ def test_mlflow_log_calls(tmpdir):
 
     # Verify MLflow database file was created
     assert mlflow_db_file.exists()
+
+    # Query the database to verify content was logged
+    with sqlite3.connect(str(mlflow_db_file)) as conn:
+        cursor = conn.cursor()
+
+        # Check parameters were logged
+        cursor.execute("SELECT key, value FROM params")
+        params = dict(cursor.fetchall())
+        assert params["model_type"] == "neural_network"
+        assert params["epochs"] == "100"
+
+        # Check metrics were logged
+        cursor.execute("SELECT key, value, step FROM metrics ORDER BY step")
+        metrics = cursor.fetchall()
+        assert len(metrics) == 2
+        assert metrics[0] == ("accuracy", 0.85, 0)  # step defaults to 0
+        assert metrics[1] == ("loss", 0.25, 1)
+
+        # Collect the artifact locations of all recorded runs from the runs table
+        cursor.execute("SELECT artifact_uri FROM runs")
+        artifact_uris = [row[0] for row in cursor.fetchall()]
+        assert len(artifact_uris) > 0  # At least one run with artifacts
+
+        # Verify the artifact file was actually copied to the artifact location
+        artifact_found = False
+        for artifact_uri in artifact_uris:
+            if artifact_uri and os.path.exists(artifact_uri):
+                try:
+                    artifact_files = os.listdir(artifact_uri)
+                    if "model_config.json" in artifact_files:
+                        artifact_found = True
+                        break
+                except OSError:
+                    continue
+
+        assert artifact_found