Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,24 +91,26 @@ if __name__ == "__main__":

```bash
stratum/
├─ pyproject.toml # Project metadata + Python/Rust build config (maturin)
├─ pyproject.toml # Project metadata + Python/Rust build config (maturin)
├─ README.md
├─ LICENSE
├─ _rust/ # Rust crate (PyO3 extension)
├─ _rust/ # Rust crate (PyO3 extension)
│ ├─ Cargo.toml
│ └─ src/lib.rs # Defines #[pymodule] fn _rust_backend_native(...)
└─ stratum/ # Python package
├─ __init__.py # Façade over skrub + automatic patching
├─ _config.py # set_config/get_config + runtime/env sync
├─ _api.py # High-level grid search / evaluate helpers
├─ _rust_backend.py # Python <-> Rust shim (re-exports native fns)
├─ adapters/ # Public API (dispatch to Rust or fall back to skrub)
│ ├─ string_encoder.py # RustyStringEncoder
│ └─ src/lib.rs # Defines #[pymodule] fn _rust_backend_native(...)
└─ stratum/ # Python package
├─ __init__.py # Façade over skrub + automatic patching
├─ _config.py # set_config/get_config + runtime/env sync
├─ _api.py # High-level grid search / evaluate helpers
├─ _rust_backend.py # Python <-> Rust shim (re-exports native fns)
├─ adapters/ # Public API (dispatch to Rust or fall back to skrub)
│ ├─ string_encoder.py # RustyStringEncoder
│ └─ one_hot_encoder.py # RustyOneHotEncoder
├─ logical_optimizer/ # DAG representation + logical rewrites
├─ runtime/ # Schedulers and runtime execution
├─ patching/ # Hooks that patch upstream skrub
└─ tests/ # Test suite
├─ optimizer/ # Logical optimizer
│ ├─ ir/ # DAG representation
│ └─ _optimize.py # Logical rewrites
├─ runtime/ # Schedulers and runtime execution
├─ patching/ # Hooks that patch upstream skrub
└─ tests/ # Test suite
```
---

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/logical_optimizer/end-to-end/20newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.linear_model import Ridge, LinearRegression, LogisticRegression
from sklearn.svm import LinearSVC

from stratum.logical_optimizer import apply_cse_on_skrub_ir
from stratum.optimizer import apply_cse_on_skrub_ir
from stratum.api.gridsearch import grid_search

import stratum as skrub
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.metrics import mean_squared_log_error, make_scorer
import time

from stratum.logical_optimizer import apply_cse_on_skrub_ir
from stratum.optimizer import apply_cse_on_skrub_ir

t0 = time.time()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
import numpy as np

base_path = "benchmarks/logical_optimizer/end-to-end/california-housing/"
base_path = "benchmarks/optimizer/end-to-end/california-housing/"
data = pd.read_csv(base_path + "california_housing_pipelines_benchmark.csv", sep=";")
data["time"] = data["time"].apply(np.round, decimals=2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge

from stratum.logical_optimizer import apply_cse_on_skrub_ir
from stratum.optimizer import apply_cse_on_skrub_ir
from stratum.api.gridsearch import grid_search
from time import time

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import matplotlib.pyplot as plt
import numpy as np

base_path = "benchmarks/logical_optimizer/end-to-end/"
base_path = "benchmarks/optimizer/end-to-end/"

data = pd.read_csv(base_path + 'bench_cse_tfidf_gridsearch.csv')
data["total"] = data["total"].apply(np.round, decimals=2)
Expand Down
2 changes: 1 addition & 1 deletion stratum/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from skrub import DataOp

from stratum._config import FLAGS
from stratum.logical_optimizer._optimize import optimize
from stratum.optimizer._optimize import optimize
from stratum.runtime._scheduler import SequentialScheduler
from time import perf_counter

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from dataclasses import dataclass
from typing import Any, Callable

from stratum.logical_optimizer._numeric_ops import NumericOp
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.logical_optimizer._numeric_ops import NumericOpType
from stratum.optimizer.ir._numeric_ops import NumericOp
from stratum.optimizer._op_utils import topological_iterator
from stratum.optimizer.ir._numeric_ops import NumericOpType

RewriteFn = Callable[[NumericOp, Any], Any]

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import deque
from typing import Iterator
from graphviz import Digraph
from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, Op, ChoiceOp
from stratum.optimizer.ir._ops import DATA_OP_PLACEHOLDER, Op, ChoiceOp
from stratum._config import get_config
import os
from dataclasses import dataclass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
from skrub._data_ops._subsampling import SubsamplePreviews
from collections import deque
from ._cse import apply_cse
from ._dataframe_ops import rewrite_dataframe_ops, group_dataframe_ops,add_splitting_op
from ._numeric_ops import to_numeric_op
from ._ops import ChoiceOp, ImplOp, Op, SearchEvalOp, as_op
from stratum.optimizer.ir._dataframe_ops import rewrite_dataframe_ops, group_dataframe_ops,add_splitting_op
from stratum.optimizer.ir._numeric_ops import to_numeric_op
from stratum.optimizer.ir._ops import ChoiceOp, ImplOp, Op, SearchEvalOp, as_op
from ._op_utils import clone_sub_dag, find_choice_naive, replace_op_in_outputs, show_graph, topological_iterator
from ._algebraic_rewrites import algebraic_rewrites
from stratum.utils._skrub_graph import build_graph
from time import perf_counter
import logging
from stratum._config import FLAGS
from stratum.logical_optimizer._algebraic_rewrites import AlgebraicRewritesConfig
from stratum.optimizer._algebraic_rewrites import AlgebraicRewritesConfig

logger = logging.getLogger(__name__)
EVAL_OP_ENABLED = False
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, BaseEstimatorOp, BinOp, CallOp, GetAttrOp, GetItemOp, MethodCallOp, Op, ValueOp, VariableOp
from stratum.optimizer.ir._ops import DATA_OP_PLACEHOLDER, BaseEstimatorOp, BinOp, CallOp, GetAttrOp, GetItemOp, MethodCallOp, Op, ValueOp, VariableOp
from pandas import DataFrame
import pandas as pd
import polars as pl
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.optimizer._op_utils import topological_iterator
from stratum._config import FLAGS
from skrub._data_ops._data_ops import DataOp
import logging
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from stratum.logical_optimizer._ops import CallOp, Op, ValueOp
from pandas import DataFrame
from stratum.logical_optimizer._dataframe_ops import DataSourceOp
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.optimizer.ir._ops import CallOp, Op
from stratum.optimizer._op_utils import topological_iterator
import numpy as np
from enum import Enum

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def as_op(data_op: DataOp):
elif isinstance(impl, Var):
return_op = VariableOp(name=impl.name, value=impl.value)
elif isinstance(impl, Concat):
from stratum.logical_optimizer._dataframe_ops import ConcatOp
from stratum.optimizer.ir._dataframe_ops import ConcatOp
return_op = ConcatOp(first=impl.first, others=impl.others, axis=impl.axis)
else:
return_op = ImplOp(skrub_impl=impl, name=data_op.__skrub_short_repr__())
Expand Down
6 changes: 3 additions & 3 deletions stratum/runtime/_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from sklearn.model_selection import train_test_split, check_cv
from sklearn.metrics._scorer import _Scorer, get_scorer
from skrub._data_ops._data_ops import EvalMode
from stratum.logical_optimizer._dataframe_ops import SplitOp
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.logical_optimizer._ops import ImplOp, Op
from stratum.optimizer.ir._dataframe_ops import SplitOp
from stratum.optimizer._op_utils import topological_iterator
from stratum.optimizer.ir._ops import ImplOp, Op
import polars as pl

import logging
Expand Down
2 changes: 1 addition & 1 deletion stratum/tests/application/test_multi_level_choice_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from xgboost import XGBRegressor
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
from stratum.logical_optimizer._optimize import optimize
from stratum.optimizer._optimize import optimize


class TargetEncoder(BaseEstimator, TransformerMixin):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import unittest
import stratum as skrub
import numpy as np
from stratum.logical_optimizer._optimize import optimize
from stratum.logical_optimizer._optimize import OptConfig
from stratum.logical_optimizer._algebraic_rewrites import AlgebraicRewritesConfig
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.optimizer._optimize import optimize
from stratum.optimizer._optimize import OptConfig
from stratum.optimizer._algebraic_rewrites import AlgebraicRewritesConfig
from stratum.optimizer._op_utils import topological_iterator

class TestCSE(unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from skrub import TableVectorizer

import stratum as skrub
from stratum.logical_optimizer._op_comparison import equals_data_op
from stratum.optimizer._op_comparison import equals_data_op
import pandas as pd

# dummy function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from skrub import TableVectorizer

import stratum as skrub
from stratum.logical_optimizer._op_comparison import equals_data_op
from stratum.optimizer._op_comparison import equals_data_op
import pandas as pd

# dummy function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from skrub import TableVectorizer

import stratum as skrub
from stratum.logical_optimizer._op_comparison import equals_data_op, hash_data_op
from stratum.optimizer._op_comparison import equals_data_op, hash_data_op
import pandas as pd

# dummy function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sklearn.preprocessing import StandardScaler

import stratum as skrub
from stratum.logical_optimizer._op_comparison import update_data_op
from stratum.optimizer._op_comparison import update_data_op
import pandas as pd

# dummy function
Expand Down
6 changes: 3 additions & 3 deletions stratum/tests/logical_optimizer/test_cse.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from skrub._data_ops._evaluation import _Graph
from stratum.logical_optimizer import apply_cse_on_skrub_ir
from stratum.logical_optimizer._cse import CSETable
from stratum.logical_optimizer._optimize import topological_traverse
from stratum.optimizer import apply_cse_on_skrub_ir
from stratum.optimizer._cse import CSETable
from stratum.optimizer._optimize import topological_traverse
import unittest
import stratum as skrub
import pandas as pd
Expand Down
8 changes: 4 additions & 4 deletions stratum/tests/logical_optimizer/test_dataframe_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import stratum as skrub
from skrub._data_ops._data_ops import DataOp
from stratum._config import FLAGS
from stratum.logical_optimizer._dataframe_ops import (
from stratum.optimizer.ir._dataframe_ops import (
ApplyUDFOp, AssignOp, ConcatOp, DataSourceOp, DatetimeConversionOp,
DropOp, GetAttrProjectionOp, GroupedDataframeOp, MetadataOp, ProjectionOp,
SplitOp, rewrite_fuse_get_item_ops,)
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, GetItemOp, MethodCallOp, Op
from stratum.logical_optimizer._optimize import OptConfig, optimize as optimize_
from stratum.optimizer._op_utils import topological_iterator
from stratum.optimizer.ir._ops import DATA_OP_PLACEHOLDER, GetItemOp, MethodCallOp, Op
from stratum.optimizer._optimize import OptConfig, optimize as optimize_


def optimize(dag, conf=None):
Expand Down
2 changes: 1 addition & 1 deletion stratum/tests/logical_optimizer/test_numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import stratum as skrub
import numpy as np
from sklearn.dummy import DummyRegressor
from stratum.logical_optimizer._numeric_ops import NumericOp
from stratum.optimizer.ir._numeric_ops import NumericOp

class TestNumericOps(unittest.TestCase):
def setUp(self):
Expand Down
4 changes: 2 additions & 2 deletions stratum/tests/logical_optimizer/test_op_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#from curses import flash
import unittest
import stratum as skrub
from stratum.logical_optimizer._optimize import optimize as optimize_, OptConfig, choice_unrolling
from stratum.logical_optimizer._op_utils import show_graph, clone_sub_dag, topological_iterator, FLAGS
from stratum.optimizer._optimize import optimize as optimize_, OptConfig, choice_unrolling
from stratum.optimizer._op_utils import show_graph, clone_sub_dag, topological_iterator, FLAGS
from stratum._config import config
graph = False

Expand Down
6 changes: 3 additions & 3 deletions stratum/tests/logical_optimizer/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from sklearn.preprocessing import StandardScaler
from skrub._data_ops._data_ops import DataOp

from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.logical_optimizer._ops import (
from stratum.optimizer._op_utils import topological_iterator
from stratum.optimizer.ir._ops import (
DATA_OP_PLACEHOLDER, BinOp, CallOp, DummyConfigManager, GetAttrOp,
GetItemOp, ImplOp, MethodCallOp, Op, PlaceHolder, SearchEvalOp, ValueOp,
VariableOp, check_estm_inputs, estimator_parallel_config,
estm_supports_polars, process_estimator_task, process_transformer_task,
remove_datops_from_args,
)
from stratum.logical_optimizer._optimize import optimize as optimize_
from stratum.optimizer._optimize import optimize as optimize_


def _inp(val):
Expand Down
4 changes: 2 additions & 2 deletions stratum/tests/logical_optimizer/test_optimize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from stratum.logical_optimizer._op_utils import topological_iterator
from stratum.logical_optimizer._optimize import OptConfig, optimize
from stratum.optimizer._op_utils import topological_iterator
from stratum.optimizer._optimize import OptConfig, optimize
import stratum as skrub
import pandas as pd
import unittest
Expand Down
Loading