diff --git a/benchmarks/bench_applytocols.py b/benchmarks/bench_applytocols.py
deleted file mode 100644
index 6a64a0c8..00000000
--- a/benchmarks/bench_applytocols.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import gc
-import time
-import numpy as np
-import pandas as pd
-from joblib import parallel_backend
-
-import stratum as skrub
-from skrub import ApplyToCols, StringEncoder
-
-# Create a synthetic test column
-def make_data (n_rows, seed, vocab_size,
-                 avg_words, words_len_range=(3, 10), n_features=1) -> pd.DataFrame:
-    rng = np.random.default_rng(seed)
-
-    # Create a random lowercase word from ascii characters
-    def rand_word():
-        size = rng.integers(words_len_range[0], words_len_range[1])
-        return ''.join(rng.choice(list('abcdefghijklmnopqrstuvwxyz'), size=size)) #use ascii
-
-    # Build a vocabulary of unique words
-    vocab = [rand_word() for _ in range(vocab_size)]
-
-    # Function to generate a single text series
-    def gen_series():
-        # Randomly generate number of words (around avg_words) in each row
-        n_per_row = np.maximum(1, rng.poisson(avg_words, size=n_rows))
-        rows = []
-        for k in n_per_row:
-            idx = rng.integers(0, vocab_size, size=k)
-            rows.append(' '.join(vocab[i] for i in idx))
-        return pd.Series(rows)
-
-    # Generate n_features columns
-    data = {f"text_{i+1}": gen_series() for i in range(n_features)}
-    return pd.DataFrame(data)
-
-def main():
-    n_rows = 100_000        #number of rows (=200K)
-    vocab_size = 20000      #number of unique words (=5K). Large -> more distinct tokens -> sparser matrix
-    avg_words = 8           #average number of words per row (=8)
-    words_len = (3, 10)     #length of each word (low to high)
-    n_features = 2          #number of features
-
-    # Generate synthetic data
-    print("Generate synthetic data")
-    X = make_data(n_rows, 42, vocab_size, avg_words, words_len, n_features)
-    print(X)
-
-    # Build encoder
-    enc = StringEncoder(
-        vectorizer="hashing", #hashing->tfidf
-        analyzer="char",
-        ngram_range=(3, 4),
-        n_components= 30,
-        random_state=0
-    )
-
-    # Main benchmark. Run on the entire dataset
-    print("\nStarting main benchmark")
-    skrub.set_config(rust_backend=True, debug_timing=True) #sklearn backend
-    t0 = time.perf_counter()
-    with parallel_backend('threading'):
-        enc_cols = ApplyToCols(enc, n_jobs=n_features) #apply one encoder on all columns
-        Z = enc_cols.fit_transform(X)
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"Shape = {Z.shape}")
-    print(f"skrub - Execution time = {exec_time:8.3f}s\n")
-    del Z #optimize memory, especially for dense outputs
-    gc.collect()
-
-if __name__ == '__main__':
-    main()
diff --git a/benchmarks/bench_onehot_encoder.py b/benchmarks/bench_onehot_encoder.py
deleted file mode 100644
index f4e43e90..00000000
--- a/benchmarks/bench_onehot_encoder.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import gc
-import time
-import numpy as np
-import pandas as pd
-
-import stratum as skrub
-from stratum import OneHotEncoder
-
-# Create synthetic features
-def make_categorical_df(
-        n_rows: int,
-        n_features: int,
-        n_dists: int, #nuber of distinct items in each feature
-        seed: int = 42,
-) -> pd.DataFrame:
-    rng = np.random.default_rng(seed)
-
-    data = {}
-    #for j, k in enumerate(cardinals):
-    for j in range(n_features):
-        # Create category pool: e.g., ['c0_000001', ..., 'c0_00xxxx']
-        cats = np.array([f"c{j}_{i:06d}" for i in range(n_dists)], dtype=object)
-        idx = rng.integers(0, n_dists, size=n_rows)
-        col = cats[idx].copy()
-
-        data[f"col{j}"] = col
-    return pd.DataFrame(data)
-
-def OHE_benchmark(X, sparse_output):
-    # Build one hot encoder
-    enc = OneHotEncoder(
-        drop="if_binary",
-        dtype=np.float32,
-        handle_unknown="ignore",
-        sparse_output=sparse_output,
-    )
-
-    # Warm-up small runs
-    skrub.set_config(rust_backend=False)
-    X_small = X.iloc[: min(2048, len(X))]
-    _ = enc.fit_transform(X_small)
-    gc.collect()
-    skrub.set_config(rust_backend=True)
-    X_small = X.iloc[: min(2048, len(X))]
-    _ = enc.fit_transform(X_small)
-    gc.collect()
-
-    # Main benchmark. Run on the entire dataset
-    print("\nStarting main benchmark")
-    skrub.set_config(rust_backend=False, debug_timing=True) #sklearn backend
-    t0 = time.perf_counter()
-    Z = enc.fit_transform(X)
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"Shape = {Z.shape}")
-    print(f"skrub - Execution time = {exec_time:8.3f}s\n")
-    del Z #optimize memory, especially for dense outputs
-    gc.collect()
-
-    skrub.set_config(rust_backend=True, debug_timing=True, num_threads=0) #rust backend
-    t0 = time.perf_counter()
-    Z = enc.fit_transform(X)
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"Shape = {Z.shape}")
-    print(f"stratum - Execution time = {exec_time:8.3f}s\n")
-
-
-
-def main():
-    print("Generate synthetic data for sparse output")
-    n_rows = 2_000_000
-    n_features = 4
-    n_dists = 200_000 # num of distinct items in each feature
-    X = make_categorical_df(n_rows=n_rows, n_features=n_features, n_dists=n_dists)
-    print(X.head(), "\n")
-    OHE_benchmark(X, sparse_output=True)
-
-    print("Generate synthetic data for dense output")
-    n_rows = 200_000
-    n_features = 4
-    n_dists = 10_000 # num of distinct items in each feature
-    X = make_categorical_df(n_rows=n_rows, n_features=n_features, n_dists=n_dists)
-    print(X.head(), "\n")
-    OHE_benchmark(X, sparse_output=False)
-
-if __name__ == "__main__":
-    main()
diff --git a/benchmarks/bench_string_encoder.py b/benchmarks/bench_string_encoder.py
deleted file mode 100644
index a0e638de..00000000
--- a/benchmarks/bench_string_encoder.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-This script runs StringEncoder on a synthetic dataset and compares Sklearn and Rust backends.
-It is used to show the performance benefits of the Rust backend (~5.8x w/ 24*2 cores).
-This script also shows the use various config flags related to the Rust backend.
-"""
-import gc
-import time
-import numpy as np
-import pandas as pd
-import stratum as skrub
-from stratum import StringEncoder
-
-# Create a synthetic test column
-def make_series (n_rows, seed, vocab_size, avg_words, words_len_range=(3, 10)) -> pd.Series:
-    rng = np.random.default_rng(seed)
-
-    # Create a random lowercase word from ascii characters
-    def rand_word():
-        size = rng.integers(words_len_range[0], words_len_range[1])
-        return ''.join(rng.choice(list('abcdefghijklmnopqrstuvwxyz'), size=size)) #use ascii
-
-    # Build a vocabulary of unique words
-    vocab = [rand_word() for _ in range(vocab_size)]
-
-    # Randomly generate number of words (around avg_words) in each row
-    n_per_row = np.maximum(1, rng.poisson(avg_words, size=n_rows))
-
-    rows = []
-    for k in n_per_row:
-        idx = rng.integers(0, vocab_size, size=k)
-        rows.append(' '.join(vocab[i] for i in idx))
-
-    return pd.Series(rows, name="text")
-
-def main():
-    n_rows = 100_000        #number of rows (=100K)
-    vocab_size = 20000      #number of unique words (=20K). Large -> more distinct tokens -> sparser matrix
-    avg_words = 8           #average number of words per row (=8)
-    words_len = (3, 10)     #length of each word (low to high)
-
-    # Generate synthetic data
-    print("Generate synthetic data")
-    X = make_series(n_rows, 42, vocab_size, avg_words, words_len)
-    print(X)
-
-    # Build encoder
-    enc = StringEncoder(
-        vectorizer="tfidf",
-        analyzer="char_wb",
-        ngram_range=(3, 4),
-        n_components= 30,
-        random_state=0
-    )
-
-    # Warm-up small runs to load code paths, JIT caches inside SciPy, etc.
-    skrub.set_config(rust_backend=False) #sklearn backend
-    X_small = X.iloc[: min(2048, len(X))]
-    _ = enc.fit_transform(X_small)
-    gc.collect()
-    skrub.set_config(rust_backend=True) #rust backend
-    X_small = X.iloc[: min(2048, len(X))]
-    _ = enc.fit_transform(X_small)
-    gc.collect()
-
-    # Main benchmark: Run on the entire dataset
-    print("\nStarting main benchmark")
-    skrub.set_config(rust_backend=False) #sklearn
-    t0 = time.perf_counter()
-    X_enc = enc.fit_transform(X)
-    print(f"Shape = {X_enc.shape}")
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"skrub - Execution time = {exec_time:8.3f}s\n")
-
-    skrub.set_config(rust_backend=True, debug_timing=False, num_threads=0) #rust
-    t0 = time.perf_counter()
-    X_enc = enc.fit_transform(X)
-    print(f"Shape = {X_enc.shape}")
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"stratum - Execution time = {exec_time:8.3f}s\n")
-
-
-if __name__ == '__main__':
-    main()
diff --git a/benchmarks/bench_tablevectorizer.py b/benchmarks/bench_tablevectorizer.py
deleted file mode 100644
index e1f0ab1e..00000000
--- a/benchmarks/bench_tablevectorizer.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import time
-import stratum as skrub
-from sklearn.preprocessing import OneHotEncoder
-from skrub.datasets import fetch_employee_salaries
-from skrub import TableVectorizer, StringEncoder
-import pandas as pd
-from joblib import parallel_backend
-
-def main():
-    # Load dataset
-    dataset = fetch_employee_salaries()
-    employees, salaries = dataset.X, dataset.y
-
-    # Append dataset n times to have a larger dataset
-    employees = pd.concat([employees] * 10, ignore_index=True)
-    print(employees.info())
-    employees = employees.dropna() #necessary for rusty one-hot encoder
-
-    # Use skrub's vanilla TableVectorizer
-    skrub.set_config(rust_backend=False, debug_timing=False)
-    t0 = time.perf_counter()
-    vectorizer = TableVectorizer(n_jobs=-1)
-    employees_enc = vectorizer.fit_transform(employees)
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"skrub - Encoding time: {exec_time:8.3f}s\n")
-    print(f"Encoded data shape: {employees_enc.shape}")
-
-
-    # Use stratum's TableVectorizer
-    t0 = time.perf_counter()
-    skrub.set_config(rust_backend=True, debug_timing=False, scheduler=True, stats=True)
-    with parallel_backend('threading'):
-        vectorizer = TableVectorizer(high_cardinality=StringEncoder(), low_cardinality=OneHotEncoder(), n_jobs=-1) #default setup
-        employees_enc = vectorizer.fit_transform(employees)
-    t1 = time.perf_counter()
-    exec_time = t1 - t0
-    print(f"stratum - Encoding time: {exec_time:8.3f}s\n")
-    print(f"Encoded data shape: {employees_enc.shape}")
-
-    # Explore the encodings
-    print(vectorizer.kind_to_columns_)
-    print("Fitted transformers to department column")
-    print(vectorizer.transformers_["department"]) #low_cardinality
-    print("Fitted transformers to division column")
-    print(vectorizer.transformers_["division"]) #high_cardinality
-
-if __name__ == "__main__":
-    main()
diff --git a/benchmarks/logical_optimizer/end-to-end/20newsgroups.py b/benchmarks/logical_optimizer/end-to-end/20newsgroups.py
deleted file mode 100644
index 1b688cc3..00000000
--- a/benchmarks/logical_optimizer/end-to-end/20newsgroups.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from sklearn.datasets import fetch_20newsgroups
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.model_selection import KFold, ShuffleSplit
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.linear_model import Ridge, LinearRegression, LogisticRegression
-from sklearn.svm import LinearSVC
-
-from stratum.logical_optimizer import apply_cse_on_skrub_ir
-from stratum.api.gridsearch import grid_search
-
-import stratum as skrub
-import logging
-import tempfile
-import numpy as np
-import pandas as pd
-from time import time
-
-logging.basicConfig(level=logging.INFO)
-
-from sklearn.base import BaseEstimator, TransformerMixin
-
-class PandasTfidfVectorizer(BaseEstimator, TransformerMixin):
-    """A CountVectorizer that returns a pandas DataFrame instead of a sparse matrix."""
-    def __init__(self, **kwargs):
-        self.vectorizer = TfidfVectorizer(**kwargs)
-
-    def fit(self, X: pd.DataFrame, y=None):
-        X = X.iloc[:,0]
-        self.vectorizer.fit(X)
-        return self
-
-    def transform(self, X):
-        X = X.iloc[:,0]
-        X_counts = self.vectorizer.transform(X)
-        df = pd.DataFrame.sparse.from_spmatrix(
-            X_counts,
-            columns=self.vectorizer.get_feature_names_out()
-        )
-        return df
-
-    def fit_transform(self, X, y=None, **kwargs):
-        return self.fit(X).transform(X)
-
-    def get_feature_names_out(self, *args, **kwargs):
-        return self.vectorizer.get_feature_names_out(*args, **kwargs)
-
-def tfidf_pipeline(df_path: str, show_graph: bool = False, stratum: bool = False, kfold: bool = False):
-    path = skrub.as_data_op(df_path)
-    data = path.skb.apply_func(pd.read_csv).skb.subsample(n=100)
-    data = data.fillna("")
-    y = data["y"].skb.mark_as_y()
-    X = data[["text"]].skb.mark_as_X()
-
-
-    vectorizer = PandasTfidfVectorizer()
-
-    pipes = {f"pipeline{i}": X.skb.apply(vectorizer).
-             skb.apply(model, y=y) for i, model in
-             enumerate(
-                 [LinearRegression(),
-                  Ridge(),
-                  LogisticRegression(max_iter=1000),
-                  LinearSVC(),
-                  MultinomialNB(),
-                  ])}
-    pred = skrub.choose_from(pipes).as_data_op()
-
-    if show_graph:
-        pred.skb.draw_graph().open()
-    print("----------------------------------------")
-    stats1 = make_gridsearch(pred, kfold=kfold)
-    print("\nOriginal Pipeline (njobs=1) took: ",sum(stats1))
-    print("----------------------------------------")
-
-    stats2 = make_gridsearch(pred, multi=True, kfold=kfold)
-    print("\nOriginal Pipeline (njobs=-1) took: ",sum(stats2))
-    print("----------------------------------------")
-
-
-    stats_opt = make_gridsearch(pred, optimize_enabled=True, stratum=stratum, kfold=kfold)
-    print("\nTotal optimized Pipeline took: ",sum(stats_opt))
-    print("----------------------------------------")
-    if show_graph:
-        pred.skb.draw_graph().open()
-    return np.array([stats1, stats2, stats_opt])
-
-
-def make_gridsearch(pred, random_state=42, optimize_enabled=False, stratum=False, multi=False, kfold=False) -> tuple[list, list]:
-    if optimize_enabled:
-        t00 = time()
-        pred = apply_cse_on_skrub_ir(pred)
-        t0 = time()
-        stats = [t0 - t00]
-    else:
-        t0 = time()
-        stats = [0.0]
-
-    cv = KFold(n_splits=3, shuffle=True, random_state=random_state) if kfold else ShuffleSplit(n_splits=1, test_size=0.2, random_state=42) 
-    
-    if stratum:
-        search = grid_search(pred, cv=cv, show_stats=True)
-    else:
-        search = pred.skb.make_grid_search(fitted=True, cv=cv, n_jobs=-1 if multi else 1)
-        print("Search results: \n", search.results_)
-
-    t1 = time()
-    stats.append(t1-t0)
-    return stats
-
-
-def run_tfidf_pipeline_benchmark(stratum: bool = False, kfold: bool = False):
-    data = fetch_20newsgroups(subset="train", remove=("headers", "footers", "quotes"))
-    df = pd.DataFrame({"text": data.data, "y": data.target})
-    df["text"].fillna("", inplace=True)
-    print("df shape: ", df.shape)
-    list_of_stats = []
-    parameters = [100, 500, 1000,10000]
-    for n_rows in parameters:
-        df_n_rows = df.head(n_rows)
-        with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", suffix=".csv", delete=False) as f:
-            df_n_rows.to_csv(f)
-            f.flush()
-            temp_path = f.name
-
-        stats = tfidf_pipeline(temp_path, show_graph = False, stratum=stratum, kfold=kfold)
-        stats = np.hstack((np.array([n_rows, n_rows, n_rows]).reshape((3, 1)), stats))
-        list_of_stats.append(stats)
-
-    stats = np.vstack(list_of_stats)
-
-    columns_results = ["n_rows","optimize", "runtime"]
-    df = pd.DataFrame(stats, columns=columns_results)
-
-    columns_results.remove("n_rows")
-    df["total"] = df[columns_results].sum(axis=1).apply(lambda x: "{:.3f}".format(x))
-    for col in columns_results:
-        df[col] = df[col].apply(lambda x: "{:.3f}".format(x))
-    df["n_rows"] = df["n_rows"].astype(int)
-
-    print(df)
-    df.to_csv(f"bench_cse_tfidf_gridsearch.csv", index=False)
-
-def main():
-    run_tfidf_pipeline_benchmark(stratum=True, kfold=False)
-
-if __name__ == '__main__':
-    main()
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/README.md b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/README.md
deleted file mode 100644
index 0105a651..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# Bike Sharing Demand
-
-This benchmark evaluates data preprocessing pipelines on the bike-sharing demand dataset, comparing different approaches for feature engineering and encoding.
-
-## Overview
-
-The benchmark consists of multiple pipeline implementations (`pipeline0.py` through `pipeline4.py`) that demonstrate various preprocessing strategies for the bike-sharing demand prediction task. The goal is to compare the performance and execution time of different pipeline designs.
-Before running the benchmark, please download the dataset and augment the dataset, as described below.
-
-## Running the Benchmark
-
-### Baseline Pipelines
-
-To run the baseline pipeline comparisons:
-
-```bash
-python run_base_lines.py
-```
-
-This script executes multiple pipeline variants (pipeline0-4) and measures:
-- Training time
-- Prediction performance
-- Memory usage
-
-Each pipeline implements different preprocessing strategies, allowing you to compare trade-offs between complexity and performance.
-
-### Skrubified Pipelines
-
-To run the optimized stratum/skrub-based pipelines:
-
-```bash 
-python skrubified_pipelines.py
-```
-
-## Data
-```bash
-kaggle competitions download -c bike-sharing-demand
-unzip bike-sharing-demand.zip -d input/
-rm bike-sharing-demand.zip
-```
-
-
-The benchmark uses various versions of the bike-sharing demand dataset:
-- `train.csv` - Original training data
-- `train_augmented_2x.csv` - 2x augmented dataset
-- `train_augmented_3x.csv` - 3x augmented dataset
-- `train_augmented_stratified.csv` - Stratified augmentation
-
-Data augmentation scripts are available in `bike_data_augmentation.py`.
-```bash
-python bike_data_augmentation.py
-```
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/bike_data_augmentation.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/bike_data_augmentation.py
deleted file mode 100644
index f156dd91..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/bike_data_augmentation.py
+++ /dev/null
@@ -1,282 +0,0 @@
-"""
-Data augmentation script for bike-sharing demand dataset.
-Creates synthetic samples while avoiding duplicates.
-"""
-
-import pandas as pd
-import numpy as np
-from pathlib import Path
-
-def augment_bike_data(input_path, output_path=None, augmentation_factor=2, seed=42):
-    """
-    Augment bike-sharing demand data by creating synthetic samples.
-    
-    Parameters:
-    -----------
-    input_path : str or Path
-        Path to the input CSV file
-    output_path : str or Path, optional
-        Path to save augmented data. If None, returns DataFrame
-    augmentation_factor : int
-        Multiplier for dataset size (2 = double the data)
-    seed : int
-        Random seed for reproducibility
-    
-    Returns:
-    --------
-    pd.DataFrame
-        Augmented dataset with original + synthetic samples
-    """
-    np.random.seed(seed)
-    
-    # Load original data
-    df = pd.read_csv(input_path)
-    print(f"Original dataset size: {len(df)}")
-    
-    # Identify column types
-    categorical_cols = []
-    numeric_cols = []
-    target_cols = ['casual', 'registered', 'count']  # Don't augment targets directly
-    
-    for col in df.columns:
-        if col in ['datetime']:
-            continue  # Handle datetime separately
-        elif col in target_cols:
-            continue  # Handle targets separately
-        elif df[col].dtype in ['int64', 'float64'] and df[col].nunique() < 20:
-            categorical_cols.append(col)
-        elif df[col].dtype in ['int64', 'float64']:
-            numeric_cols.append(col)
-    
-    print(f"Categorical columns: {categorical_cols}")
-    print(f"Numeric columns: {numeric_cols}")
-    
-    # Generate synthetic samples
-    num_synthetic = len(df) * (augmentation_factor - 1)
-    synthetic_samples = []
-    
-    for i in range(num_synthetic):
-        # Randomly select two samples to interpolate/combine
-        idx1, idx2 = np.random.choice(len(df), size=2, replace=False)
-        sample1 = df.iloc[idx1]
-        sample2 = df.iloc[idx2]
-        
-        new_sample = {}
-        
-        # Handle datetime: use one of the existing samples or create nearby time
-        if 'datetime' in df.columns:
-            base_datetime = pd.to_datetime(sample1['datetime'])
-            # Add random time offset (±3 hours)
-            offset_hours = np.random.randint(-3, 4)
-            new_datetime = base_datetime + pd.Timedelta(hours=offset_hours)
-            new_sample['datetime'] = new_datetime.strftime('%Y-%m-%d %H:%M:%S')
-        
-        # Handle categorical columns: choose from one sample or pick random valid value
-        for col in categorical_cols:
-            if np.random.random() < 0.8:
-                # 80% chance: use value from one of the parent samples
-                new_sample[col] = sample1[col] if np.random.random() < 0.5 else sample2[col]
-            else:
-                # 20% chance: pick any valid value from the distribution
-                new_sample[col] = np.random.choice(df[col].dropna().values)
-        
-        # Handle numeric columns: interpolate with noise
-        for col in numeric_cols:
-            # Interpolation factor between the two samples
-            alpha = np.random.beta(2, 2)  # Beta distribution favors middle values
-            interpolated = alpha * sample1[col] + (1 - alpha) * sample2[col]
-            
-            # Add small Gaussian noise (5% of std)
-            noise = np.random.normal(0, df[col].std() * 0.05)
-            new_value = interpolated + noise
-            
-            # Ensure value stays in valid range
-            new_value = np.clip(new_value, df[col].min(), df[col].max())
-            new_sample[col] = new_value
-        
-        # Handle target columns: use relationship-based generation
-        # For bike sharing, casual + registered = count
-        if 'temp' in numeric_cols or 'atemp' in numeric_cols:
-            # Temperature strongly correlates with demand
-            temp_col = 'atemp' if 'atemp' in numeric_cols else 'temp'
-            temp_percentile = (new_sample[temp_col] - df[temp_col].min()) / (df[temp_col].max() - df[temp_col].min())
-            
-            # Find similar weather conditions
-            similar_mask = (
-                (df[temp_col] >= new_sample[temp_col] - df[temp_col].std() * 0.5) &
-                (df[temp_col] <= new_sample[temp_col] + df[temp_col].std() * 0.5)
-            )
-            
-            if similar_mask.sum() > 0:
-                similar_samples = df[similar_mask]
-                base_casual = similar_samples['casual'].mean()
-                base_registered = similar_samples['registered'].mean()
-            else:
-                base_casual = df['casual'].mean()
-                base_registered = df['registered'].mean()
-            
-            # Add variation based on other factors
-            variation = np.random.normal(1.0, 0.15)
-            new_sample['casual'] = max(0, int(base_casual * variation))
-            new_sample['registered'] = max(0, int(base_registered * variation))
-            new_sample['count'] = new_sample['casual'] + new_sample['registered']
-        else:
-            # Fallback: interpolate targets
-            alpha = np.random.beta(2, 2)
-            for target in target_cols:
-                if target in df.columns:
-                    new_sample[target] = int(alpha * sample1[target] + (1 - alpha) * sample2[target])
-        
-        synthetic_samples.append(new_sample)
-    
-    # Combine original and synthetic data
-    synthetic_df = pd.DataFrame(synthetic_samples)
-    augmented_df = pd.concat([df, synthetic_df], ignore_index=True)
-    
-    # Remove any exact duplicates (should be extremely rare with this approach)
-    original_len = len(augmented_df)
-    augmented_df = augmented_df.drop_duplicates()
-    duplicates_removed = original_len - len(augmented_df)
-    
-    if duplicates_removed > 0:
-        print(f"Removed {duplicates_removed} exact duplicates")
-    
-    print(f"Augmented dataset size: {len(augmented_df)}")
-    print(f"Augmentation ratio: {len(augmented_df) / len(df):.2f}x")
-    
-    # Verify no exact duplicates exist
-    assert augmented_df.duplicated().sum() == 0, "Duplicates detected in augmented data!"
-    
-    # Save if output path provided
-    if output_path:
-        augmented_df.to_csv(output_path, index=False)
-        print(f"Saved augmented data to {output_path}")
-    
-    return augmented_df
-
-
-def create_stratified_augmentation(input_path, output_path=None, target_col='count', 
-                                   bins=5, samples_per_bin=None, seed=42):
-    """
-    Augment data with stratification to maintain target distribution.
-    Useful for imbalanced datasets.
-    
-    Parameters:
-    -----------
-    input_path : str or Path
-        Path to the input CSV file
-    output_path : str or Path, optional
-        Path to save augmented data
-    target_col : str
-        Target column name for stratification
-    bins : int
-        Number of bins for stratification
-    samples_per_bin : int, optional
-        Number of synthetic samples per bin. If None, uses size of largest bin
-    seed : int
-        Random seed
-    
-    Returns:
-    --------
-    pd.DataFrame
-        Augmented dataset
-    """
-    np.random.seed(seed)
-    df = pd.read_csv(input_path)
-    
-    # Create bins for stratification
-    df['_bin'] = pd.qcut(df[target_col], q=bins, labels=False, duplicates='drop')
-    
-    bin_counts = df['_bin'].value_counts().sort_index()
-    print(f"Samples per bin: {bin_counts.to_dict()}")
-    
-    if samples_per_bin is None:
-        samples_per_bin = bin_counts.max()
-    
-    augmented_dfs = [df.drop('_bin', axis=1)]
-    
-    for bin_id in df['_bin'].unique():
-        bin_df = df[df['_bin'] == bin_id].drop('_bin', axis=1)
-        current_count = len(bin_df)
-        needed = samples_per_bin - current_count
-        
-        if needed <= 0:
-            continue
-        
-        print(f"Augmenting bin {bin_id}: adding {needed} samples")
-        
-        # Generate synthetic samples within this bin
-        synthetic_samples = []
-        for _ in range(needed):
-            idx1, idx2 = np.random.choice(len(bin_df), size=2, replace=True)
-            sample1 = bin_df.iloc[idx1]
-            sample2 = bin_df.iloc[idx2]
-            
-            new_sample = {}
-            alpha = np.random.uniform(0.3, 0.7)  # Interpolation weight
-            
-            for col in bin_df.columns:
-                if col == 'datetime':
-                    new_sample[col] = sample1[col]
-                elif bin_df[col].dtype == 'object':
-                    new_sample[col] = sample1[col] if np.random.random() < 0.5 else sample2[col]
-                else:
-                    val = alpha * sample1[col] + (1 - alpha) * sample2[col]
-                    if bin_df[col].dtype in ['int64']:
-                        val = int(round(val))
-                    new_sample[col] = val
-            
-            synthetic_samples.append(new_sample)
-        
-        synthetic_df = pd.DataFrame(synthetic_samples)
-        augmented_dfs.append(synthetic_df)
-    
-    result = pd.concat(augmented_dfs, ignore_index=True)
-    result = result.drop_duplicates()
-    
-    print(f"Final augmented size: {len(result)}")
-    
-    if output_path:
-        result.to_csv(output_path, index=False)
-        print(f"Saved to {output_path}")
-    
-    return result
-
-
-if __name__ == "__main__":
-    # Example usage
-    input_file = Path(__file__).parent / "input" / "train.csv"
-    
-    # Basic augmentation (double the dataset)
-    output_file = Path(__file__).parent / "input" / "train_augmented_2x.csv"
-    augmented_data = augment_bike_data(
-        input_file, 
-        output_file, 
-        augmentation_factor=2,
-        seed=42
-    )
-    
-    # Triple the dataset
-    output_file_3x = Path(__file__).parent / "input" / "train_augmented_3x.csv"
-    augmented_data_3x = augment_bike_data(
-        input_file, 
-        output_file_3x, 
-        augmentation_factor=3,
-        seed=42
-    )
-    
-    # Stratified augmentation (maintains target distribution)
-    output_file_stratified = Path(__file__).parent / "input" / "train_augmented_stratified.csv"
-    stratified_data = create_stratified_augmentation(
-        input_file,
-        output_file_stratified,
-        target_col='count',
-        bins=5,
-        seed=42
-    )
-    
-    print("\n✅ Data augmentation complete!")
-    print("Generated files:")
-    print(f"  - {output_file.name}")
-    print(f"  - {output_file_3x.name}")
-    print(f"  - {output_file_stratified.name}")
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/get_data.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/get_data.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline0.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline0.py
deleted file mode 100644
index 599c2274..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline0.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-import lightgbm as lgb
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_log_error
-
-# Load data
-_input_filename = os.getenv("BIKE_INPUT_FILE", "train.csv")
-train = pd.read_csv(f"./input/{_input_filename}", parse_dates=["datetime"])
-
-
-# Feature engineering
-def add_datetime_features(df):
-    df["year"] = df["datetime"].dt.year
-    df["month"] = df["datetime"].dt.month
-    df["dayofweek"] = df["datetime"].dt.dayofweek
-    df["hour"] = df["datetime"].dt.hour
-    return df
-
-
-train = add_datetime_features(train)
-
-# Define features and target
-features = [
-    "season",
-    "holiday",
-    "workingday",
-    "weather",
-    "temp",
-    "atemp",
-    "humidity",
-    "windspeed",
-    "year",
-    "month",
-    "dayofweek",
-    "hour",
-]
-X = train[features]
-y = train["count"]
-
-
-# RMSLE scorer
-def rmsle(y_true, y_pred):
-    return np.sqrt(mean_squared_log_error(y_true, np.clip(y_pred, 0, None)))
-
-
-# 5-fold CV
-kf = KFold(n_splits=5, shuffle=True, random_state=42)
-scores = []
-for train_idx, val_idx in kf.split(X):
-    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
-    model = lgb.LGBMRegressor(random_state=42)
-    model.fit(X_train, y_train)
-    preds = model.predict(X_val)
-    scores.append(rmsle(y_val, preds))
-
-print(f"5-fold RMSLE: {np.mean(scores):.5f}")
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline1.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline1.py
deleted file mode 100644
index 3a549c7c..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline1.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from sklearn.model_selection import KFold
-from xgboost import XGBRegressor
-from sklearn.metrics import mean_squared_log_error
-import warnings
-
-warnings.filterwarnings("ignore")
-
-# Load data
-_input_filename = os.getenv("BIKE_INPUT_FILE", "train.csv")
-train = pd.read_csv(f"./input/{_input_filename}")
-
-
-# Feature engineering
-def fe(df):
-    df["datetime"] = pd.to_datetime(df["datetime"])
-    df["year"] = df["datetime"].dt.year
-    df["month"] = df["datetime"].dt.month
-    df["hour"] = df["datetime"].dt.hour
-    df["weekday"] = df["datetime"].dt.weekday
-    return df
-
-
-train = fe(train)
-
-# Define features and target
-features = [
-    "season",
-    "holiday",
-    "workingday",
-    "weather",
-    "temp",
-    "atemp",
-    "humidity",
-    "windspeed",
-    "year",
-    "month",
-    "hour",
-    "weekday",
-]
-X = train[features]
-y = train["count"]
-
-
-# RMSLE function
-def rmsle(y_true, y_pred):
-    return np.sqrt(mean_squared_log_error(y_true, y_pred.clip(0, None)))
-
-
-# 5-fold CV
-kf = KFold(n_splits=5, shuffle=True, random_state=42)
-scores = []
-for train_idx, val_idx in kf.split(X):
-    X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
-    y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]
-    # log1p transform
-    y_tr_log = np.log1p(y_tr)
-    model = XGBRegressor(n_estimators=100, random_state=42, n_jobs=-1)
-    model.fit(X_tr, y_tr_log)
-    preds_log = model.predict(X_val)
-    preds = np.expm1(preds_log)
-    scores.append(rmsle(y_val, preds))
-print(f"CV RMSLE: {np.mean(scores):.5f}")
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline2.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline2.py
deleted file mode 100644
index 27be54a9..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline2.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_error
-
-# Load data
-_input_filename = os.getenv("BIKE_INPUT_FILE", "train.csv")
-train = pd.read_csv(f"./input/{_input_filename}")
-
-# Identify target column from sample submission
-
-# Feature engineering
-for df in [train]:
-    df["datetime"] = pd.to_datetime(df["datetime"])
-    df["year"] = df["datetime"].dt.year
-    df["month"] = df["datetime"].dt.month
-    df["dayofweek"] = df["datetime"].dt.dayofweek
-    df["hour"] = df["datetime"].dt.hour
-
-features = [
-    "season",
-    "holiday",
-    "workingday",
-    "weather",
-    "temp",
-    "atemp",
-    "humidity",
-    "windspeed",
-    "year",
-    "month",
-    "dayofweek",
-    "hour",
-]
-
-X = train[features]
-y = train["count"].values
-
-# Log-transform target
-y_log = np.log1p(y)
-
-# 5-fold CV for RMSLE
-kf = KFold(n_splits=5, shuffle=True, random_state=42)
-rmsle_scores = []
-for train_idx, val_idx in kf.split(X):
-    X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
-    y_tr, y_val = y_log[train_idx], y[val_idx]
-    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
-    model.fit(X_tr, y_tr)
-    preds_log = model.predict(X_val)
-    preds = np.expm1(preds_log)
-    preds[preds < 0] = 0
-    rmsle = np.sqrt(mean_squared_error(np.log1p(y_val), np.log1p(preds)))
-    rmsle_scores.append(rmsle)
-
-cv_rmsle = np.mean(rmsle_scores)
-print(f"CV RMSLE: {cv_rmsle:.5f}")
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline3.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline3.py
deleted file mode 100644
index 40e762c4..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline3.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from sklearn.linear_model import Ridge
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_error
-
-# Load data
-_input_filename = os.getenv("BIKE_INPUT_FILE", "train.csv")
-train = pd.read_csv(f"./input/{_input_filename}")
-
-
-# Feature engineering
-def preprocess(df):
-    df = df.copy()
-    df["datetime"] = pd.to_datetime(df["datetime"])
-    df["year"] = df["datetime"].dt.year
-    df["month"] = df["datetime"].dt.month
-    df["dayofweek"] = df["datetime"].dt.dayofweek
-    df["hour"] = df["datetime"].dt.hour
-    return df[
-        [
-            "year",
-            "month",
-            "dayofweek",
-            "hour",
-            "season",
-            "weather",
-            "temp",
-            "atemp",
-            "humidity",
-            "windspeed",
-            "workingday",
-            "holiday",
-        ]
-    ]
-
-
-X = preprocess(train)
-y = np.log1p(train["count"])
-
-# 5-fold CV for RMSLE on log scale
-kf = KFold(n_splits=5, shuffle=True, random_state=42)
-rmsle_scores = []
-for train_idx, val_idx in kf.split(X):
-    X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
-    y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]
-    model = Ridge()
-    model.fit(X_tr, y_tr)
-    y_pred_log = model.predict(X_val)
-    rmsle = np.sqrt(mean_squared_error(y_val, y_pred_log))
-    rmsle_scores.append(rmsle)
-
-cv_rmsle = np.mean(rmsle_scores)
-print(f"CV RMSLE: {cv_rmsle:.5f}")
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline4.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline4.py
deleted file mode 100644
index d4f43803..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/pipeline4.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_log_error
-import xgboost as xgb
-
-# Load data
-_input_filename = os.getenv("BIKE_INPUT_FILE", "train.csv")
-train = pd.read_csv(f"./input/{_input_filename}")
-
-# Identify target from sample submission
-target_col = "count"
-
-
-# Feature engineering
-def prepare(df):
-    df = df.copy()
-    df["datetime"] = pd.to_datetime(df["datetime"])
-    df["year"] = df["datetime"].dt.year
-    df["month"] = df["datetime"].dt.month
-    df["day_of_week"] = df["datetime"].dt.dayofweek
-    df["hour"] = df["datetime"].dt.hour
-    return df
-
-
-train_p = prepare(train)
-
-features = [
-    "season",
-    "weather",
-    "temp",
-    "atemp",
-    "humidity",
-    "windspeed",
-    "workingday",
-    "holiday",
-    "year",
-    "month",
-    "day_of_week",
-    "hour",
-]
-X = train_p[features]
-y = np.log1p(train_p[target_col])
-
-# 5-fold CV evaluation
-kf = KFold(n_splits=5, shuffle=True, random_state=42)
-rmsle_scores = []
-for train_idx, val_idx in kf.split(X):
-    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
-    model = xgb.XGBRegressor(
-        objective="reg:squarederror",
-        n_estimators=100,
-        learning_rate=0.1,
-        max_depth=6,
-        random_state=42,
-        n_jobs=-1,
-    )
-    model.fit(X_train, y_train)
-    y_pred = model.predict(X_val)
-    score = np.sqrt(mean_squared_log_error(np.expm1(y_val), np.expm1(y_pred)))
-    rmsle_scores.append(score)
-
-print(f"CV RMSLE: {np.mean(rmsle_scores):.5f}")
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/run_base_lines.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/run_base_lines.py
deleted file mode 100644
index e38dd0be..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/run_base_lines.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import time
-import subprocess
-import sys
-import os
-
-
-def run_pipeline(pipeline_name, input_filename):
-    """Run a pipeline script and measure its execution time."""
-    print(f"\n{'=' * 60}")
-    print(f"Running {pipeline_name}...")
-    print(f"{'=' * 60}")
-
-    start_time = time.time()
-
-    try:
-        # Run the pipeline script
-        # Prepare environment with selected input filename
-        env = dict(os.environ)
-        env["BIKE_INPUT_FILE"] = input_filename
-
-        result = subprocess.run(
-            [sys.executable, pipeline_name],
-            capture_output=True,
-            text=True,
-            check=True,
-            env=env,
-        )
-
-        elapsed_time = time.time() - start_time
-
-        # Print the output
-        print(result.stdout)
-        if result.stderr:
-            print("STDERR:", result.stderr)
-
-        print(f"\n✓ {pipeline_name} completed in {elapsed_time:.2f} seconds")
-
-        return elapsed_time, True
-
-    except subprocess.CalledProcessError as e:
-        elapsed_time = time.time() - start_time
-        print(f"\n✗ {pipeline_name} failed after {elapsed_time:.2f} seconds")
-        print("STDOUT:", e.stdout)
-        print("STDERR:", e.stderr)
-        return elapsed_time, False
-    except Exception as e:
-        elapsed_time = time.time() - start_time
-        print(f"\n✗ {pipeline_name} error after {elapsed_time:.2f} seconds: {e}")
-        return elapsed_time, False
-
-
-def main():
-    """Run all pipelines sequentially and report timing results."""
-    # Allow selecting input file variant via CLI arg; fallback to 'train.csv'
-    input_filename = sys.argv[1] if len(sys.argv) > 1 else "train_augmented_3x.csv"
-    pipelines = [f"pipeline{i}.py" for i in range(5)]
-
-    results = {}
-    total_start = time.time()
-
-    print("Starting pipeline execution...")
-    print(f"Total pipelines to run: {len(pipelines)}")
-
-    for pipeline in pipelines:
-        elapsed, success = run_pipeline(pipeline, input_filename)
-        results[pipeline] = {
-            'time': elapsed,
-            'success': success
-        }
-
-    total_time = time.time() - total_start
-
-    # Print summary
-    print(f"\n{'=' * 60}")
-    print("EXECUTION SUMMARY")
-    print(f"{'=' * 60}")
-
-    for pipeline, result in results.items():
-        status = "✓ SUCCESS" if result['success'] else "✗ FAILED"
-        print(f"{pipeline:20s} - {result['time']:8.2f}s - {status}")
-
-    print(f"{'-' * 60}")
-    print(f"{'Total time:':20s}   {total_time:8.2f}s")
-    print(f"{'=' * 60}")
-
-    # Count successes
-    successful = sum(1 for r in results.values() if r['success'])
-    print(f"\nCompleted: {successful}/{len(pipelines)} pipelines successful")
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/skrubified_pipelines.py b/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/skrubified_pipelines.py
deleted file mode 100644
index 8e918eef..00000000
--- a/benchmarks/logical_optimizer/end-to-end/bike-sharing-demand/skrubified_pipelines.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import skrub
-import pandas as pd
-import numpy as np
-import lightgbm as lgb
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import Ridge
-from xgboost import XGBRegressor
-from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_log_error, make_scorer
-import time
-
-from stratum.logical_optimizer import apply_cse_on_skrub_ir
-
-t0 = time.time()
-
-# Load data
-train = pd.read_csv("./input/train_augmented_3x.csv", parse_dates=["datetime"])
-
-# Skrub DataOps plan
-data = skrub.var("data", train).skb.subsample(n=1000)
-X = data.drop("count", axis=1).skb.mark_as_X()
-y = data["count"].skb.mark_as_y()
-mode = skrub.eval_mode()
-
-# Pipeline 0
-datetime_col = X["datetime"].dt
-X_feat_pipe0 = X.assign(
-    year=datetime_col.year,
-    month=datetime_col.month,
-    dayofweek=datetime_col.dayofweek,
-    hour=datetime_col.hour)
-
-X_feat_pipe0 = X_feat_pipe0.drop(["datetime", "casual", "registered"], axis=1, errors="ignore")
-model_pipe0 = lgb.LGBMRegressor(random_state=42)
-pred_pipe0 = X_feat_pipe0.skb.apply(model_pipe0, y=y).skb.set_name("Pipeline 0")
-
-# Pipeline 1
-X_feat_pipe1 = X.assign(
-    year=datetime_col.year,
-    month=datetime_col.month,
-    weekday=datetime_col.weekday,
-    hour=datetime_col.hour)
-
-X_feat_pipe1 = X_feat_pipe1.drop(["datetime", "casual", "registered"], axis=1, errors="ignore")
-
-model_pipe1 = XGBRegressor(n_estimators=100, random_state=42, n_jobs=-1)
-pred_pipe1 = X_feat_pipe1.skb.apply(model_pipe1, y=y.skb.apply_func(np.log1p)).skb.set_name("Pipeline 1")
-pred_final_pipe1 = pred_pipe1.skb.apply_func(
-    lambda a,b: np.expm1(a) if b=="predict" else a,
-    mode).skb.set_name("Reverse log for prediction1")
-
-# Pipeline 2
-X_feat_pipe2 = X.assign(
-    year=datetime_col.year,
-    month=datetime_col.month,
-    dayofweek=datetime_col.dayofweek,
-    hour=datetime_col.hour)
-X_feat_pipe2 = X_feat_pipe2.drop(["datetime", "casual", "registered"], axis=1, errors="ignore")
-
-model_pipe2 = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
-pred_pipe2 = X_feat_pipe2.skb.apply(model_pipe2, y=y.skb.apply_func(np.log1p)).skb.set_name("Pipeline 2")
-pred_final_pipe2 = pred_pipe2.skb.apply_func(
-    lambda a,b: np.expm1(a) if b=="predict" else a,
-    mode).skb.set_name("Reverse log for prediction2")
-
-# Pipeline 3
-X_feat_pipe3 = X.assign(
-    year=datetime_col.year,
-    month=datetime_col.month,
-    dayofweek=datetime_col.dayofweek,
-    hour=datetime_col.hour)
-X_feat_pipe3 = X_feat_pipe3.drop(["datetime", "casual", "registered"], axis=1, errors="ignore")
-
-model_pipe3 = Ridge()
-pred_pipe3 = X_feat_pipe3.skb.apply(model_pipe3, y=y.skb.apply_func(np.log1p)).skb.set_name("Pipeline 3")
-pred_final_pipe3 = pred_pipe3.skb.apply_func(
-    lambda a,b: np.expm1(a) if b=="predict" else a,
-    mode).skb.set_name("Reverse log for prediction3")
-
-# Pipeline 4
-X_feat_pipe4 = X.assign(
-    year=datetime_col.year,
-    month=datetime_col.month,
-    dayofweek=datetime_col.dayofweek,
-    hour=datetime_col.hour)
-X_feat_pipe4 = X_feat_pipe4.drop(["datetime", "casual", "registered"], axis=1, errors="ignore")
-
-model_pipe4 = XGBRegressor(
-        objective="reg:squarederror",
-        n_estimators=100,
-        learning_rate=0.1,
-        max_depth=6,
-        random_state=42,
-        n_jobs=-1,
-)
-pred_pipe4 = X_feat_pipe4.skb.apply(model_pipe4, y=y.skb.apply_func(np.log1p)).skb.set_name("Pipeline 4")
-pred_final_pipe4 = pred_pipe4.skb.apply_func(
-    lambda a,b: np.expm1(a) if b=="predict" else a,
-    mode).skb.set_name("Reverse log for prediction4")
-
-merged_pipelines = skrub.choose_from({
-    "pipeline0": pred_pipe0,
-    "pipeline1": pred_final_pipe1,
-    "pipeline2": pred_final_pipe2,
-    "pipeline3": pred_final_pipe3,
-    "pipeline4": pred_final_pipe4,
-}, name="merged pipelines").as_data_op().skb.set_name("GridSearchCV")
-
-# merged_pipelines.skb.draw_graph().open()
-merged_pipelines = apply_cse_on_skrub_ir(merged_pipelines)
-# merged_pipelines.skb.draw_graph().open()
-
-# RMSLE scorer
-def rmsle(y_true, y_pred):
-    return np.sqrt(mean_squared_log_error(y_true, np.clip(y_pred, 0, None)))
-scorer = make_scorer(rmsle)
-
-cv = KFold(n_splits=5, shuffle=True, random_state=42)
-t0_ = time.time()
-search = merged_pipelines.skb.make_grid_search(fitted=True, cv=cv, scoring=scorer, n_jobs=-1)
-print(search.results_)
-
-t1 = time.time()
-print(t1 - t0)
-print(t1 - t0_)
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/bar_plot.py b/benchmarks/logical_optimizer/end-to-end/california-housing/bar_plot.py
deleted file mode 100644
index eeabd1b3..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/bar_plot.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import numpy as np
-
-base_path = "benchmarks/logical_optimizer/end-to-end/california-housing/"
-data = pd.read_csv(base_path + "california_housing_pipelines_benchmark.csv", sep=";")
-data["time"] = data["time"].apply(np.round, decimals=2)
-
-# Prepare data in desired order: non-optimized, optimized
-non_optimized = data.iloc[:3]["time"].values
-optimized = data.iloc[3:6]["time"].values
-
-labels = ["skrub-njobs=1", "skrub-njobs=-1", "stratum-njobs=1"]
-data = pd.DataFrame({"non_optimized": non_optimized, "optimized": optimized, "labels": labels})
-
-# Publication-quality colorblind-friendly colors
-# Using a 4-color palette: blue, orange, green, red (ColorBrewer inspired)
-pub_colors = ['#F18F01', '#C73E1D', '#6A994E']  # Blue, Orange, Red, Green
-exp_names = ("w/o Logical Rewrites", "w/ Logical Rewrites")
-x = np.arange(len(exp_names)) # the label locations
-width = 0.25  # the width of the bars
-
-multiplier = 0
-
-fig, ax = plt.subplots(figsize=(5, 5), dpi=100, layout='constrained')
-for i, row in data.iterrows():
-    offset = width * multiplier
-    rects = ax.bar(x + offset, row[:2], width=width, label=labels[i], color=pub_colors[i])
-    ax.bar_label(rects, padding=3)
-    multiplier += 1
-
-ax.set_xticks(x + width, exp_names)
-ax.set_yscale("log")
-ax.set_ylabel("Time (s)")
-ax.legend(loc="upper right", ncols=2)
-plt.ylim(0.01, 30)
-ax.grid(axis='y', alpha=0.3, linestyle='--')
-plt.tight_layout()
-plt.savefig(base_path + "california_housing_pipelines_benchmark_bar_plot.pdf")
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline0.py b/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline0.py
deleted file mode 100644
index 4917903d..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline0.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import KFold, cross_val_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
-from sklearn.linear_model import LinearRegression
-
-# Load dataset
-df = pd.read_csv("input/train.csv")
-target = "MedHouseVal"
-
-# Feature engineering
-def feat_eng(X):
-    return X.assign(
-        BedroomsPerRoom=X["AveBedrms"] / X["AveRooms"],
-        IncomeSquared=X["MedInc"] ** 2,
-        IncomeRoomInteraction=X["MedInc"] * X["AveRooms"],
-        Density=X["Population"] / X["AveOccup"],
-        LatitudeLongitude=X["Latitude"] * X["Longitude"],
-        MedInc3=X["MedInc"] ** 3,
-        RoomDensity=X["AveRooms"] / X["Population"]
-    )
-
-# Prepare features and target
-X = df.drop(columns=[target])
-X = feat_eng(X)
-y = df[target]
-
-numeric_features = X.columns.tolist()
-
-# Build preprocessing pipeline
-numeric_transformer = Pipeline(steps=[
-    ("scaler", StandardScaler()),
-])
-
-preprocessor = ColumnTransformer(
-    transformers=[
-        ("num", numeric_transformer, numeric_features)
-    ]
-)
-
-# Build model pipeline
-model = Pipeline(steps=[
-    ("preprocess", preprocessor),
-    ("regressor", LinearRegression())
-])
-
-# Cross-validation
-cv = KFold(n_splits=5, shuffle=True, random_state=42)
-cv_scores = cross_val_score(model, X, y, cv=cv, scoring='neg_mean_squared_error')
-mse_scores = cv_scores.mean()
-
-print(f"Pipeline 0 (LinearRegression) MSE: {mse_scores:.4f}")
-
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline1.py b/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline1.py
deleted file mode 100644
index 41d8dc91..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline1.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import pandas as pd
-from sklearn.model_selection import KFold, cross_val_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
-from sklearn.linear_model import Ridge
-
-# Load dataset
-df = pd.read_csv("input/train.csv")
-target = "MedHouseVal"
-
-# Feature engineering
-def feat_eng(X):
-    return X.assign(
-        BedroomsPerRoom=X["AveBedrms"] / X["AveRooms"],
-        IncomeSquared=X["MedInc"] ** 2,
-        IncomeRoomInteraction=X["MedInc"] * X["AveRooms"],
-        Density=X["Population"] / X["AveOccup"],
-        LatitudeLongitude=X["Latitude"] * X["Longitude"],
-        MedInc3=X["MedInc"] ** 3,
-        RoomDensity=X["AveRooms"] / X["Population"]
-    )
-
-# Prepare features and target
-X = df.drop(columns=[target])
-X = feat_eng(X)
-y = df[target]
-
-numeric_features = X.columns.tolist()
-
-# Build preprocessing pipeline
-numeric_transformer = Pipeline(steps=[
-    ("scaler", StandardScaler()),
-])
-
-preprocessor = ColumnTransformer(
-    transformers=[
-        ("num", numeric_transformer, numeric_features)
-    ]
-)
-
-# Build model pipeline
-model = Pipeline(steps=[
-    ("preprocess", preprocessor),
-    ("regressor", Ridge())
-])
-
-# Cross-validation
-cv = KFold(n_splits=5, shuffle=True, random_state=42)
-cv_scores = cross_val_score(model, X, y, cv=cv, scoring='neg_mean_squared_error')
-mse_scores = cv_scores.mean()
-
-print(f"Pipeline 1 (Ridge) MSE: {mse_scores:.4f}")
-
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline2.py b/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline2.py
deleted file mode 100644
index 1ec3af7a..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline2.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import pandas as pd
-from sklearn.model_selection import KFold, cross_val_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
-from sklearn.linear_model import Lasso
-
-# Load dataset
-df = pd.read_csv("input/train.csv")
-target = "MedHouseVal"
-
-# Feature engineering
-def feat_eng(X):
-    return X.assign(
-        BedroomsPerRoom=X["AveBedrms"] / X["AveRooms"],
-        IncomeSquared=X["MedInc"] ** 2,
-        IncomeRoomInteraction=X["MedInc"] * X["AveRooms"],
-        Density=X["Population"] / X["AveOccup"],
-        LatitudeLongitude=X["Latitude"] * X["Longitude"],
-        MedInc3=X["MedInc"] ** 3,
-        RoomDensity=X["AveRooms"] / X["Population"]
-    )
-
-# Prepare features and target
-X = df.drop(columns=[target])
-X = feat_eng(X)
-y = df[target]
-
-numeric_features = X.columns.tolist()
-
-# Build preprocessing pipeline
-numeric_transformer = Pipeline(steps=[
-    ("scaler", StandardScaler()),
-])
-
-preprocessor = ColumnTransformer(
-    transformers=[
-        ("num", numeric_transformer, numeric_features)
-    ]
-)
-
-# Build model pipeline
-model = Pipeline(steps=[
-    ("preprocess", preprocessor),
-    ("regressor", Lasso())
-])
-
-# Cross-validation
-cv = KFold(n_splits=5, shuffle=True, random_state=42)
-cv_scores = cross_val_score(model, X, y, cv=cv, scoring='neg_mean_squared_error')
-mse_scores = cv_scores.mean()
-
-print(f"Pipeline 2 (Lasso) MSE: {mse_scores:.4f}")
-
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline3.py b/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline3.py
deleted file mode 100644
index 0b4d7520..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/pipeline3.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import pandas as pd
-from sklearn.model_selection import KFold, cross_val_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
-from sklearn.linear_model import ElasticNet
-
-# Load dataset
-df = pd.read_csv("input/train.csv")
-target = "MedHouseVal"
-
-# Feature engineering
-def feat_eng(X):
-    return X.assign(
-        BedroomsPerRoom=X["AveBedrms"] / X["AveRooms"],
-        IncomeSquared=X["MedInc"] ** 2,
-        IncomeRoomInteraction=X["MedInc"] * X["AveRooms"],
-        Density=X["Population"] / X["AveOccup"],
-        LatitudeLongitude=X["Latitude"] * X["Longitude"],
-        MedInc3=X["MedInc"] ** 3,
-        RoomDensity=X["AveRooms"] / X["Population"]
-    )
-
-# Prepare features and target
-X = df.drop(columns=[target])
-X = feat_eng(X)
-y = df[target]
-
-numeric_features = X.columns.tolist()
-
-# Build preprocessing pipeline
-numeric_transformer = Pipeline(steps=[
-    ("scaler", StandardScaler()),
-])
-
-preprocessor = ColumnTransformer(
-    transformers=[
-        ("num", numeric_transformer, numeric_features)
-    ]
-)
-
-# Build model pipeline
-model = Pipeline(steps=[
-    ("preprocess", preprocessor),
-    ("regressor", ElasticNet())
-])
-
-# Cross-validation
-cv = KFold(n_splits=5, shuffle=True, random_state=42)
-cv_scores = cross_val_score(model, X, y, cv=cv, scoring='neg_mean_squared_error')
-mse_scores = cv_scores.mean()
-
-print(f"Pipeline 3 (ElasticNet) MSE: {mse_scores:.4f}")
-
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/run_base_lines.py b/benchmarks/logical_optimizer/end-to-end/california-housing/run_base_lines.py
deleted file mode 100644
index 6fb38ff3..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/run_base_lines.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import time
-import subprocess
-import sys
-import os
-
-
-def run_pipeline(pipeline_name, input_filename=None):
-    """Run a pipeline script and measure its execution time."""
-    print(f"\n{'=' * 60}")
-    print(f"Running {pipeline_name}...")
-    print(f"{'=' * 60}")
-
-    start_time = time.time()
-
-    try:
-        # Run the pipeline script
-        # Note: california-housing pipelines don't use input files,
-        # but we keep the interface consistent with bike-sharing
-        env = dict(os.environ)
-
-        result = subprocess.run(
-            [sys.executable, pipeline_name],
-            capture_output=True,
-            text=True,
-            check=True,
-            env=env,
-        )
-
-        elapsed_time = time.time() - start_time
-
-        # Print the output
-        print(result.stdout)
-        if result.stderr:
-            print("STDERR:", result.stderr)
-
-        print(f"\n✓ {pipeline_name} completed in {elapsed_time:.2f} seconds")
-
-        return elapsed_time, True
-
-    except subprocess.CalledProcessError as e:
-        elapsed_time = time.time() - start_time
-        print(f"\n✗ {pipeline_name} failed after {elapsed_time:.2f} seconds")
-        print("STDOUT:", e.stdout)
-        print("STDERR:", e.stderr)
-        return elapsed_time, False
-    except Exception as e:
-        elapsed_time = time.time() - start_time
-        print(f"\n✗ {pipeline_name} error after {elapsed_time:.2f} seconds: {e}")
-        return elapsed_time, False
-
-
-def main():
-    """Run all pipelines sequentially and report timing results."""
-    pipelines = [f"pipeline{i}.py" for i in range(4)]
-
-    results = {}
-    total_start = time.time()
-
-    print("Starting pipeline execution...")
-    print(f"Total pipelines to run: {len(pipelines)}")
-
-    for pipeline in pipelines:
-        elapsed, success = run_pipeline(pipeline)
-        results[pipeline] = {
-            'time': elapsed,
-            'success': success
-        }
-
-    total_time = time.time() - total_start
-
-    # Print summary
-    print(f"\n{'=' * 60}")
-    print("EXECUTION SUMMARY")
-    print(f"{'=' * 60}")
-
-    for pipeline, result in results.items():
-        status = "✓ SUCCESS" if result['success'] else "✗ FAILED"
-        print(f"{pipeline:20s} - {result['time']:8.2f}s - {status}")
-
-    print(f"{'-' * 60}")
-    print(f"{'Total time:':20s}   {total_time:8.2f}s")
-    print(f"{'=' * 60}")
-
-    # Count successes
-    successful = sum(1 for r in results.values() if r['success'])
-    print(f"\nCompleted: {successful}/{len(pipelines)} pipelines successful")
-
-
-if __name__ == "__main__":
-    main()
-
diff --git a/benchmarks/logical_optimizer/end-to-end/california-housing/skrubified_merged_pipelines.py b/benchmarks/logical_optimizer/end-to-end/california-housing/skrubified_merged_pipelines.py
deleted file mode 100644
index 82b4e4b8..00000000
--- a/benchmarks/logical_optimizer/end-to-end/california-housing/skrubified_merged_pipelines.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import KFold
-import skrub
-from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
-
-from stratum.logical_optimizer import apply_cse_on_skrub_ir
-from stratum.api.gridsearch import grid_search
-from time import time
-
-def pipeline_definition(show_graph=False):
-    # csv file contains the data from sklearn.datasets.fetch_california_housing
-    df_path = "input/train.csv"
-    target = "MedHouseVal"
-
-    df_path = skrub.as_data_op(df_path)
-    df = df_path.skb.apply_func(pd.read_csv).skb.subsample(n=100)
-
-    y = df[target].skb.mark_as_y()
-    X = df.drop(columns=[target]).skb.mark_as_X()
-
-    def feat_eng(X):
-        return X.assign(BedroomsPerRoom=X["AveBedrms"] / X["AveRooms"],
-        IncomeSquared=X["MedInc"] ** 2,
-        IncomeRoomInteraction=X["MedInc"] * X["AveRooms"],
-        Density=X["Population"] / X["AveOccup"],
-        LatitudeLongitude=X["Latitude"] * X["Longitude"],
-        MedInc3=X["MedInc"] ** 3,
-        RoomDensity=X["AveRooms"] / X["Population"]
-    )
-
-    # pipeline 0
-    X2 = feat_eng(X)
-    scaler = StandardScaler()
-    X_scaled = X2.skb.apply(scaler)
-    pred0 = X_scaled.skb.apply(LinearRegression(), y=y)
-
-    # Pipeline 1
-    X2 = feat_eng(X)
-    scaler = StandardScaler()
-    X_scaled = X2.skb.apply(scaler) 
-    pred1 = X_scaled.skb.apply(Ridge(), y=y)
-
-    # Pipeline 2
-    X2 = feat_eng(X)
-    scaler = StandardScaler()
-    X_scaled = X2.skb.apply(scaler)
-    pred2 = X_scaled.skb.apply(Lasso(), y=y)
-
-    # Pipeline 3
-    X2 = feat_eng(X)
-    scaler = StandardScaler()
-    X_scaled = X2.skb.apply(scaler)
-    pred3 = X_scaled.skb.apply(ElasticNet(), y=y)
-
-    preds = {
-        "pipeline0": pred0,
-        "pipeline1": pred1,
-        "pipeline2": pred2,
-        "pipeline3": pred3,
-    }
-    pred = skrub.choose_from(preds, name="predictions").as_data_op()
-    if show_graph:
-        pred.skb.draw_graph().open()
-    
-    
-    return pred
-
-
-def run_experiment(pred, show_graph=False):
-    cv = KFold(n_splits=5, shuffle=True, random_state=42)
-    runs = 1
-
-    def run_and_average(name, search_func, print_results=True):
-        times = []
-        search_result = None
-        for run_idx in range(runs):
-            t0 = time()
-            search_result = search_func()
-            t1 = time()
-            times.append(t1 - t0)
-            if runs > 1:
-                print(f"  Run {run_idx + 1}/{runs}: {t1 - t0:.4f}s")
-        
-        avg_time = np.mean(times)
-        std_time = np.std(times) if runs > 1 else 0
-        print(f"Gridsearch time (avg over {runs} runs): {avg_time:.4f}s" + 
-                (f" (std: {std_time:.4f}s)" if runs > 1 else ""))
-        
-        if print_results and search_result is not None:
-            if hasattr(search_result, 'results_'):
-                print(search_result.results_)
-            else:
-                print(search_result)
-            print("----------------------------------------")
-        
-        return {"impl": name, "time": avg_time}
-
-    df_vals = []
-
-    # Skrub - non optimized gridsearch (n_jobs=1)
-    print("Skrub - non optimized gridsearch (n_jobs=1)")
-    df_vals.append(run_and_average(
-        "skrub-njobs=1",
-        lambda: pred.skb.make_grid_search(fitted=True, cv=cv, n_jobs=1, scoring="neg_mean_squared_error", refit=False)
-    ))
-
-    # Skrub - non optimized gridsearch (n_jobs=-1)
-    print("Skrub - non optimized gridsearch (n_jobs=-1)")
-    df_vals.append(run_and_average(
-        "skrub-njobs=-1",
-        lambda: pred.skb.make_grid_search(fitted=True, cv=cv, n_jobs=-1, scoring="neg_mean_squared_error", refit=False)
-    ))
-
-    # Stratum - gridsearch (n_jobs=1)
-    print("Stratum - optimized gridsearch (n_jobs=1)")
-    df_vals.append(run_and_average(
-        "stratum-njobs=1",
-        lambda: grid_search(pred, cv=cv, scoring="neg_mean_squared_error")
-    ))
-
-    # Optimization step (only run once)
-    t00 = time()
-    pred_optimized = apply_cse_on_skrub_ir(pred)
-    t01 = time()
-    print("Optimization time: ", t01 - t00) 
-
-    if show_graph:
-        pred_optimized.skb.draw_graph().open()
-
-    # Skrub - optimized gridsearch (n_jobs=1)
-    print("Skrub - optimized gridsearch (n_jobs=1)")
-    df_vals.append(run_and_average(
-        "skrub-optimized-njobs=1",
-        lambda: pred_optimized.skb.make_grid_search(fitted=True, cv=cv, n_jobs=1, scoring="neg_mean_squared_error", refit=False)
-    ))
-
-    # Skrub - optimized gridsearch (n_jobs=-1)
-    print("Skrub - optimized gridsearch (n_jobs=-1)")
-    df_vals.append(run_and_average(
-        "skrub-optimized-njobs=-1",
-        lambda: pred_optimized.skb.make_grid_search(fitted=True, cv=cv, n_jobs=-1, scoring="neg_mean_squared_error", refit=False)
-    ))
-
-    # Stratum - optimized gridsearch (n_jobs=1)
-    print("Stratum - optimized gridsearch (n_jobs=1)")
-    df_vals.append(run_and_average(
-        "stratum-optimized-njobs=1",
-        lambda: grid_search(pred_optimized, cv=cv, scoring="neg_mean_squared_error")
-    ))
-
-    df_vals.append({"impl": "baseline", "time": 3.81})
-
-    df = pd.DataFrame(df_vals)
-    df.to_csv("california_housing_pipelines_benchmark.csv", index=False, header=True, sep=";")
-    print("\nSummary:")
-    print(df)
-
-
-show_graph = False
-t0 = time()
-pred = pipeline_definition(show_graph=show_graph)
-t1 = time()
-print("Pipeline definition time: ", t1 - t0)
-run_experiment(pred, show_graph=show_graph)
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/plot_20newsgroup_results.py b/benchmarks/logical_optimizer/end-to-end/plot_20newsgroup_results.py
deleted file mode 100644
index de96e41a..00000000
--- a/benchmarks/logical_optimizer/end-to-end/plot_20newsgroup_results.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-import numpy as np
-
-base_path = "benchmarks/logical_optimizer/end-to-end/"
-
-data = pd.read_csv(base_path + 'bench_cse_tfidf_gridsearch.csv')
-data["total"] = data["total"].apply(np.round, decimals=2)
-
-labels = ["skrub-njobs=1", "skrub-njobs=-1", "stratum-njobs=1"]
-exp_names = (100, 500, 1000, 1000)
-
-# Publication-quality colorblind-friendly colors
-# Using a 4-color palette: blue, orange, green, red (ColorBrewer inspired)
-pub_colors = ['#F18F01', '#C73E1D', '#6A994E']  # Blue, Orange, Red, Green
-exp_names = (100, 500, 1000, 1000)
-x = np.arange(len(exp_names)) # the label locations
-width = 0.85  # the width of the bars
-x = x* width*(len(labels)+1)
-multiplier = 0
-
-fig, ax = plt.subplots(figsize=(9, 5), dpi=100)
-for scheduler, group in data.groupby("scheduler"):
-    offset = width * multiplier
-    rects = ax.bar(x + offset, group["total"], width=width, label=scheduler, color=pub_colors[multiplier])
-    ax.bar_label(rects, padding=3)
-    multiplier += 1
-
-ax.set_xticks(x + width, exp_names)
-ax.set_yscale("log")
-ax.set_ylabel("Time (s)")
-ax.legend(loc="upper right", ncols=len(labels))
-plt.ylim(0.1, 300)
-ax.grid(axis='y', alpha=0.3, linestyle='--')
-plt.tight_layout()
-plt.savefig(base_path + "20newsgroup_results_bar_plot.pdf")
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/feature_transform.py b/benchmarks/logical_optimizer/end-to-end/uk-house-price/feature_transform.py
deleted file mode 100644
index 591b3cae..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/feature_transform.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import numpy as np
-import pandas as pd
-import stratum as skrub
-from sklearn.preprocessing import OneHotEncoder
-from skrub import TableVectorizer, StringEncoder
-
-file_path = "input/price_paid_records_small.csv"
-df = pd.read_csv(file_path)
-df = df.rename(columns={"Town/City": "Town"}, inplace=False)
-df.drop("Price", axis=1, inplace=True)
-print(df.info())
-
-skrub.set_config(rust_backend=True, debug_timing=True)
-enc = TableVectorizer(high_cardinality=StringEncoder(), low_cardinality=OneHotEncoder(), n_jobs=-1) #default setup
-X_cat_enc = enc.fit_transform(df)
-print(X_cat_enc)
-
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/setup.sh b/benchmarks/logical_optimizer/end-to-end/uk-house-price/setup.sh
deleted file mode 100755
index 6af5e83e..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/setup.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-# download dataset from kaggle
-curl -L -o uk-housing-prices-paid.zip\
-  https://www.kaggle.com/api/v1/datasets/download/hm-land-registry/uk-housing-prices-paid
-
-unzip uk-housing-prices-paid.zip -d tmp
-mkdir -p input
-mv tmp/* input/
-rm -rf tmp
-rm uk-housing-prices-paid.zip
-
-# downsample for testing:
-head -100000 input/price_paid_records.csv > input/price_paid_records_small.csv
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/tabvec_lightgbm.py b/benchmarks/logical_optimizer/end-to-end/uk-house-price/tabvec_lightgbm.py
deleted file mode 100644
index eae7cdf4..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/tabvec_lightgbm.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from time import perf_counter
-import pandas as pd
-import polars
-from joblib import parallel_backend
-from sklearn.metrics import make_scorer, r2_score
-
-#import skrub
-import stratum as skrub
-from lightgbm import LGBMRegressor
-from sklearn.model_selection import train_test_split, ShuffleSplit
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
-from skrub import StringEncoder, TableVectorizer
-import cProfile
-import pstats
-pr = cProfile.Profile()
-
-# 1. Load Data
-dtypes = {
-    "Transaction unique identifier": "category",
-    "Price": "int32",
-    "Property Type": "category",
-    "Old/New": "category",
-    "Duration": "category",
-    "Town/City": "category",
-    "District": "category",
-    "Country": "category",
-    "County": "category",
-    "PPDCategory Type": "category",
-    "Record Status - monthly file only": "category"
-}
-file_path = "input/price_paid_records_small.csv"
-#df_raw = pd.read_csv(file_path, dtype=dtypes) #setting datatypes reduces size and speeds up
-df_raw = pd.read_csv(file_path) #setting datatypes reduces size and speeds up
-#print(df_raw.memory_usage(deep=True).sum() / 1024**2) #in-memory size in MB
-print(df_raw.info())
-df = skrub.as_data_op(df_raw)
-
-y = df["Price"].skb.mark_as_y()
-X = df.drop("Price", axis=1).skb.mark_as_X()
-
-# 3. Pre-processing (pre_process_2 logic)
-vec = TableVectorizer(n_jobs=1,
-    high_cardinality=StringEncoder(),
-    low_cardinality=OneHotEncoder(drop='if_binary', dtype='float32', handle_unknown='ignore', sparse_output=False)
-)
-X_enc = X.skb.apply(vec)
-X_vec = X_enc.skb.apply(StandardScaler())
-
-# 4. Modeling
-model = LGBMRegressor(random_state=42)
-preds = X_vec.skb.apply(model, y=y)
-
-# 5. Grid search
-skrub.set_config(rust_backend=True, debug_timing=False, scheduler=True, stats=True)
-cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
-scorer = make_scorer(r2_score)
-t0 = perf_counter()
-#pr.enable()
-search = preds.skb.make_grid_search(cv=cv, n_jobs=1, scoring=scorer, fitted=True)
-#pr.disable()
-t1 = perf_counter()
-print(f"Time taken: {t1 - t0} seconds")
-print(search.results_)
-
-#stats = pstats.Stats(pr).sort_stats("tottime")
-#stats.print_stats(60)
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp.py b/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp.py
deleted file mode 100644
index bbe6cf3d..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from sklearn.metrics import make_scorer, mean_squared_error, r2_score
-from sklearn.model_selection import KFold, ShuffleSplit
-import pandas as pd
-from xgboost import XGBRegressor
-from lightgbm import LGBMRegressor
-from sklearn.linear_model import ElasticNet,  Ridge
-
-from time import perf_counter
-import numpy as np
-from sklearn.preprocessing import StandardScaler
-import stratum as skrub
-test=True
-
-import logging
-
-logging.basicConfig(level=logging.DEBUG)
-
-file_path = "price_paid_records_1M.csv" if test else "input/price_paid_records.csv"
-df = skrub.as_data_op(file_path).skb.apply_func(pd.read_csv).skb.subsample(n=1000)
-print(df.columns.skb.preview())
-df = df.rename(columns={"Town/City": "Town"}, inplace=False)
-y = df["Price"].skb.mark_as_y()
-X = df.drop("Price", axis=1).skb.mark_as_X()
-
-from sklearn.base import BaseEstimator, TransformerMixin
-class TargetEncoder(BaseEstimator, TransformerMixin):
-    def fit(self, X, y=None):
-        print("fit target encoder")
-        self.global_mean_ = y.mean()
-        tmp = pd.concat([X, y], axis=1)
-        self.cols = X.columns
-        self.means = {}
-        for col in self.cols:
-            self.means[col] = tmp.groupby(col)[tmp.columns[-1]].mean()
-        return self
-
-    def transform(self, X):
-        print("transform target encoder")
-        X_out = X.copy()
-        for col in self.cols:
-            X_out[col] = X_out[col].map(self.means[col]).fillna(self.global_mean_)
-        return X_out
-
-    def fit_transform(self, X, y=None):
-        self.fit(X, y)
-        return self.transform(X)
-
-    def get_feature_names_out(self):
-        return self.cols
-
-
-def pre_process_1(X, y):
-    date = X["Date of Transfer"].skb.apply_func(pd.to_datetime)
-    X = X.assign(
-        year=date.dt.year, 
-        month=date.dt.month, 
-        day=date.dt.day, 
-        dayofweek=date.dt.dayofweek, 
-        hour=date.dt.hour)
-    X = X.assign(
-        month_sin=(date.dt.month * (2 * np.pi / 12)).apply(np.sin),
-        month_cos=(date.dt.month * (2 * np.pi / 12)).apply(np.cos),
-        day_sin=(date.dt.day * (2 * np.pi / 30)).apply(np.sin),
-        day_cos=(date.dt.day * (2 * np.pi / 30)).apply(np.cos),
-        dayofweek_sin=(date.dt.dayofweek * (2 * np.pi / 7)).apply(np.sin),
-        dayofweek_cos=(date.dt.dayofweek * (2 * np.pi / 7)).apply(np.cos),
-        hour_sin=(date.dt.hour * (2 * np.pi / 24)).apply(np.sin),
-        hour_cos=(date.dt.hour * (2 * np.pi / 24)).apply(np.cos),
-    )
-    X = X.drop([
-        "Date of Transfer", 
-        'Duration', 
-        'Transaction unique identifier', 
-        'PPDCategory Type', 
-        'Record Status - monthly file only'], axis=1)
-
-    cat_selector = skrub.selectors.filter(lambda col: col.dtype == "object")
-    X_cat = X.skb.select(cat_selector)
-    X_cat_enc = X_cat.skb.apply(skrub.StringEncoder())
-    num_selector = skrub.selectors.filter(lambda col: col.dtype != "object")
-
-    X_te = X[["District", "County", "Town"]].skb.apply(TargetEncoder(), y=y)
-    X_te = X_te.rename(columns={"District": "district_te", "County": "county_te", "Town": "town_te"})
-    X_num = X.skb.select(num_selector)
-    X_num = X_num.skb.concat([X_te], axis=1)
-
-    X_num_scaled = X_num.skb.apply(StandardScaler())
-    X_vec = X_num_scaled.skb.concat([X_cat_enc], axis=1)
-    return X_vec
-
-def pre_process_2(X):
-    X_enc = X.skb.apply(skrub.TableVectorizer())
-    return X_enc
-
-X_1 = pre_process_1(X,y)
-X_2 = pre_process_2(X)
-X_enc = skrub.choose_from({
-    "1": X_1, 
-    "2": X_2
-    }, name="feat_eng").as_data_op()
-
-models = {
-    "Ridge": Ridge(random_state=42),
-    "XGBoost": XGBRegressor(random_state=42),
-    "LightGBM": LGBMRegressor(random_state=42),
-    "ElasticNet": ElasticNet(random_state=42),
-}
-preds = {k: X_enc.skb.apply(model, y=y) for k,model in models.items()}
-preds = skrub.choose_from(preds, name="models").as_data_op()
-preds = preds.skb.apply_func(lambda a, m: (a, print(m))[0], skrub.eval_mode())
-
-# play with cvs
-cv = 3
-cv = ShuffleSplit(n_splits=1,test_size=0.2,random_state=42) if cv == 1 else KFold(n_splits=cv, shuffle=True, random_state=42)
-scorer = make_scorer(r2_score)
-t0 = perf_counter()
-with skrub.config(scheduler=True, stats=20, rust_backend=True):
-    search_stratum = preds.skb.make_grid_search(cv=cv, n_jobs=1, fitted=True, scoring=scorer)
-t1 = perf_counter()
-print("="*80)
-print(f"Stratum gridsearch scheduler time: {t1 - t0} seconds")
-print("="*80)
-search = preds.skb.make_grid_search(cv=cv, n_jobs=1, fitted=True, scoring=scorer, refit=False)
-t2 = perf_counter()
-print("="*80)
-print(f"Skrub default gridsearch time: {t2 - t1} seconds")
-print("="*80)
-print("Results:")
-print(search.results_)
-print(search_stratum.results_)
-print("="*80)
\ No newline at end of file
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp2.py b/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp2.py
deleted file mode 100644
index 63112ab5..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp2.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import cProfile
-from joblib import parallel_backend
-from sklearn.metrics import r2_score, make_scorer
-
-import stratum as skrub
-from skrub import StringEncoder
-from sklearn.model_selection import KFold
-#import skrub
-import pandas as pd
-from xgboost import XGBRegressor
-from lightgbm import LGBMRegressor
-from sklearn.linear_model import ElasticNet,  Ridge
-
-from time import perf_counter
-import numpy as np
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
-import pstats
-
-pr = cProfile.Profile()
-
-test=True
-
-file_path = "input/price_paid_records_small.csv" if test else "input/price_paid_records.csv"
-df = skrub.as_data_op(file_path).skb.apply_func(pd.read_csv).skb.subsample(n=1000)
-print(df.columns.skb.preview())
-df = df.rename(columns={"Town/City": "Town"}, inplace=False)
-y = df["Price"].skb.mark_as_y()
-X = df.drop("Price", axis=1).skb.mark_as_X()
-
-from sklearn.base import BaseEstimator, TransformerMixin
-class TargetEncoder(BaseEstimator, TransformerMixin):
-    def fit(self, X, y=None):
-        print("fit target encoder")
-        self.global_mean_ = y.mean()
-        tmp = pd.concat([X, y], axis=1)
-        self.cols = X.columns
-        self.means = {}
-        for col in self.cols:
-            self.means[col] = tmp.groupby(col)[tmp.columns[-1]].mean()
-        return self
-
-    def transform(self, X):
-        print("transform target encoder")
-        X_out = X.copy()
-        for col in self.cols:
-            X_out[col] = X_out[col].map(self.means[col]).fillna(self.global_mean_)
-        return X_out
-
-    def fit_transform(self, X, y=None):
-        self.fit(X, y)
-        return self.transform(X)
-
-    def get_feature_names_out(self):
-        return self.cols
-
-
-def pre_process_1(X, y):
-    date = X["Date of Transfer"].skb.apply_func(pd.to_datetime)
-    X = X.assign(
-        year=date.dt.year, 
-        month=date.dt.month, 
-        day=date.dt.day, 
-        dayofweek=date.dt.dayofweek, 
-        hour=date.dt.hour)
-    X = X.assign(
-        month_sin=(date.dt.month * (2 * np.pi / 12)).apply(np.sin),
-        month_cos=(date.dt.month * (2 * np.pi / 12)).apply(np.cos),
-        day_sin=(date.dt.day * (2 * np.pi / 30)).apply(np.sin),
-        day_cos=(date.dt.day * (2 * np.pi / 30)).apply(np.cos),
-        dayofweek_sin=(date.dt.dayofweek * (2 * np.pi / 7)).apply(np.sin),
-        dayofweek_cos=(date.dt.dayofweek * (2 * np.pi / 7)).apply(np.cos),
-        hour_sin=(date.dt.hour * (2 * np.pi / 24)).apply(np.sin),
-        hour_cos=(date.dt.hour * (2 * np.pi / 24)).apply(np.cos),
-    )
-    X = X.drop([
-        "Date of Transfer", 
-        'Duration', 
-        'Transaction unique identifier', 
-        'PPDCategory Type', 
-        'Record Status - monthly file only'], axis=1)
-
-    cat_selector = skrub.selectors.filter(lambda col: col.dtype == "object")
-    X_cat = X.skb.select(cat_selector)
-    X_cat_enc = X_cat.skb.apply(skrub.StringEncoder())
-    num_selector = skrub.selectors.filter(lambda col: col.dtype != "object")
-
-    X_te = X[["District", "County", "Town"]].skb.apply(TargetEncoder(), y=y)
-    X_te = X_te.rename(columns={"District": "district_te", "County": "county_te", "Town": "town_te"})
-    X_num = X.skb.select(num_selector)
-    X_num = X_num.skb.concat([X_te], axis=1)
-
-    X_num_scaled = X_num.skb.apply(StandardScaler())
-    X_vec = X_num_scaled.skb.concat([X_cat_enc], axis=1)
-    return X_vec
-
-def pre_process_2(X):
-    X_enc = X.skb.apply(skrub.TableVectorizer(high_cardinality=StringEncoder(), low_cardinality=OneHotEncoder()))
-    # Scaling is necessary for ElasticNet and Ridge (converge quick and fast)
-    X_vec = X_enc.skb.apply(StandardScaler())
-    return X_vec
-
-X_1 = pre_process_1(X,y)
-X_2 = pre_process_2(X)
-X_enc = skrub.choose_from({
-    "data engineering 1": X_1,
-    "data engineering 2": X_2
-    }, name="X_enc").as_data_op()
-
-X_enc = X_enc.skb.apply_func(lambda x, m: (x, print(m))[0], skrub.eval_mode())
-
-models = {
-    "Ridge": Ridge(random_state=42),
-    "XGBoost": XGBRegressor(random_state=42),
-    "LightGBM": LGBMRegressor(random_state=42),
-    "ElasticNet": ElasticNet(random_state=42),
-}
-preds = {k: X_enc.skb.apply(model, y=y) for k,model in models.items()}
-preds = skrub.choose_from(preds, name="preds").as_data_op()
-
-skrub.set_config(rust_backend=True, debug_timing=False, scheduler=True, stats=True)
-cv = KFold(n_splits=3, shuffle=True, random_state=42)
-scorer = make_scorer(r2_score)
-t0 = perf_counter()
-#pr.enable()
-#with parallel_backend('threading'):
-search = preds.skb.make_grid_search(cv=cv, scoring=scorer, n_jobs=1, fitted=True, refit=True)
-#pr.disable()
-t1 = perf_counter()
-print(f"Time taken: {t1 - t0} seconds")
-print(search.results_)
-#stats = pstats.Stats(pr).sort_stats("tottime")
-#stats.print_stats(60)
diff --git a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp_polars.py b/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp_polars.py
deleted file mode 100644
index 15363571..00000000
--- a/benchmarks/logical_optimizer/end-to-end/uk-house-price/two_level_gridsearch_exp_polars.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from sklearn.metrics import make_scorer, mean_squared_error, r2_score
-from sklearn.model_selection import KFold, ShuffleSplit
-import polars as pl
-from xgboost import XGBRegressor
-from lightgbm import LGBMRegressor
-from sklearn.linear_model import ElasticNet,  Ridge
-
-from time import perf_counter
-import numpy as np
-from sklearn.preprocessing import StandardScaler
-import stratum as skrub
-test=True
-
-import logging
-
-logging.basicConfig(level=logging.INFO)
-
-file_path = "input/price_paid_records_small.csv" if test else "input/price_paid_records.csv"
-df = skrub.as_data_op(file_path).skb.apply_func(pl.read_csv).skb.subsample(n=1000)
-df = df.rename({"Town/City": "Town"})
-y = df["Price"].skb.mark_as_y()
-X = df.drop("Price").skb.mark_as_X()
-
-from sklearn.base import BaseEstimator, TransformerMixin
-class TargetEncoder(BaseEstimator, TransformerMixin):
-    def fit(self, X, y=None):
-        print("fit target encoder")
-        self.global_mean_ = y.mean()
-        y_name = y.name if isinstance(y, pl.Series) and y.name else 'target'
-        # Handle both Polars Series and numpy arrays
-        if isinstance(y, pl.Series):
-            tmp = X.with_columns(y.alias(y_name))
-        else:
-            tmp = X.with_columns(pl.Series(y_name, y))
-        self.cols = X.columns
-        self.means = {}
-        for col in self.cols:
-            # Store as DataFrame with column name and mean for efficient join
-            self.means[col] = tmp.group_by(col).agg(pl.col(y_name).mean().alias(f"{col}_mean"))
-        return self
-
-    def transform(self, X):
-        print("transform target encoder")
-        X_out = X.clone()
-        for col in self.cols:
-            # Use join instead of map for better performance
-            mean_col_name = f"{col}_mean"
-            X_out = X_out.join(
-                self.means[col],
-                on=col,
-                how="left"
-            ).with_columns(
-                pl.col(mean_col_name).fill_null(self.global_mean_).alias(col)
-            ).drop(mean_col_name)
-        return X_out
-
-    def fit_transform(self, X, y=None):
-        self.fit(X, y)
-        return self.transform(X)
-
-    def get_feature_names_out(self):
-        return self.cols
-
-
-def pre_process_1(X, y):
-    date = X["Date of Transfer"].str.to_datetime()
-    X = X.with_columns(
-        year=date.dt.year(), 
-        month=date.dt.month(), 
-        day=date.dt.day(), 
-        dayofweek=date.dt.weekday(), 
-        hour=date.dt.hour())
-    X = X.with_columns(
-        month_sin=(date.dt.month() * (2 * np.pi / 12)).sin(),
-        month_cos=(date.dt.month() * (2 * np.pi / 12)).cos(),
-        day_sin=(date.dt.day() * (2 * np.pi / 30)).sin(),
-        day_cos=(date.dt.day()   * (2 * np.pi / 30)).cos(),
-        dayofweek_sin=(date.dt.weekday() * (2 * np.pi / 7)).sin(),
-        dayofweek_cos=(date.dt.weekday() * (2 * np.pi / 7)).cos(),
-        hour_sin=(date.dt.hour() * (2 * np.pi / 24)).sin(),
-        hour_cos=(date.dt.hour() * (2 * np.pi / 24)).cos(),
-    )
-    X = X.drop([
-        "Date of Transfer", 
-        'Duration', 
-        'Transaction unique identifier', 
-        'PPDCategory Type', 
-        'Record Status - monthly file only'])
-
-    cat_selector = skrub.selectors.filter(lambda col: col.dtype == pl.String)
-    X_cat = X.skb.select(cat_selector)
-    X_cat_enc = X_cat.skb.apply(skrub.StringEncoder())
-    num_selector = skrub.selectors.filter(lambda col: col.dtype != pl.String)
-
-    X_te = X[["District", "County", "Town"]].skb.apply(TargetEncoder(), y=y)
-    X_te = X_te.rename({"District": "district_te", "County": "county_te", "Town": "town_te"})
-    X_num = X.skb.select(num_selector)
-    X_num = X_num.skb.concat([X_te], axis=1)
-
-    X_num_scaled = X_num.skb.apply(StandardScaler())
-    X_vec = X_num_scaled.skb.concat([X_cat_enc], axis=1)
-    return X_vec
-
-def pre_process_2(X):
-    X_enc = X.skb.apply(skrub.TableVectorizer())
-    return X_enc
-X_1 = pre_process_1(X,y)
-print(X_1.skb.preview())
-X_2 = pre_process_2(X)
-X_enc = skrub.choose_from({
-    "1": X_1, 
-    "2": X_2
-    }, name="feat_eng").as_data_op()
-
-models = {
-    "Ridge": Ridge(random_state=42),
-    "XGBoost": XGBRegressor(random_state=42),
-    "LightGBM": LGBMRegressor(random_state=42),
-    "ElasticNet": ElasticNet(random_state=42),
-}
-preds = {k: X_enc.skb.apply(model, y=y) for k,model in models.items()}
-preds = skrub.choose_from(preds, name="models").as_data_op()
-preds = preds.skb.apply_func(lambda a, m: (a, print(m))[0], skrub.eval_mode())
-
-# play with cvs
-cv = 1
-cv = ShuffleSplit(n_splits=1,test_size=0.2,random_state=42) if cv == 1 else KFold(n_splits=cv, shuffle=True, random_state=42)
-scorer = make_scorer(mean_squared_error)
-t0 = perf_counter()
-with skrub.config(scheduler=True, stats=True):
-    search_stratum = preds.skb.make_grid_search(cv=cv, n_jobs=1, fitted=True, scoring=scorer)
-t1 = perf_counter()
-print("="*80)
-print(f"Stratum gridsearch scheduler time: {t1 - t0} seconds")
-print("="*80)
-search = preds.skb.make_grid_search(cv=cv, n_jobs=-1, fitted=True, scoring=scorer, refit=False)
-t2 = perf_counter()
-print("="*80)
-print(f"Skrub default gridsearch time: {t2 - t1} seconds")
-print("="*80)
-print("Results:")
-print(search.results_)
-print(search_stratum.results_)
-print("="*80)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 79da8e4b..a0030c02 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,10 +25,13 @@ classifiers = [
 requires-python = ">=3.11"
 dependencies = [
     "scikit-learn==1.8",
-    "skrub>=0.3",
+    "skrub==0.6.2",
+    "pandas==2.3.3",
     "polars",
     "graphviz",
     "pyarrow>=22.0.0",
+    "joblib",
+    "psutil",
 ]
 
 [project.optional-dependencies]
diff --git a/stratum/_api.py b/stratum/_api.py
index 687471af..a5fd7413 100644
--- a/stratum/_api.py
+++ b/stratum/_api.py
@@ -3,18 +3,35 @@
 
 from stratum._config import FLAGS
 from stratum.logical_optimizer._optimize import optimize
-from stratum.runtime._scheduler import Scheduler
+from stratum.runtime._caching import Cache
+from stratum.runtime._physical_planning import physical_planning
+from stratum.runtime._scheduler import ParallelScheduler, SequentialScheduler
+from time import perf_counter
 
-
-def grid_search(dag: DataOp, cv=None, scoring=None, return_predictions=False):
+def grid_search(dag: DataOp, cv=None, scoring=None, return_predictions=False, env=None):
     """Perform grid search with cross-validation on a DataOp DAG."""
-
+    t0 = perf_counter()
     show_stats = FLAGS.stats is not None
-    ops_ordered = optimize(dag)
-    sched = Scheduler(ops_ordered, show_stats)
+    env_extra = env if env else {}
+    env = dag.skb.get_data()
+    for k, v in env_extra.items():
+        env[k] = v
+    cache = None
+    if FLAGS.caching:
+        cache = Cache()
+    dag = optimize(dag)
+    if FLAGS.scheduler_parallelism is not None:
+        dag = physical_planning(dag)
+        sched = ParallelScheduler(dag, {}, show_stats, backend=FLAGS.scheduler_parallelism, cache=cache, env=env)
+    else:
+        sched = SequentialScheduler(dag, show_stats, cache=cache, env=env, t0=t0)
 
     preds = sched.grid_search(cv, scoring, return_predictions)
 
+    if cache is not None:
+        # persist cache to disk; guard on the object itself, since FLAGS.caching may have changed since the cache was created
+        cache.persist()
+
     # Heavy hitters
     if show_stats:
         table = pd.DataFrame(sched.timings, columns=["Op", "time"])
@@ -22,13 +39,30 @@ def grid_search(dag: DataOp, cv=None, scoring=None, return_predictions=False):
         table.columns = ["Time", "Count"]
         table = table.reset_index().sort_values(by="Time", ascending=False)
         print("\n" + "=" * 80)
-        print(f"Heavy hitters (sorted by time spent in DataOp evaluation):")
+        print(f"Heavy hitters (sorted by time spent in DataOp evaluation):\n")
         print(table.head(FLAGS.stats).to_string(index=False))
+        table.head(FLAGS.stats).to_csv("heavy_hitters.csv", index=False)
         print("=" * 80 + "\n")
+        if FLAGS.caching and cache is not None:
+            print("\n" + "=" * 80)
+            print("Cache timing statistics:\n")
+            cache_stats = []
+            for op_name, duration in cache.timings:
+                cache_stats.append({"Operation": op_name, "Time (s)": f"{duration:.4f}", "Count": "1"})
+            if cache.hit_count > 0:
+                cache_stats.append({"Operation": "cache_hits", "Time (s)": f"{cache.hit_time:.4f}", "Count": f"{cache.hit_count}"})
+            if cache.miss_count > 0:
+                cache_stats.append({"Operation": "cache_misses", "Time (s)": "-", "Count": f"{cache.miss_count}"})
+            if cache.set_count > 0:
+                cache_stats.append({"Operation": "cache_sets", "Time (s)": f"{cache.set_time:.4f}", "Count": f"{cache.set_count}"})
+            if cache_stats:
+                cache_table = pd.DataFrame(cache_stats)
+                print(cache_table.to_string(index=False))
+            print("=" * 80 + "\n")
+            
     return (sched,preds) if return_predictions else sched
 
-
 def evaluate(dag: DataOp, seed: int = 42, test_size = 0.2, cse: bool = False):
     """Evaluate a DataOp DAG with train/test split."""
     ops_ordered = optimize(dag)
-    return Scheduler(ops_ordered).evaluate(seed, test_size)
\ No newline at end of file
+    return SequentialScheduler(ops_ordered).evaluate(seed, test_size)
\ No newline at end of file
diff --git a/stratum/_config.py b/stratum/_config.py
index 8468be2e..2389d6b9 100644
--- a/stratum/_config.py
+++ b/stratum/_config.py
@@ -5,6 +5,9 @@
 import logging
 logger = logging.getLogger(__name__)
 
+# Sentinel to detect if scheduler_parallelism was explicitly provided
+_UNSET = object()
+
 def _env_bool(name, default=False):
     val = os.getenv(name)
     if val is None:
@@ -20,6 +23,15 @@ def _env_int(name, default=0):
     v = os.getenv(name)
     return int(v) if v is not None else int(default)
 
+def _env_str(name, default=None):
+    v = os.getenv(name)
+    if v is None:
+        return default
+    s = str(v).strip().lower()
+    if s in ("", "none", "null"):
+        return None
+    return s
+
 @dataclass
 class _Flags:
     rust_backend: bool = _env_bool("SKRUB_RUST", False)
@@ -28,9 +40,12 @@ class _Flags:
     allow_patch: bool = _env_bool("SKRUB_RUST_ALLOW_PATCH", True)
     scheduler: bool =  False
     stats: int | None = None # TODO if we want to use that flag on other runtimes we need to set envirenment variable as well
-    open_graph: bool = True,
+    open_graph: bool = False  # no trailing comma: `False,` would make the default the truthy tuple (False,)
+    cse: bool = True  # no trailing comma: keep the default a plain bool, not the tuple (True,)
     DEBUG: bool = False
+    scheduler_parallelism: str | None = _env_str("STRATUM_SCHEDULER_PARALLELISM", None)
     force_polars: bool = _env_bool("STRATUM_FORCE_POLARS", False)
+    caching: bool = _env_bool("STRATUM_CACHING", False)
 
 FLAGS = _Flags()
 
@@ -42,7 +57,10 @@ def set_config(rust_backend: bool | None = None,
            scheduler: bool | None = None,
            open_graph: bool | None = None,
            DEBUG: bool | None = None,
-           force_polars: bool | None = None) -> None:
+           force_polars: bool | None = None,
+           scheduler_parallelism: str | None = _UNSET,
+           caching: bool | None = None,
+           cse: bool | None = None) -> None:
     """Runtime toggles (synced env for Rust to read).
 
     Parameter:
@@ -72,6 +90,13 @@ def set_config(rust_backend: bool | None = None,
 
         force_polars: bool, default false
             Force use of Polars instead of Pandas for dataframe operations.
+
+        scheduler_parallelism: str | None, default unchanged
+            Scheduler parallelism mode. None selects SequentialScheduler; "threading", "process"
+            or "auto" selects ParallelScheduler with that backend. Omit to keep the current value.
+
+        caching: bool, default false
+            Enable/disable caching for DataOp operations.
     """
     if rust_backend is not None:
         FLAGS.rust_backend = bool(rust_backend)
@@ -102,6 +127,22 @@ def set_config(rust_backend: bool | None = None,
     if force_polars is not None:
         FLAGS.force_polars = bool(force_polars)
         os.environ["STRATUM_FORCE_POLARS"] = "1" if FLAGS.force_polars else "0"
+    if scheduler_parallelism is not _UNSET:
+        if scheduler_parallelism is not None:
+            if scheduler_parallelism not in ("threading", "process", "auto"):
+                raise ValueError(f"scheduler_parallelism must be None, 'threading', 'process', or 'auto', got {scheduler_parallelism}")
+            FLAGS.scheduler_parallelism = scheduler_parallelism
+            os.environ["STRATUM_SCHEDULER_PARALLELISM"] = scheduler_parallelism
+        else:
+            # Explicitly set to None
+            FLAGS.scheduler_parallelism = None
+            if "STRATUM_SCHEDULER_PARALLELISM" in os.environ:
+                del os.environ["STRATUM_SCHEDULER_PARALLELISM"]
+    if caching is not None:
+        FLAGS.caching = bool(caching)
+        os.environ["STRATUM_CACHING"] = "1" if FLAGS.caching else "0"
+    if cse is not None:
+        FLAGS.cse = bool(cse)
 
 
 def get_config() -> dict:
@@ -116,6 +157,9 @@ def get_config() -> dict:
         "open_graph": FLAGS.open_graph,
         "DEBUG" : FLAGS.DEBUG,
         "force_polars": FLAGS.force_polars,
+        "scheduler_parallelism": FLAGS.scheduler_parallelism,
+        "caching": FLAGS.caching,
+        "cse": FLAGS.cse,
     }
 
 @contextmanager
diff --git a/stratum/logical_optimizer/_dataframe_ops.py b/stratum/logical_optimizer/_dataframe_ops.py
index 763590e1..693a3e4e 100644
--- a/stratum/logical_optimizer/_dataframe_ops.py
+++ b/stratum/logical_optimizer/_dataframe_ops.py
@@ -1,18 +1,21 @@
-from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, BinOp, CallOp, GetAttrOp, GetItemOp, MethodCallOp, Op, ValueOp
+from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, BaseEstimatorOp, BinOp, CallOp, GetAttrOp, GetItemOp, MethodCallOp, Op, ValueOp, VariableOp
 from pandas import DataFrame
 import pandas as pd
 import polars as pl
 from stratum.logical_optimizer._op_utils import topological_iterator
 from stratum._config import FLAGS
-
-POLARS = FLAGS.force_polars
+from stratum.runtime._hash_utils import stable_hash
+from skrub._data_ops._data_ops import DataOp
+import logging
+from numpy import sin, cos
+logger = logging.getLogger(__name__)
 
 class DataSourceOp(Op):
     def __init__(self, data: DataFrame = None, file_path: str = None, _format: str = None,
-                 read_args: tuple | list = None, read_kwargs: dict = None, is_X=False, is_y=False, outputs: list[Op] = None):
+                 read_args: tuple | list = None, read_kwargs: dict = None, is_X=False, is_y=False, outputs: list[Op] = None, inputs: list[Op] = None):
         if outputs is None:
             outputs = []
-        super().__init__(name="Frame" if data is not None else f"read_{_format}", is_X=is_X, is_y=is_y, outputs=outputs, inputs=None)
+        super().__init__(name="Frame" if data is not None else f"read_{_format}", is_X=is_X, is_y=is_y, outputs=outputs, inputs=inputs)
         if read_kwargs is not None:
             self.check_kwargs(read_kwargs)
         self.data = data
@@ -22,18 +25,26 @@ def __init__(self, data: DataFrame = None, file_path: str = None, _format: str =
         self.read_kwargs = read_kwargs
         self.is_dataframe_op = True
 
+    def simple_hash(self):
+        if self.data is not None:
+            raise NotImplementedError("Hashing is not implemented for DataSourceOp with data yet")
+        else:
+            return stable_hash((self.file_path, self.format, self.read_args, self.read_kwargs))
+
     def process(self, mode: str, environment: dict):
+        logger.debug(f"Using Polars: {FLAGS.force_polars}")
         if self.data is not None:
-            if POLARS:
+            if FLAGS.force_polars:
                 self.intermediate = pl.DataFrame(self.data)
             else:
                 self.intermediate = self.data
 
         else:
-            if POLARS:
-                self.intermediate = pl.read_csv(self.file_path, *self.read_args, **self.read_kwargs)
+            file_path = self.inputs[0].intermediate if self.file_path is DATA_OP_PLACEHOLDER else self.file_path
+            if FLAGS.force_polars:
+                self.intermediate = pl.read_csv(file_path, *self.read_args, **self.read_kwargs)
             else:
-                self.intermediate = pd.read_csv(self.file_path, *self.read_args, **self.read_kwargs)
+                self.intermediate = pd.read_csv(file_path, *self.read_args, **self.read_kwargs)
 
     def clone(self):
         raise ValueError(f"We should not clone DataSourceOp objects.")
@@ -50,12 +61,15 @@ def __init__(self, func: str, args: tuple | list = None, kwargs: dict = None, in
         self.kwargs = kwargs
         self.is_dataframe_op = True
 
+    def simple_hash(self):
+        return stable_hash((self.func, self.args, self.kwargs))
+
     def process(self, mode: str, environment: dict):
         iter_ins = iter(self.inputs)
         _obj = next(iter_ins).intermediate
         _args = [next(iter_ins).intermediate if arg is DATA_OP_PLACEHOLDER else arg for arg in self.args]
         _kwargs = {k: next(iter_ins).intermediate if v is DATA_OP_PLACEHOLDER else v for k, v in self.kwargs.items()}
-        if POLARS:
+        if FLAGS.force_polars:
             if "columns" in _kwargs:
                 _args.append(_kwargs["columns"])
             self.intermediate = getattr(_obj, self.func)(*_args)
@@ -90,7 +104,7 @@ def _extract_args_and_kwargs(self):
     def process(self, mode: str, environment: dict):
         _obj, _args, _kwargs = self._extract_args_and_kwargs()
         if self.is_method:
-            if POLARS:
+            if FLAGS.force_polars:
                 raise ValueError(f"Unsupported method: {self.func}")
             else:
                 self.intermediate = getattr(_obj, self.func)(*_args, **_kwargs)
@@ -98,14 +112,18 @@ def process(self, mode: str, environment: dict):
             self.intermediate = self.func(_obj, *_args, **_kwargs)
 
 class DropOp(ProjectionOp):
+    fields = ["args", "kwargs", "columns"]
     def __init__(self, args: tuple | list = (), kwargs: dict = {},
         inputs: list[Op] = None, outputs: list[Op] = None, columns: list[str] = None):
         super().__init__(args=args, kwargs=kwargs, inputs=inputs, outputs=outputs, columns=columns)
 
+    def simple_hash(self):
+        return stable_hash((self.args, self.kwargs))
+
     def process(self, mode: str, environment: dict):
         _obj, _args, _kwargs = self._extract_args_and_kwargs()
 
-        if POLARS:
+        if FLAGS.force_polars:
             if "columns" in _kwargs:
                 _args.append(_kwargs["columns"])
             if "ignore_errors" in _kwargs:
@@ -131,16 +149,26 @@ def process(self, mode: str, environment: dict):
             else:
                 n_cols = len(self.columns)
 
-        if POLARS:
+        if FLAGS.force_polars:
             if isinstance(_obj, pl.Series):
                 n_cols = 1
             if n_cols == 1:
-                self.intermediate = _obj.map_elements(*_args, **_kwargs)
+                if _args[0] == sin:
+                    logger.debug("Rewrite UDF sin to polars sin")
+                    self.intermediate = _obj.sin()
+                elif _args[0] == cos:
+                    logger.debug("Rewrite UDF cos to polars cos")
+                    self.intermediate = _obj.cos()
+                else:
+                    self.intermediate = _obj.map_elements(*_args, **_kwargs)
             else:
                 self.intermediate = _obj.map_rows(*_args, **_kwargs)
         else:
             self.intermediate = _obj.apply(*_args, **_kwargs)
 
+    def simple_hash(self):
+        return stable_hash((self.args, self.kwargs, "apply_udf"))
+
 class AssignOp(ProjectionOp):
     def __init__(self, args: tuple | list = (), kwargs: dict = {},
         inputs: list[Op] = None, outputs: list[Op] = None, columns: list[str] = None):
@@ -148,26 +176,42 @@ def __init__(self, args: tuple | list = (), kwargs: dict = {},
 
     def process(self, mode: str, environment: dict):
         _obj, _args, _kwargs = self._extract_args_and_kwargs()
-        if POLARS:
-            self.intermediate = _obj.with_columns(*_args, **_kwargs)
+        if FLAGS.force_polars:
+            checked_kwargs = {}
+            for k, v in _kwargs.items():
+                if v is DATA_OP_PLACEHOLDER:
+                    raise NotImplementedError("Is not yet supported, please report this issue")
+                elif isinstance(v, pd.Series) or isinstance(v, pd.DataFrame):
+                    logger.warning(f"Converting pandas object to polars object for column {k}")
+                    checked_kwargs[k] = pl.from_pandas(v)
+                else:
+                    checked_kwargs[k] = v
+            self.intermediate = _obj.with_columns(*_args, **checked_kwargs)
         else:
             self.intermediate = _obj.assign(*_args, **_kwargs)
 
+    def simple_hash(self):
+        return stable_hash((self.args, self.kwargs, "assign"))
+
 class DatetimeConversionOp(ProjectionOp):
     def __init__(self, args: tuple | list = (), kwargs: dict = {},
         inputs: list[Op] = None, outputs: list[Op] = None, columns: list[str] = None):
-        super().__init__(args=args, kwargs=kwargs, inputs=inputs, outputs=outputs, columns=columns)
+        super().__init__(args=args, inputs=inputs, outputs=outputs, columns=columns)
+        self.strict = kwargs.get("errors", "raise") == "raise"
 
     def process(self, mode: str, environment: dict):
-        if POLARS:
-            self.intermediate = self.inputs[0].intermediate.str.to_datetime(*self.args, **self.kwargs)
+        if FLAGS.force_polars:
+            self.intermediate = self.inputs[0].intermediate.str.to_datetime(*self.args, strict=self.strict)
         else:
-            self.intermediate = pd.to_datetime(self.inputs[0].intermediate, *self.args, **self.kwargs)
+            self.intermediate = pd.to_datetime(self.inputs[0].intermediate, *self.args, errors="raise" if self.strict else "coerce")
+
+    def simple_hash(self):
+        return stable_hash((self.args, self.kwargs, "datetime_conversion"))
 
 class GetAttrProjectionOp(Op):
     fields = ["attr_name"]
 
-    POLARS_ATTR_NAME_MAP = {"dayofweek": "weekday"}
+    POLARS_ATTR_NAME_MAP = {"dayofweek": "weekday","dayofyear": "ordinal_day"}
 
     def __init__(self, attr_name: list[str] | str = None, inputs: list[Op] = None, outputs: list[Op] = None):
         if attr_name is None:
@@ -188,17 +232,24 @@ def __str__(self):
 
     def process(self, mode: str, environment: dict):
         self.intermediate = self.inputs[0].intermediate
-        if POLARS:
+        tmp = self.intermediate
+        if FLAGS.force_polars:
             for attr in self.attr_name:
                 attr = self.POLARS_ATTR_NAME_MAP.get(attr, attr)
+
+                # TODO find better way to handle this
+                if attr == "is_month_end":
+                    self.intermediate = (self.intermediate.dt.month_end() == self.intermediate)
+                    return
+
                 # polars implements dt.day as a method, not an attribute
                 # use getattr to handle both attributes and methods
-                self.intermediate = getattr(self.intermediate, attr)
-            self.intermediate = self.intermediate()
+                tmp = getattr(tmp, attr)
+            self.intermediate = tmp()
         else:
             for attr in self.attr_name:
-                self.intermediate = self.intermediate.__getattribute__(attr)
-
+                tmp = getattr(tmp, attr)
+            self.intermediate = tmp
 class GroupedDataframeOp(Op):
     def __init__(self, ops: list[Op]):
         super().__init__(name="GROUPED_DATAFRAME", is_X=False, is_y=False)
@@ -210,6 +261,30 @@ def process(self, mode: str, environment: dict):
             op.process(mode, environment)
         self.intermediate = self.ops[-1].intermediate
 
+class ConcatOp(Op):
+    fields = ["first", "others", "axis"] # Add more if needed
+
+    axis_map = {
+        0: "diagonal_relaxed",
+        1: "horizontal",
+    }
+    def __init__(self, first: Op, others: list[Op], axis: int):
+        super().__init__(name="CONCAT", is_X=False, is_y=False)
+        self.first = DATA_OP_PLACEHOLDER if isinstance(first, DataOp) else first
+        self.others = [DATA_OP_PLACEHOLDER if isinstance(other, DataOp) else other for other in others]
+        self.axis = DATA_OP_PLACEHOLDER if isinstance(axis, DataOp) else axis
+        self.is_dataframe_op = True
+
+    def process(self, mode: str, environment: dict):
+        input_iter = iter(self.inputs)
+        first = next(input_iter).intermediate if self.first is DATA_OP_PLACEHOLDER else self.first
+        others = [next(input_iter).intermediate if other is DATA_OP_PLACEHOLDER else other for other in self.others]
+        axis = next(input_iter).intermediate if self.axis is DATA_OP_PLACEHOLDER else self.axis
+        if FLAGS.force_polars:
+            self.intermediate = pl.concat([first, *others], how=self.axis_map[axis])
+        else:
+            self.intermediate = pd.concat([first, *others], axis=axis)
+
 
 def rewrite_fuse_get_item_ops(op: Op) -> Op:
     pass
@@ -249,6 +324,9 @@ def process(self, mode: str, environment: dict):
         else:
             raise ValueError(f"Unsupported dataframe type: {type(x)}")
 
+    def simple_hash(self):
+        return 1
+
 class SplitOutput(Op):
     def __init__(self, inputs: list[Op]=None, outputs: list[Op]=None, is_x = True, ):
         name = "X" if is_x else "y"
@@ -262,6 +340,9 @@ def process(self, mode: str, environment: dict):
         else:
             self.intermediate = self.inputs[0].intermediate[1]
 
+    def simple_hash(self):
+        return 2 if self.is_x else 3
+
 def add_splitting_op(sink: Op) -> Op:
     x_op = None
     y_op = None
@@ -330,9 +411,10 @@ def rewrite_dataframe_ops(sink: Op) -> Op:
                 op.is_dataframe_op = True
 
             # mark as dataframe op
-            elif isinstance(op, GetItemOp):
+            elif isinstance(op, GetItemOp) or isinstance(op, BaseEstimatorOp):
                 op.is_dataframe_op = True
 
+
         if new_op is not None:
             op.replace_input_of_outputs(new_op)
             if sink is op:
@@ -356,11 +438,33 @@ def make_datetime_conversion_op(new_op: DatetimeConversionOp, op: CallOp) -> Dat
 def make_read_op(new_op: DataSourceOp, op: CallOp) -> DataSourceOp:
     input_iter = iter(op.inputs)
     # assume all inputs are ValueOps
-    assert all(isinstance(arg, ValueOp) for arg in op.inputs), "All inputs must be ValueOps"
-    args = [next(input_iter).value if arg is DATA_OP_PLACEHOLDER else arg for arg in op.args]
-    kwargs = {k: next(input_iter).value if v is DATA_OP_PLACEHOLDER else v for k, v in op.kwargs.items()}
-    new_op = DataSourceOp(file_path=args[0], _format="csv", read_args=args[1:], read_kwargs=kwargs)
-    new_op.outputs = op.outputs
+    assert all(isinstance(arg, ValueOp) or isinstance(arg, VariableOp) for arg in op.inputs), "All inputs must be ValueOps or VariableOps"
+    inputs = []
+    args = []
+    for arg in op.args:
+        if arg is DATA_OP_PLACEHOLDER:
+            actual_input_op = next(input_iter)
+            if isinstance(actual_input_op, VariableOp):
+                args.append(DATA_OP_PLACEHOLDER)
+                inputs.append(actual_input_op)
+            else:
+                args.append(actual_input_op.value)
+        else:
+            args.append(arg)
+    kwargs = {}
+    for k, v in op.kwargs.items():
+        if v is DATA_OP_PLACEHOLDER:
+            actual_input_op = next(input_iter)
+            if isinstance(actual_input_op, VariableOp):
+                kwargs[k] = DATA_OP_PLACEHOLDER
+                inputs.append(actual_input_op)
+            else:
+                kwargs[k] = actual_input_op.value
+        else:
+            kwargs[k] = v
+    new_op = DataSourceOp(file_path=args[0], _format="csv", read_args=args[1:], read_kwargs=kwargs, inputs=inputs, outputs=op.outputs)
+    for in_ in inputs:
+        in_.replace_output(op, new_op)
     return new_op
 
 
diff --git a/stratum/logical_optimizer/_op_comparison.py b/stratum/logical_optimizer/_op_comparison.py
index 0ad18b24..e63d3269 100644
--- a/stratum/logical_optimizer/_op_comparison.py
+++ b/stratum/logical_optimizer/_op_comparison.py
@@ -1,9 +1,11 @@
 from typing import Iterable
 from sklearn.base import BaseEstimator
+from skrub import SelectCols
 from skrub._data_ops import DataOp
 from skrub._data_ops._choosing import Choice
-from skrub._data_ops._data_ops import Call, GetItem, CallMethod, GetAttr, Apply, Value, BinOp
-from skrub.selectors._base import All
+from skrub._data_ops._data_ops import Call, GetItem, CallMethod, GetAttr, Apply, Value, BinOp, Concat
+from skrub.selectors._base import All, Filter, Inv
+from pandas import isna
 
 def equals_data_op(op1: DataOp, op2: DataOp):
     """
@@ -54,6 +56,10 @@ def equals_skrub_impl(impl1, impl2):
                 # TODO also match All with set(cols) if cols contains all columns of the input frame
                 if set(cols1) == set(cols2):
                     return estimator_equality_check(est1, est2)
+        elif isinstance(impl1, Concat):
+            # op1 = col1.skb.concat(col2, axis=1)
+            # op2 = col1.skb.concat(col2, axis=1)
+            return _stable_id(impl1.first) == _stable_id(impl2.first) and _stable_id(impl1.others) == _stable_id(impl2.others)
         elif isinstance(impl1, BinOp):
             # op1 = col1 / col2
             # op2 = col1 / col2
@@ -72,7 +78,7 @@ def estimator_equality_check(est1: BaseEstimator, est2: BaseEstimator) -> bool:
     params2 = est2.get_params()
     for key, value in params1.items():
         value2 = params2.get(key)
-        if value2 != value and (
+        if value2 != value and not isna(value) and not isna(value2) and (
                 type(value) != type(value2)
                 or not isinstance(value, BaseEstimator)
                 or not estimator_equality_check(value, value2)):
@@ -129,7 +135,10 @@ def hash_skrub_impl(impl) -> int:
             return hash((t, id(impl.X), col_ids, est_type, est_params))
     elif isinstance(impl, BinOp):
         return hash((t, impl.op, _stable_id(impl.left), _stable_id(impl.right)))
-
+    elif isinstance(impl, Concat):
+        # op1 = col1.skb.concat(col2, axis=1)
+        # op2 = col1.skb.concat(col2, axis=1)
+        return hash((_stable_id(impl.first), _stable_id(impl.others)))
     else:
         # Fallback for unknown DataOp types
         return hash((t, id(impl)))
@@ -159,6 +168,10 @@ def _stable_id(obj):
         return frozenset(_stable_id(x) for x in obj)
     elif isinstance(obj, dict):
         return frozenset((k, _stable_id(v)) for k, v in obj.items())
+    elif isinstance(obj, Filter):
+        return id(obj.predicate)
+    elif isinstance(obj, Inv):
+        return _stable_id(obj.complement)*-1
     elif hasattr(obj, "__hash__") and not isinstance(obj, DataOp):
         # hashable primitive or object
         return hash(obj)
@@ -234,6 +247,14 @@ def update_data_op(op: DataOp, old_input: DataOp, new_input: DataOp):
         elif impl.right is old_input:
             impl.right = new_input
             return
+    elif isinstance(impl, Concat):
+        if impl.first is old_input:
+            impl.first = new_input
+            return
+        for i, other in enumerate(impl.others):
+            if other is old_input:
+                impl.others[i] = new_input
+                return
     raise Exception(f"Could not find old DataOp {old_input} during input update for {op}")
 
 
diff --git a/stratum/logical_optimizer/_op_utils.py b/stratum/logical_optimizer/_op_utils.py
index 7e492fb7..3f6ee4ea 100644
--- a/stratum/logical_optimizer/_op_utils.py
+++ b/stratum/logical_optimizer/_op_utils.py
@@ -2,10 +2,12 @@
 from collections import deque
 from typing import Iterator
 from graphviz import Digraph
-from stratum.logical_optimizer._ops import Op, ChoiceOp
+from stratum.logical_optimizer._ops import DATA_OP_PLACEHOLDER, Op, ChoiceOp
 from stratum._config import get_config
 import os
 
+bfs = False
+
 
 def replace_op_in_outputs(op: Op, replacement: Op):
     """Replace op in all its outputs with a replacement op."""
@@ -110,18 +112,42 @@ def topological_iterator(sink: Op) -> Iterator[Op]:
         else:
             for in_op in op.inputs:
                 if in_op not in indegree:
-                    indegree[in_op] = 0 if not in_op.inputs else len(in_op.inputs)
+                    if in_op is DATA_OP_PLACEHOLDER:
+                        raise RuntimeError(f"Encountered DATA_OP_PLACEHOLDER as input of op {op}, which should not happen.")
+                    curr_indegree = len(in_op.inputs) + (0 if in_op.additional_inputs is None else len(in_op.additional_inputs))
+                    indegree[in_op] = curr_indegree
                     queue1.append(in_op)
 
     # now we can do topological traversal
-    while queue2:
-        op = queue2.popleft()
+    if bfs:
+        return topological_iterator_bfs(sink, queue2, indegree)
+    else:
+        return topological_iterator_dfs(sink, queue2, indegree)
+
+def topological_iterator_bfs(sink: Op, queue, indegree) -> Iterator[Op]:
+    while queue:
+        op = queue.popleft()
         yield op
-        for out_op in op.outputs:
+        op_outputs = op.outputs + (op.additional_outputs if op.additional_outputs is not None else [])
+        for out_op in op_outputs:
+            if out_op not in indegree:
+                raise RuntimeError(f"Encountered op {out_op} which should not exist in the DAG. Probably due to a buggy rewrite, which did not update its inputs / outputs correctly.")
             indegree[out_op] -= 1
             if indegree[out_op] == 0:
-                queue2.append(out_op)
+                queue.append(out_op)
 
+def topological_iterator_dfs(sink: Op, queue, indegree) -> Iterator[Op]:
+    stack = list(queue)
+    while stack:
+        op = stack.pop()
+        yield op
+        op_outputs = op.outputs + (op.additional_outputs if op.additional_outputs is not None else [])
+        for out_op in op_outputs:
+            if out_op not in indegree:
+                raise RuntimeError(f"Encountered op {out_op} which should not exist in the DAG. Probably due to a buggy rewrite, which did not update its inputs / outputs correctly.")
+            indegree[out_op] -= 1
+            if indegree[out_op] == 0:
+                stack.append(out_op)
 
 def show_graph(sink: Op, filename: str = 'plan'):  
     """Show the runtime plan of the DataOp DAG."""
@@ -134,6 +160,9 @@ def show_graph(sink: Op, filename: str = 'plan'):
             dot.node(str(id(current_op)), name)
             for outputs in current_op.outputs:
                 dot.edge(str(id(current_op)), str(id(outputs)))
+            if current_op.additional_outputs is not None:
+                for additional_output in current_op.additional_outputs:
+                    dot.edge(str(id(current_op)), str(id(additional_output)), color='blue')
         filename = "graphs/" + filename
         # make sure folder exists
         os.makedirs(os.path.dirname(filename), exist_ok=True)
diff --git a/stratum/logical_optimizer/_ops.py b/stratum/logical_optimizer/_ops.py
index 8c37968b..2531192f 100644
--- a/stratum/logical_optimizer/_ops.py
+++ b/stratum/logical_optimizer/_ops.py
@@ -1,13 +1,18 @@
 from __future__ import annotations
+import sys
 from types import SimpleNamespace
 from typing import Callable
 
+from joblib import parallel_config
 from sklearn import clone
 from sklearn.base import BaseEstimator
 from skrub._data_ops._choosing import Choice
-from skrub._data_ops._data_ops import DataOp, Apply, Value, CallMethod, Call, GetAttr, GetItem, BinOp as SkrubBinOp, _wrap_estimator
-from pandas import DataFrame
+from skrub._data_ops._data_ops import DataOp, Apply, Value, CallMethod, Call, GetAttr, GetItem, BinOp as SkrubBinOp, Concat, Var, _wrap_estimator
+from pandas import DataFrame, Series
 from polars import DataFrame as PlDataFrame, Series as PlSeries
+from stratum.runtime._hash_utils import stable_hash
+import logging
+logger = logging.getLogger(__name__)
 
 class PlaceHolder():
     def __init__(self, name: str):
@@ -27,17 +32,24 @@ def __init__(self, inputs=None,outputs=None, name=None, is_X=False, is_y=False):
         self.name = name
         self.outputs = outputs if outputs is not None else []
         self.inputs = inputs if inputs is not None else []
+        self.additional_inputs = None
+        self.additional_outputs = None
         self.intermediate = None
         self.is_X = is_X
         self.is_y = is_y
         self.is_dataframe_op = False
         self.is_split_op = False
         self.was_cloned = False
+        self.parallel_group = None
+        self.cached_hash = None
 
     def to_str_helper(self):
         class_name = self.__class__.__name__
         is_df = " [df]" if self.is_dataframe_op else ""
         name = f"({self.name})" if self.name and len(self.name) > 0 else ""
+        # truncate name if it is too long
+        if len(name) > 50:
+            name = name[:50] + "..."
         return class_name, name, is_df
 
     def __str__(self):
@@ -103,6 +115,28 @@ def check_kwargs(self, kwargs):
                 f" {type(kwargs).__name__!r} instead: {kwargs!r}"
             )
 
+    def simple_hash(self):
+        raise NotImplementedError(f"Simple_hash must be implemented in {self.__class__.__name__}")
+
+    def get_hash(self):
+        if self.cached_hash is not None:
+            return self.cached_hash
+        sub_dag_hash = [op.get_hash() for op in self.inputs]
+        sub_dag_hash.append(self.simple_hash())
+        self.cached_hash = stable_hash(sub_dag_hash)
+        return self.cached_hash
+
+
+    def get_intermediate_size(self):
+        if isinstance(self.intermediate, DataFrame):
+            return self.intermediate.memory_usage(deep=True).sum()
+        elif isinstance(self.intermediate, Series):
+            return self.intermediate.memory_usage(deep=True)
+        elif isinstance(self.intermediate, PlDataFrame) or isinstance(self.intermediate, PlSeries):
+            return self.intermediate.estimated_size()
+        else:
+            return sys.getsizeof(self.intermediate)
+
 def clone_value(value):
     if isinstance(value, dict):
         return {k:clone_value(v) for k,v in value.items()}
@@ -162,7 +196,22 @@ def process(self, mode: str, environment: dict):
             ns = self.replace_fields_with_values()
             self.intermediate = self.skrub_impl.compute(ns, mode, environment)
 
-class EstimatorOp(Op):
+class VariableOp(Op):
+    def __init__(self, name: str, value = None):
+        super().__init__(name=name)
+        self.name = name
+        if value is not None:
+            self.value = value
+        else:
+            self.value = "EMPTY_VARIABLE"
+
+    def clone(self):
+        return VariableOp(name=self.name)
+
+    def process(self, mode: str, environment: dict):
+        self.intermediate = environment[self.name]
+
+class BaseEstimatorOp(Op):
     fields = ["estimator", "y", "cols", "how", "allow_reject", "unsupervised", "kwargs"]
     
     def __init__(self, estimator: BaseEstimator, y=None, cols=None, how="no-wrap", allow_reject=False, unsupervised=False, kwargs=None):
@@ -171,18 +220,25 @@ def __init__(self, estimator: BaseEstimator, y=None, cols=None, how="no-wrap", a
             kwargs = {}
         self.check_kwargs(kwargs)
         self.estimator = estimator
+        place_holders = {k: v for k, v in self.estimator.get_params().items() if isinstance(v, DataOp)}
+        self.estimator.set_params(**place_holders)
+        self.original_estimator = clone(self.estimator)
         self.y = DATA_OP_PLACEHOLDER if isinstance(y, DataOp) else y
         self.cols = DATA_OP_PLACEHOLDER if isinstance(cols, DataOp) else cols
         self.how = how
         self.allow_reject = allow_reject
         self.unsupervised = unsupervised
         self.kwargs = remove_datops_from_args(kwargs) if kwargs is not None else kwargs
+        self.parallelism = 8
+
+    def simple_hash(self):
+        return stable_hash((self.estimator, self.y, self.cols, self.how, self.allow_reject, self.unsupervised, self.kwargs))
 
     def clone(self):
         params = self.estimator.get_params()
         estimator_new = clone(self.estimator)
         estimator_new.set_params(**params)
-        new_op = EstimatorOp(
+        new_op = self.__class__(
             estimator=estimator_new, 
             y=self.y, 
             cols=self.cols, 
@@ -193,35 +249,119 @@ def clone(self):
         )
         new_op.was_cloned = True
         return new_op
-    
-    def process(self, mode: str, environment: dict):
+
+    def extract_args_from_inputs(self, mode: str):
+        """
+        Extract all necessary data from an EstimatorOp to make it picklable for multiprocessing.
+        
+        Returns a tuple of picklable data that can be sent to worker processes.
+        """
         input_iter = iter(self.inputs)
         x = next(input_iter).intermediate
-        if isinstance(x, PlDataFrame):
-            x = x.to_pandas()
-        y = next(input_iter).intermediate if self.y == DATA_OP_PLACEHOLDER else self.y
-        if isinstance(y, PlSeries):
-            y = y.to_pandas()
+        assert x is not None, f"X is None for {self}"
+        y = None if mode == 'predict' else next(input_iter).intermediate if self.y == DATA_OP_PLACEHOLDER else self.y
+        estm = self.estimator if mode == "predict" else self.original_estimator
+        place_holders = {k: next(input_iter).intermediate for k, v in estm.get_params().items() if isinstance(v, DataOp)}
+        estm.set_params(**place_holders)
         cols = next(input_iter).intermediate if self.cols == DATA_OP_PLACEHOLDER else self.cols
+        return (
+            estm,
+            x,
+            y,
+            cols,
+            self.how,
+            self.allow_reject,
+            self.unsupervised,
+            self.kwargs,
+            mode,
+            self.parallelism
+        )
+    
+    def process(self, mode: str, environment: dict):
+        # we use a separate function to process the estimator to allow reuse for multiprocessing
+        task_data = self.extract_args_from_inputs(mode)
+        process_task = self.get_process_task()
+        self.intermediate, self.estimator = process_task(task_data)
+
+    def get_process_task(self):
+        raise NotImplementedError(f"get_process_task must be implemented in {self.__class__.__name__}")
+
+class EstimatorOp(BaseEstimatorOp):
+    def get_process_task(self):
+        return process_estimator_task
+
+class TransformerOp(BaseEstimatorOp):
+    def get_process_task(self):
+        return process_transformer_task
+
+class DummyConfigManager:
+    """A no-op context manager that does nothing."""
+    def __enter__(self):
+        return self
+    
+    def __exit__(self, *args):
+        return False
+
+def estimator_parallel_config(n_jobs: int = None):
+    if n_jobs is not None:
+        logger.debug(f"Using threading backend with {n_jobs} jobs")
+        return parallel_config(backend='threading', n_jobs=n_jobs)
+    else:
+        return DummyConfigManager()
+
+def estm_supports_polars(estimator):
+    is_sklearn = estimator.__class__.__module__.startswith("sklearn.") or estimator.__class__.__module__.startswith("skrub.")
+    is_stratum = estimator.__class__.__module__.startswith("stratum.") and estimator.__class__.__name__.startswith("Rusty")
+    # other_frameworks = estimator.__class__.__module__.startswith("xgboost.")
+    return is_sklearn or is_stratum #or other_frameworks
+
+def check_estm_inputs(estimator, mode, x, y):
+    input_is_polars = type(x) == PlDataFrame
+    converted = False
+    if estimator.__class__.__module__.startswith("skrub."):
+        if estimator.__class__.__name__.startswith("ApplyTo"):
+            estimator = estimator.transformer
+    if input_is_polars and not estm_supports_polars(estimator):
+        converted = True
+        logger.debug(f"Estimator {estimator.__class__.__name__} does not support Polars DataFrame. Converting to Pandas DataFrame.")
+        x = x.to_pandas()
+        if y is not None and mode == "fit_transform":
+            y = y.to_pandas()
+    return converted, x, y
+
+def process_estimator_task(task_data):
+    """ Process a predictor (EstimatorOp) task in a worker process. """
+    (estimator, x, y, cols, how, allow_reject, unsupervised, kwargs, mode, parallelism) = task_data
+    _, x, y = check_estm_inputs(estimator, mode, x, y)
+    if mode == "fit_transform":
+        estimator = _wrap_estimator(estimator, cols, how=how, allow_reject=allow_reject, X=x)
+        y_arg = () if unsupervised else (y,)
+        estimator.fit(x, *y_arg, **kwargs)
+        result = estimator.predict(x, **kwargs)
+        # Return both result and fitted estimator (in case of multi-processing)
+        return result, estimator
+    elif mode == "predict":
+        result = estimator.predict(x, **kwargs)
+        return result, estimator
+    else:
+        raise ValueError(f"Mode {mode} not supported for EstimatorOp.")
+
+def process_transformer_task(task_data):
+    """ Process a transformer (TransformerOp) task in a worker process. """
+    (estimator, x, y, cols, how, allow_reject, unsupervised, kwargs, mode, parallelism) = task_data
+    converted, x, y = check_estm_inputs(estimator, mode, x, y)
+    with estimator_parallel_config(parallelism):
         if mode == "fit_transform":
-            self.estimator = _wrap_estimator(self.estimator, cols, how=self.how, allow_reject=self.allow_reject, X=x)
-            y_arg = () if self.unsupervised else (y,)
-            if not hasattr(self.estimator, mode):
-                # Predictors
-                self.estimator.fit(x, *y_arg, **self.kwargs)
-                self.intermediate = self.estimator.predict(x, **self.kwargs)
-            else:
-                # Transformers
-                self.intermediate = self.estimator.fit_transform(x, *y_arg, **self.kwargs)
+            estimator = _wrap_estimator(estimator, cols, how=how, allow_reject=allow_reject, X=x)
+            y_arg = () if unsupervised else (y,)
+            result = estimator.fit_transform(x, *y_arg, **kwargs)
         elif mode == "predict":
-            if not hasattr(self.estimator, mode):
-                # Transformers
-                self.intermediate = self.estimator.transform(x, **self.kwargs)
-            else:
-                # Predictors
-                self.intermediate = self.estimator.predict(x, **self.kwargs)
+            result = estimator.transform(x, **kwargs)
         else:
-            raise ValueError(f"Mode {mode} not supported for EstimatorOp.")
+            raise ValueError(f"Mode {mode} not supported for TransformerOp.")
+    if converted:
+        result = PlDataFrame(result)
+    return result, estimator
 
 
 class ChoiceOp(Op):
@@ -248,7 +388,11 @@ def make_outcome_names(self):
             ) for combi in self.outcome_names]
 
     def update_name(self):
-        self.name = "  |  ".join(self.make_outcome_names())
+        opts = " | ".join(self.make_outcome_names())
+        max_len = 50
+        if len(opts) > max_len:
+            opts = opts[:max_len] + "..."
+        self.name = opts
 
     def clone(self):
         new_op = ChoiceOp(outcome_names=self.outcome_names, append_choice_name=False)
@@ -287,6 +431,8 @@ def __init__(self, method_name: str, args = None, kwargs = None):
     def process(self, mode: str, environment: dict):
         iter_ins = iter(self.inputs)
         _obj = next(iter_ins).intermediate
+        if isinstance(_obj, PlDataFrame) or isinstance(_obj, PlSeries):
+            _obj = _obj.to_pandas()
         _args = [next(iter_ins).intermediate if arg is DATA_OP_PLACEHOLDER else arg for arg in self.args]
         _kwargs = {k: next(iter_ins).intermediate if v is DATA_OP_PLACEHOLDER else v for k, v in self.kwargs.items()}
         self.intermediate = _obj.__getattribute__(self.method_name)(*_args, **_kwargs)
@@ -294,7 +440,9 @@ def process(self, mode: str, environment: dict):
 class CallOp(Op):
     fields = ["func", "args", "kwargs"]
     
-    def __init__(self, name: str = "CallOp", func=None, args=None, kwargs=None):
+    def __init__(self, name=None, func=None, args=None, kwargs=None):
+        if name is None:
+            name = "CallOp" if func is None else func.__name__
         super().__init__(name=name)
         if kwargs is not None:
             self.check_kwargs(kwargs)
@@ -319,19 +467,30 @@ def process(self, mode: str, environment: dict):
         if self.is_dataframe_op:
             self.intermediate = self.inputs[0].intermediate
             for attr in self.attr_name:
-                self.intermediate = self.intermediate.__getattribute__(attr)
+                self.intermediate = getattr(self.intermediate, attr)
         else:
-            self.intermediate = self.inputs[0].intermediate.__getattribute__(self.attr_name)
+            self.intermediate = getattr(self.inputs[0].intermediate, self.attr_name)
 
 class GetItemOp(Op):
     fields = ["key"]
     
     def __init__(self, key=None):
-        super().__init__(name=str(key) if key is not None else '?')
-        self.key = key
+        self.key = DATA_OP_PLACEHOLDER if isinstance(key, DataOp) else key
+        name = key._skrub_impl.__class__.__name__ if isinstance(key, DataOp) else str(self.key)
+        super().__init__(name=name)
+
 
     def process(self, mode: str, environment: dict):
-        self.intermediate = self.inputs[0].intermediate[self.key]
+        key = self.key
+        if key is DATA_OP_PLACEHOLDER:
+            key = self.inputs[1].intermediate
+        self.intermediate = self.inputs[0].intermediate[key]
+
+    def simple_hash(self):
+        """Return a stable hash of a str or list key; other key types are not supported."""
+        if isinstance(self.key, (str, list)):
+            return stable_hash(self.key)
+        raise NotImplementedError(f"Hashing is not implemented for key type: {type(self.key)}")
 
 class BinOp(Op):
     fields = ["op", "left", "right"]
@@ -343,7 +502,7 @@ def __init__(self, op: Callable, left, right):
         self.right = DATA_OP_PLACEHOLDER if isinstance(right, DataOp) else right
 
 
-    def process(self, mode: str, environment: dict):
+    def process(self, mode: str, environment: dict, cv_id = None):
         i = 0
         if self.left is DATA_OP_PLACEHOLDER:
             left = self.inputs[i].intermediate
@@ -411,14 +570,20 @@ def as_op(data_op: DataOp):
     elif isinstance(impl, SkrubBinOp):
         return_op = BinOp(op=impl.op, left=impl.left, right=impl.right)
     elif isinstance(impl, Apply):
-        return_op = EstimatorOp(
+        estimator_class = EstimatorOp if hasattr(impl.estimator, "predict") else TransformerOp
+        return_op = estimator_class(
             y=impl.y, 
             estimator=impl.estimator, 
             cols=impl.cols, 
             how=impl.how, 
             allow_reject=impl.allow_reject, 
             unsupervised=impl.unsupervised, 
-            kwargs=impl.kwargs if hasattr(impl, "kwargs") else {})
+            kwargs= {})
+    elif isinstance(impl, Var):
+        return_op = VariableOp(name=impl.name, value=impl.value)
+    elif isinstance(impl, Concat):
+        from stratum.logical_optimizer._dataframe_ops import ConcatOp
+        return_op = ConcatOp(first=impl.first, others=impl.others, axis=impl.axis)
     else:
         return_op = ImplOp(skrub_impl=impl, name=data_op.__skrub_short_repr__())
 
diff --git a/stratum/logical_optimizer/_optimize.py b/stratum/logical_optimizer/_optimize.py
index 72d1a6a9..de3ee26c 100644
--- a/stratum/logical_optimizer/_optimize.py
+++ b/stratum/logical_optimizer/_optimize.py
@@ -1,13 +1,12 @@
-from numpy import True_
-from skrub._data_ops._evaluation import _Graph
 from skrub._data_ops import DataOp
 from skrub._data_ops._subsampling import SubsamplePreviews
 from collections import deque
 from ._cse import apply_cse
-from ._dataframe_ops import rewrite_dataframe_ops, add_splitting_op
+from ._dataframe_ops import add_splitting_op
 from ._dataframe_ops import rewrite_dataframe_ops, group_dataframe_ops
 from ._ops import ChoiceOp, ImplOp, Op, SearchEvalOp, as_op
 from ._op_utils import clone_sub_dag, find_choice_naive, replace_op_in_outputs, show_graph, topological_iterator
+from ._skrub_graph import build_graph
 from time import perf_counter
 import logging
 from stratum._config import FLAGS
@@ -38,7 +37,7 @@ def topological_traverse(nodes, parents, children):
 
 def apply_cse_on_skrub_ir(dag: DataOp):
     """ Apply CSE on a Skrub DataOp DAG and return the deduplicated DAG. (Deprecated versio of optimize function)"""
-    graph = _Graph().run(dag)
+    graph = build_graph(dag)
     nodes = graph["nodes"]
     parents = graph["parents"]
     children = graph["children"]
@@ -48,6 +47,7 @@ def apply_cse_on_skrub_ir(dag: DataOp):
     return dag
 
 class OptConfig():
+    # TODO we should move this class to the _config.py file
     def __init__(self, cse: bool = True, unroll_choices: bool = True, dataframe_ops: bool = True):
         self.cse = cse
         self.dataframe_ops = dataframe_ops
@@ -57,58 +57,83 @@ def _debug_show_graph(sink: Op, name: str):
     if FLAGS.DEBUG:
         show_graph(sink, name)
 
-def optimize(dag: DataOp, config: OptConfig = None):
+def optimize(dag_sink: DataOp, config: OptConfig = None):
     """ Entry point for the logical optimizer. Takes a Skrub DataOp DAG, applies logical optimizations 
-    and returns a topologically sorted list of Op nodes."""
+    and returns an Op sink node."""
     t0 = perf_counter()
     if config is None:
         config = OptConfig()
         
-    graph = _Graph().run(dag)
-    nodes = graph["nodes"]
-    parents = graph["parents"]
-    children = graph["children"]
-
+    t0_graph = perf_counter()
+    g = build_graph(dag_sink)
+    nodes = g["nodes"]
+    parents = g["parents"]
+    children = g["children"]
+    t1_graph = perf_counter()
+    logger.info(f"Graph construction took {t1_graph - t0_graph:.2f} seconds")
     order = topological_traverse(nodes, parents, children)
-    if config.cse:
-        apply_cse(dag, nodes, order, parents)
+    if FLAGS.cse:
+        t0_cse = perf_counter()
+        apply_cse(dag_sink, nodes, order, parents)
         # TODO cse should direcly return the new list of ops ordered so we dont have to iterate again
+        t1_cse = perf_counter()
+        logger.info(f"CSE took {t1_cse - t0_cse:.2f} seconds")
 
-    sink = convert_to_ops(dag)
+    t0_convert = perf_counter()
+    sink = convert_to_ops(dag_sink)
+    t1_convert = perf_counter()
+    logger.info(f"Conversion took {t1_convert - t0_convert:.2f} seconds")
+
+    t0_splitting = perf_counter()
     sink = add_splitting_op(sink)
-    _debug_show_graph(sink, "convertion")
+    t1_splitting = perf_counter()
+    logger.info(f"Splitting took {t1_splitting - t0_splitting:.2f} seconds")
 
+
+    _debug_show_graph(sink, "convertion")
+    # The splitting time is logged once, above; rendering the debug graph is
+    # deliberately kept out of that measurement.
     # Rewrites:
 
     # Parsing of dataframe ops
     if config.dataframe_ops:
+        t0_dataframe = perf_counter()
         sink = rewrite_dataframe_ops(sink)
         sink = group_dataframe_ops(sink)
         _debug_show_graph(sink, "dataframe_rewrite")
-
+        t1_dataframe = perf_counter()
+        logger.info(f"Dataframe rewrite took {t1_dataframe - t0_dataframe:.2f} seconds")
     # Unrolling of choices to a dag wit only a single choice op at the end
     if config.unroll_choices:
+        t0_choices = perf_counter()
         sink = choice_unrolling(sink)
+        _debug_show_graph(sink, "unrolled")
+        t1_choices = perf_counter()
+        logger.info(f"Choices unrolling took {t1_choices - t0_choices:.2f} seconds")
     
     # Final optimized DAG
-    _debug_show_graph(sink, "optimized")
-    output = [op for op in topological_iterator(sink)]
+    
     t1 = perf_counter()
-    logger.info("="*100 + f"\nOptimization took {t1 - t0:.2f} seconds\n" + "="*100)
-    return output
+    logger.info(f"Optimization took in total {t1 - t0:.2f} seconds")
+    return sink
 
 
 def convert_to_ops(dag: DataOp) -> Op:
     """ Convert a Skrub DataOp DAG to a stratum's logical IR (Op DAG)"""
-    graph = _Graph().run(dag)
-    nodes = graph["nodes"]
-    parents = graph["parents"]
-    children = graph["children"]
-
+    t0_convert = perf_counter()
+    g = build_graph(dag)
+    nodes = g["nodes"]
+    parents = g["parents"]
+    children = g["children"]
+    t1_convert = perf_counter()
+    logger.info(f"Conversion dag took {t1_convert - t0_convert:.2f} seconds")
     order = topological_traverse(nodes, parents, children)
     sink_id = order[-1]
+
     # make logical IR:
+    # we start by making unconnected ops
     ids_to_ops = {node: as_op(nodes[node]) for node in order}
+    # we then connect the ops to a graph
     for node in order:
         op = ids_to_ops[node]
         if isinstance(op, ImplOp) and isinstance(op.skrub_impl, SubsamplePreviews):
@@ -169,7 +194,9 @@ def choice_unrolling(sink: Op):
                 else:
                     assert sink is last_op, "Sink should be the last op in the dag"
                     # we reached the end of the dag
+                    logger.debug(f"Unrolling simple choice: {op}")
                     sink = unroll_simple_choice(sink, op, outcomes)
+                    logger.debug(f"New sink after unrolling: {sink}")
 
                 # if FLAGS.DEBUG:
                 #     show_graph(sink, f"choice-unrolled={i}")
diff --git a/stratum/logical_optimizer/_skrub_graph.py b/stratum/logical_optimizer/_skrub_graph.py
new file mode 100644
index 00000000..84d7525a
--- /dev/null
+++ b/stratum/logical_optimizer/_skrub_graph.py
@@ -0,0 +1,92 @@
+"""Fast graph extraction from a skrub DataOp DAG.
+
+Drop-in replacement for ``skrub._data_ops._evaluation._Graph().run(dag)``
+that avoids the heavyweight generator-based ``_DataOpTraversal`` machinery.
+We only need the DataOp-to-DataOp adjacency; choices, estimators, slices etc.
+are irrelevant for graph structure and can be skipped.
+"""
+
+from collections import defaultdict
+from skrub._data_ops import DataOp
+from skrub._data_ops._choosing import BaseChoice, Choice, Match
+
+
+_BUILTIN_SEQ = (list, tuple, frozenset, set)
+
+
+def _collect_child_data_ops(value):
+    """Recursively yield every DataOp found in *value*.
+
+    *value* may be a DataOp itself, a skrub ``Match`` or ``Choice``, a dict
+    (only its values are searched) or a list/tuple/set/frozenset. Other
+    ``BaseChoice`` instances and any remaining type yield nothing.
+    """
+    if isinstance(value, DataOp):
+        yield value
+        return
+    # Select what to descend into; the checks run in this fixed order.
+    if isinstance(value, Match):
+        nested = (value.choice, value.outcome_mapping)
+    elif isinstance(value, Choice):
+        nested = value.outcomes
+    elif isinstance(value, BaseChoice):
+        nested = ()
+    elif isinstance(value, dict):
+        nested = value.values()
+    else:
+        nested = value if isinstance(value, _BUILTIN_SEQ) else ()
+    for item in nested:
+        yield from _collect_child_data_ops(item)
+
+
+def _unique(seq):
+    """Return the distinct items of *seq* as a list, in first-seen order."""
+    return [*dict.fromkeys(seq)]
+
+
+def build_graph(data_op):
+    """Extract the DataOp-to-DataOp adjacency of the DAG ending in *data_op*.
+
+    The result has the ``{"nodes", "children", "parents"}`` layout of
+    ``skrub._data_ops._evaluation._Graph().run()``. Nodes get consecutive
+    integer ids starting at 0 in the order the traversal first visits them;
+    ``children``/``parents`` only hold entries for nodes that have any.
+
+    The walk is an explicit-stack depth-first search over the fields of each
+    node's ``_skrub_impl``.
+    """
+    by_obj_id = {}  # id(DataOp) -> DataOp; doubles as the visited set
+    child_ids = defaultdict(list)
+    parent_ids = defaultdict(list)
+
+    pending = [data_op]
+    while pending:
+        current = pending.pop()
+        current_id = id(current)
+        if current_id in by_obj_id:
+            continue
+        by_obj_id[current_id] = current
+
+        impl = current._skrub_impl
+        for field_name in impl._fields:
+            for child in _collect_child_data_ops(getattr(impl, field_name)):
+                child_ids[current_id].append(id(child))
+                parent_ids[id(child)].append(current_id)
+                if id(child) not in by_obj_id:
+                    pending.append(child)
+
+    # Renumber the id()-based keys to small consecutive integers.
+    short = {obj_id: index for index, obj_id in enumerate(by_obj_id)}
+
+    def _renumber(adjacency):
+        # Duplicate edges are collapsed, keeping the first occurrence.
+        return {
+            short[key]: [short[other] for other in dict.fromkeys(linked)]
+            for key, linked in adjacency.items()
+        }
+
+    return {
+        "nodes": {short[key]: op for key, op in by_obj_id.items()},
+        "children": _renumber(child_ids),
+        "parents": _renumber(parent_ids),
+    }
diff --git a/stratum/patching/_gridsearch.py b/stratum/patching/_gridsearch.py
index 18148354..c827b132 100644
--- a/stratum/patching/_gridsearch.py
+++ b/stratum/patching/_gridsearch.py
@@ -19,6 +19,7 @@ def _stratum_make_grid_search(self, *, fitted=False, keep_subsampling=False, **k
         cv = kwargs.get("cv", None)
         scoring = kwargs.get("scoring", None)
         return_predictions = kwargs.get("return_predictions", False)
+        env = kwargs.get("environment", {})
         
         # Get the DataOp from the namespace instance
         dag = self._data_op
@@ -27,7 +28,8 @@ def _stratum_make_grid_search(self, *, fitted=False, keep_subsampling=False, **k
             dag=dag,
             cv=cv,
             scoring=scoring,
-            return_predictions=return_predictions
+            return_predictions=return_predictions,
+            env=env
         )
     else:
         # Fall back to original implementation
diff --git a/stratum/runtime/_caching.py b/stratum/runtime/_caching.py
new file mode 100644
index 00000000..48d4c497
--- /dev/null
+++ b/stratum/runtime/_caching.py
@@ -0,0 +1,112 @@
+from fileinput import filename
+import json
+import os
+import logging
+import polars as pl
+import pandas as pd
+from time import perf_counter
+
+
+logger = logging.getLogger(__name__)
+
+CACHE_DIR = os.path.join(os.path.expanduser("~"), ".stratum", "cache")
+INTERMEDIATES_DIR = os.path.join(CACHE_DIR, "intermediates")
+# Create both directories at import time. ``exist_ok=True`` removes the
+# check-then-create race when several processes import this module at once;
+# creating the nested directory also creates CACHE_DIR itself.
+os.makedirs(INTERMEDIATES_DIR, exist_ok=True)
+
+
+class Cache:
+    """In-memory key/value cache with hit/miss statistics and on-disk persistence."""
+
+    def __init__(self):
+        """Start empty, then reload every entry listed in ``cache.json`` if it exists."""
+        self.cache = {}
+        self.timings = []
+        index_path = os.path.join(CACHE_DIR, "cache.json")
+        if os.path.exists(index_path):
+            logger.info(f"Loading cache from {index_path}")
+            started = perf_counter()
+            with open(index_path, "r") as f:
+                # JSON object keys are strings; turn them back into ints.
+                for raw_key, (file_name, converted) in json.load(f).items():
+                    self.cache[int(raw_key)] = read_value(file_name, converted)
+            duration = perf_counter() - started
+            logger.info(f"Cache loaded in {duration} seconds")
+            self.timings.append(("load_cache", duration))
+
+        # Counters and accumulated wall-clock time of cache operations.
+        self.hit_count = 0
+        self.miss_count = 0
+        self.set_count = 0
+        self.hit_time = 0.0
+        self.set_time = 0.0
+
+    def get(self, key):
+        """Return the value stored under ``key`` (``None`` on a miss) and update the statistics."""
+        started = perf_counter()
+        result = self.cache.get(key)
+        duration = perf_counter() - started
+        if result is None:
+            self.miss_count += 1
+        else:
+            self.hit_count += 1
+            self.hit_time += duration
+        return result
+
+    def set(self, key, value):
+        """Store ``value`` under ``key`` and update the statistics."""
+        started = perf_counter()
+        self.cache[key] = value
+        self.set_count += 1
+        self.set_time += perf_counter() - started
+
+    def persist(self):
+        """Write missing intermediates and the ``cache.json`` index, then drop ``self.cache``."""
+        index_path = os.path.join(CACHE_DIR, "cache.json")
+        logger.info(f"Saving cache to {index_path}")
+        started = perf_counter()
+        file_name_cache = {}
+        for key, value in self.cache.items():
+            # Recorded so that loading can convert the frame back to pandas.
+            converted = isinstance(value, pd.DataFrame)
+            if check_if_intermediate_exists(key):
+                logger.debug(f"Intermediate {key} already exists, skipping write")
+            else:
+                write_value(key, value)
+            file_name_cache[key] = (make_intermediate_file_name(key), converted)
+        # Remove any previous index before writing the new one.
+        if os.path.exists(index_path):
+            os.remove(index_path)
+        with open(index_path, "w") as f:
+            json.dump(file_name_cache, f)
+        duration = perf_counter() - started
+        logger.info(f"Cache saved in {duration} seconds")
+        self.timings.append(("save_cache", duration))
+        # The in-memory mapping is discarded; get()/set() cannot be used afterwards.
+        del self.cache
+
+
+def make_intermediate_file_name(key):  # path of the parquet file that stores cache entry ``key``
+    return os.path.join(INTERMEDIATES_DIR, f"{key}.parquet")
+
+def check_if_intermediate_exists(key):  # True when the parquet file for ``key`` is already on disk
+    return os.path.exists(make_intermediate_file_name(key))
+
+def read_value(file_name, converted=False):
+    """Read a cached parquet file with polars; return pandas when ``converted`` is set."""
+    # A missing file means the index and the stored intermediates disagree.
+    if not os.path.exists(file_name):
+        raise RuntimeError(f"Intermediate {file_name} not found. Cache is corrupted. Please do 'rm -rf {CACHE_DIR}' and run your code again.")
+    frame = pl.read_parquet(file_name)
+    return frame.to_pandas() if converted else frame
+
+def write_value(key, value):
+    """Write a pandas or polars DataFrame to the parquet file for ``key``."""
+    # pandas frames are converted so a single polars writer handles both.
+    frame = pl.from_pandas(value) if isinstance(value, pd.DataFrame) else value
+    if not isinstance(frame, pl.DataFrame):
+        raise ValueError(f"Unsupported value type: {type(value)}")
+    with open(make_intermediate_file_name(key), "wb") as f:
+        frame.write_parquet(f)
\ No newline at end of file
diff --git a/stratum/runtime/_hash_utils.py b/stratum/runtime/_hash_utils.py
new file mode 100644
index 00000000..159454a5
--- /dev/null
+++ b/stratum/runtime/_hash_utils.py
@@ -0,0 +1,54 @@
+import hashlib
+from sklearn.base import BaseEstimator
+from skrub import TableVectorizer
+
+def _stable_hash_tuple(items):
+    """
+    Combine the stable hashes of ``items`` into one 64-bit integer.
+
+    Each element is hashed with ``stable_hash``; the decimal renderings are
+    joined with ``|`` and digested with SHA-256, of which the first eight
+    bytes (big-endian) form the result.
+    """
+    encoded = [str(stable_hash(item)).encode('utf-8') for item in items]
+    digest = hashlib.sha256(b'|'.join(encoded)).digest()
+    return int.from_bytes(digest[:8], byteorder='big')
+
+
+def hash_estimator(est: BaseEstimator) -> int:
+    """
+    Hash an estimator from its ``get_params()`` entries.
+
+    Nested estimators are hashed recursively through ``stable_hash``. A
+    ``fitted_`` parameter is hashed from the attribute and placed last.
+    """
+    params = est.get_params()
+    # stable_hash() dispatches nested estimators back to hash_estimator().
+    param_hashes = [(key, stable_hash(value)) for key, value in params.items() if key != "fitted_"]
+    # Test the parameter *names*: the previous check compared the string
+    # against (key, value) pairs and therefore never matched.
+    if "fitted_" in params:
+        param_hashes.append(("fitted_", stable_hash(est.fitted_)))
+    return _stable_hash_tuple(param_hashes)
+
+def stable_hash(obj):
+    """Return a 64-bit integer hash of ``obj`` that does not depend on ``hash()``.
+
+    Strings, estimators, lists/tuples, sets and dicts are handled structurally;
+    every other object is hashed from its ``repr()``.
+    """
+    if isinstance(obj, str):
+        payload = obj.encode('utf-8')
+    elif isinstance(obj, BaseEstimator):
+        return hash_estimator(obj)
+    elif isinstance(obj, (list, tuple)):
+        return _stable_hash_tuple(obj)
+    elif isinstance(obj, (set, frozenset)):
+        # Iteration order of a set is not stable across processes: sort the hashes.
+        return _stable_hash_tuple(sorted(stable_hash(item) for item in obj))
+    elif isinstance(obj, dict):
+        by_key_hash = sorted(obj.items(), key=lambda kv: stable_hash(kv[0]))
+        return _stable_hash_tuple((stable_hash(k), stable_hash(v)) for k, v in by_key_hash)
+    else:
+        payload = repr(obj).encode('utf-8')  # int/float/bool/None and anything else
+    return int.from_bytes(hashlib.sha256(payload).digest()[:8], byteorder='big')
\ No newline at end of file
diff --git a/stratum/runtime/_physical_planning.py b/stratum/runtime/_physical_planning.py
new file mode 100644
index 00000000..a3b66e5e
--- /dev/null
+++ b/stratum/runtime/_physical_planning.py
@@ -0,0 +1,85 @@
+from skrub import StringEncoder, TableVectorizer
+from stratum.logical_optimizer._op_utils import topological_iterator
+from stratum.logical_optimizer._ops import EstimatorOp, Op, TransformerOp
+from skrub._data_ops._data_ops import _wrap_estimator
+from time import perf_counter
+import uuid
+import logging
+logger = logging.getLogger(__name__)
+
+def get_estimator_memory_estimate(op: Op, size = 1) -> int | None:
+    """Return a relative memory estimate for ``op``, or None when it is unknown.
+
+    Ops that are not TransformerOps get 1. A TransformerOp gets
+    ``10*size`` for a TableVectorizer and ``3*size`` for a StringEncoder;
+    any other wrapped estimator gives None.
+    """
+    if not isinstance(op, TransformerOp):
+        return 1
+    estm = op.estimator
+    weights = ((TableVectorizer, 10), (StringEncoder, 3))
+    return next((w*size for cls, w in weights if isinstance(estm, cls)), None)
+
+def get_independent_set(ops: list[Op], ancestors: dict[Op]) -> list[Op]:
+    """Greedily select ops such that none is an ancestor of another.
+
+    Candidates are visited by ascending number of conflicts within ``ops``;
+    ties favour ops wrapping a StringEncoder or TableVectorizer. A candidate
+    is kept unless it conflicts with an op that has already been kept.
+    """
+    # TODO instead of greedily finding the largest independent set, we should decide based on mem and compute estimates
+
+    def have_dependency(est1: Op, est2: Op) -> bool:
+        """Check if est1 and est2 have a dependency (one is ancestor of the other)."""
+        return est1 in ancestors.get(est2, set()) or est2 in ancestors.get(est1, set())
+
+    def sort_key(op: Op):
+        # Fewer conflicts first; False sorts before True, so preferred
+        # estimator types win among equal conflict counts.
+        n_conflicts = sum(have_dependency(op, other) for other in ops)
+        preferred = isinstance(op.estimator, (StringEncoder, TableVectorizer))
+        return (n_conflicts, not preferred)
+
+    chosen = []
+    for candidate in sorted(ops, key=sort_key):
+        if all(not have_dependency(candidate, kept) for kept in chosen):
+            chosen.append(candidate)
+    return chosen
+
+def mark_ops_for_parallelization(ops: list[Op], ancestors: dict[Op]):
+    """Give mutually independent ops that have a memory estimate one shared ``parallel_group`` id."""
+    par_group_id = uuid.uuid4()
+    selected_ops = get_independent_set([op for op in ops if get_estimator_memory_estimate(op) is not None], ancestors)
+    if len(selected_ops) <= 1:
+        logger.debug(f"No ops selected for parallelization. Not enough ops to parallelize: {len(selected_ops)}.")
+        return
+    selected_ops_str = ",".join(op.name for op in selected_ops)
+    logger.debug(f"Selected {len(selected_ops)} ops for parallelization: [{selected_ops_str}]")
+    for op in selected_ops:
+        op.parallel_group = par_group_id
+
+
+def compute_ancestors(sink: Op) -> dict[Op]:
+    """ Map every op yielded by topological_iterator(sink) to the set of its direct and transitive inputs. """
+    ancestors = {op: set() for op in topological_iterator(sink)}
+    for op in topological_iterator(sink):
+        collected = set()
+        for parent in op.inputs:
+            collected |= ancestors[parent] | {parent}
+        ancestors[op] = collected
+    return ancestors
+
+def physical_planning(sink: Op) -> Op:
+    """ Mark parallelizable ops in the DAG and return the same sink.
+
+    EstimatorOps are handled first, then TransformerOps; each kind goes
+    through its own mark_ops_for_parallelization call.
+    """
+    started = perf_counter()
+    ancestors = compute_ancestors(sink)
+    for op_type in (EstimatorOp, TransformerOp):
+        # The DAG is walked again per kind, in the order topological_iterator yields.
+        matching = [op for op in topological_iterator(sink) if isinstance(op, op_type)]
+        mark_ops_for_parallelization(matching, ancestors)
+    logger.info(f"Physical planning took: {perf_counter() - started:.2f} seconds")
+    return sink
\ No newline at end of file
diff --git a/stratum/runtime/_scheduler.py b/stratum/runtime/_scheduler.py
index 1fed012c..2cc3cf8a 100644
--- a/stratum/runtime/_scheduler.py
+++ b/stratum/runtime/_scheduler.py
@@ -1,55 +1,122 @@
+import ctypes
+import gc
+import sys
 from time import perf_counter
+from numpy import int32
+import psutil
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split, check_cv
-from sklearn.metrics._scorer import _Scorer
+from sklearn.metrics._scorer import _Scorer, get_scorer
 from skrub._data_ops._data_ops import EvalMode
 from stratum.logical_optimizer._dataframe_ops import SplitOp
-from stratum.logical_optimizer._ops import ImplOp, Op
+from stratum.logical_optimizer._op_utils import show_graph, topological_iterator
+from stratum.logical_optimizer._ops import EstimatorOp, ImplOp, Op, TransformerOp
+from joblib import Parallel, delayed
+from concurrent.futures import ThreadPoolExecutor
 import polars as pl
+from stratum._config import FLAGS
+import os
+from dataclasses import dataclass
 
 import logging
+
+from stratum.runtime._hash_utils import stable_hash
 logger = logging.getLogger(__name__)
 
+
+@dataclass
+class _SchedulerFlags:
+    show_memory_usage: bool = False  # log the process memory usage at the start of grid_search
+    stratum_gc: bool = True  # let Scheduler.run_gc drop intermediates whose dependency count is 0
+    stratum_malloc_trim: bool = False  # after run_gc freed something, also gc.collect() and malloc_trim
+
+SchedulerFlags = _SchedulerFlags()
+
+
+_libc = None
+if sys.platform == "linux":
+    try:
+        _libc = ctypes.CDLL("libc.so.6")
+    except OSError:
+        pass
+
+def _malloc_trim():
+    """Ask glibc to return free heap pages to the OS; does nothing when libc was not loaded."""
+    if _libc is not None:
+        _libc.malloc_trim(0)
+
+
+def measure_memory_usage():
+    """Return the resident set size of the current process, formatted by format_bytes."""
+    return format_bytes(psutil.Process().memory_info().rss)
+
+def format_bytes(bytes: int32):
+    """Format a byte count with two decimals; values of 1024 GB or more stay in GB."""
+    for unit in ("B", "KB", "MB"):
+        if bytes < 1024:
+            return f"{bytes:.2f} {unit}"
+        bytes /= 1024
+    return f"{bytes:.2f} GB"
+
+def get_scoring_func(scoring):
+    """Return ``(score_func, greater_is_better)`` for a scorer name or _Scorer object.
+
+    Anything else (including ``None``) falls back to mean_squared_error.
+    """
+    if isinstance(scoring, str):
+        scoring = get_scorer(scoring)
+    # isinstance also accepts subclasses, which an exact type comparison rejects.
+    if isinstance(scoring, _Scorer):
+        logger.info(f"Using scorer: {scoring}")
+        return scoring._score_func, scoring._sign > 0
+    return mean_squared_error, False
+
 class Scheduler:
     """Scheduler for executing DataOpDAGs in topological order."""
     
-    def __init__(self, ops_ordered: list[Op], print_heavy_hitters=False):
+    def __init__(self, print_heavy_hitters=False, cache=None, env=None, t0 = None):
         """Initialize scheduler with a data operations DAG."""
-        self.ops_ordered = ops_ordered
         self.mode = "fit_transform"
-        self.env = {}
+        self.env = env if env else {}
         self.flagged_for_recomputation = []
         self.pos_split_op = None
         self.timings = [] if print_heavy_hitters else None
         self.results_ = None
+        self.cv_id = -1
+        self.cache = cache
+        self.intermediate_dependencies = {}
+        self.t0 = t0 if t0 is not None else perf_counter()
 
-    def evaluate(self, seed: int = 42, test_size = 0.2):
-        """Evaluate the pipeline with a train/test split and return predictions."""
-        try:
-            split_op = self.compute_xy()
-        except RuntimeError as e:
-            if "X and y nodes not found in the DAG" in str(e):
-                logger.warning("X and y nodes not found in the DAG, returning the last node")
-                return self.ops_ordered[-1].intermediate
-            else:
-                raise e
+    def run_gc(self):
+        if SchedulerFlags.stratum_gc:
+            freed_any = False
+            kv = list(self.intermediate_dependencies.items())
+            for k, v in kv:
+                if v == 0:
+                    logger.debug(f"GC: deleting {k}")
+                    k.intermediate = None
+                    del self.intermediate_dependencies[k]
+                    freed_any = True
 
-        train_index, test_index = train_test_split(range(len(split_op.inputs[0].intermediate)), test_size=test_size, random_state=seed)
-        split_op.indices = train_index
-        self.compute(self.pos_split_op)
-        split_op.indices = test_index
-        pred = self.compute(self.pos_split_op, mode="predict")
-        return pred["vals"][0]
+            if freed_any and SchedulerFlags.stratum_malloc_trim:
+                gc.collect()
+                _malloc_trim()
 
 
     def grid_search(self, cv=None, scoring=None, return_predictions=False):
-        """Perform grid search with cross-validation on the DataOp DAG in a sequential top-down manner."""
+        """Perform grid search with cross-validation on the logical DAG."""
         # default to scikit-learn's CV
         cv = check_cv(cv)
 
-        # start with computing till X and y node
+        if SchedulerFlags.show_memory_usage:
+            memory_usage = measure_memory_usage()
+            logger.debug(f"Memory usage at start of grid search: {memory_usage}")
+
+        # start with computing till we reach the split op
         logger.debug("\n" + "="*100 + "\n" + "Starting grid search" + "\n" + "="*100 + "\n")
         split_op = self.compute_xy()
+        for in_ in split_op.inputs:
+            self.intermediate_dependencies[in_] *= cv.get_n_splits()*2
         results, predictions = [], []
 
         logger.debug("\n" + "="*100 + "\n" + "XY computed" + "\n" + "="*100 + "\n")
@@ -57,23 +124,13 @@ def grid_search(self, cv=None, scoring=None, return_predictions=False):
         self.results_ = results
         return predictions if return_predictions else None
 
-
-    def get_scoring_func(self, scoring):
-        """Get scoring function from str or _Scorer object."""
-        if type(scoring) == str:
-            coeff = -1 if scoring.startswith("neg_") else 1
-            scoring_func = lambda test, pred: mean_squared_error(test, pred) * coeff
-        elif type(scoring) == _Scorer:
-            scoring_func = scoring._score_func
-        else:
-            scoring_func = mean_squared_error
-        return scoring_func
-
     def cross_validate(self, split_op, cv, scoring, predictions: list, results: list, return_predictions: bool):
-        scoring_func = self.get_scoring_func(scoring)
+        """Perform cross-validation on the logical DAG."""
+        scoring_func, greater_is_better = get_scoring_func(scoring)
 
         # TODO we can parallelize over the folds
         for i, (train_index, test_index) in enumerate(cv.split(split_op.inputs[0].intermediate)):
+            self.cv_id = i
             logger.debug(f"CV Fold Nr. {i + 1}")
 
             # fit and predict the pipeline
@@ -81,21 +138,105 @@ def cross_validate(self, split_op, cv, scoring, predictions: list, results: list
             self.compute(self.pos_split_op)
             logger.debug("\n" + "="*100 + "\n" + "Training done for fold " + str(i+1) + "\n" + "="*100 + "\n")
             split_op.indices = test_index
-            df = self.compute(self.pos_split_op, mode="predict")
+            df, y_test = self.compute(self.pos_split_op, mode="predict")
             logger.debug("\n" + "="*100 + "\n" + "Predicting done for fold " + str(i+1) + "\n" + "="*100 + "\n")
             if return_predictions:
                 predictions.append(df)
 
             # scoring
-            y_test = split_op.intermediate[1]
             df = df.with_columns(df["vals"].map_elements(lambda pred: scoring_func(y_test, pl.Series(pred))).alias("scores"))
             df = df.drop("vals")
             results.append(df)
 
         results = pl.concat(results)
-        results = results.group_by("id").mean().sort("scores", descending=True)
+        results = results.group_by("id").mean().sort("scores", descending=greater_is_better)
         return results
 
+    def process_op(self, op: Op):
+        """Process one op (or reuse its cached result), update the GC bookkeeping and return the op."""
+        if SchedulerFlags.stratum_gc:
+            # this op consumes its inputs now: each input has one pending consumer less
+            for in_ in op.inputs:
+                self.intermediate_dependencies[in_] -= 1
+        logger.debug(f"[{perf_counter() - self.t0:.2f}s] Processing op: {op}")
+        try:
+            # cache lookup (only results of TableVectorizer transformer ops are cached)
+            use_cache = self.cache is not None and isinstance(op, TransformerOp) and op.name == "TableVectorizer"
+            cache_key, cache_value = None, None
+            if use_cache:
+                cache_key = stable_hash((op.get_hash(), self.cv_id, self.mode))
+                logger.debug(f"Cache lookup for op: {op} with key: {cache_key}")
+                cache_value = self.cache.get(cache_key)
+
+            if cache_value is not None:
+                # cache hit: skip the computation but still fall through to the bookkeeping below
+                logger.debug(f"Cache hit for op: {op}")
+                op.intermediate = cache_value
+            else:
+                t0 = perf_counter() if self.timings is not None else 0
+                op.process(mode=self.mode, environment=self.env)
+                if self.timings is not None:
+                    duration = perf_counter() - t0
+                    self.timings.append((str(op), duration))
+                # cache write
+                if use_cache:
+                    self.cache.set(cache_key, op.intermediate)
+                    logger.debug(f"Cached result of op: {op} with key: {cache_key}")
+        except Exception as e:
+            # chain the original exception so its traceback stays attached as __cause__
+            raise RuntimeError(f"[{self.mode}] Error processing '{op}': {e}") from e
+
+        # release intermediates without pending consumers, then register this op's consumer count
+        self.run_gc()
+        self.intermediate_dependencies[op] = len(op.outputs)
+
+        if SchedulerFlags.show_memory_usage:
+            gc.collect()
+            memory_usage = measure_memory_usage()
+            logger.debug(f"[{(perf_counter() - self.t0):.2f}s] Memory usage after processing {op}: {memory_usage}")
+            logger.debug(f"Memory usage of intermediate of {op}: {format_bytes(op.get_intermediate_size())}")
+        return op
+
+    def _format_predict_result(self, pred):
+        """Helper method to format prediction results consistently."""
+        if isinstance(pred, list):
+            return pl.DataFrame(pred)
+        elif isinstance(pred, dict) and "id" in pred and "vals" in pred:
+            return pl.DataFrame([pred])
+        else:
+            return pl.DataFrame({"vals": [pred], "id": ["default"]})
+
+    def _flag_op_for_recomputation_if_needed(self, op: Op):
+        """Helper method to flag an op for recomputation if it's an ImplOp with EvalMode."""
+        if isinstance(op, ImplOp) and isinstance(op.skrub_impl, EvalMode):
+            self.flagged_for_recomputation.append(op)
+
+class SequentialScheduler(Scheduler):
+    def __init__(self, dag_sink: Op, print_heavy_hitters=False, cache=None, env=None, t0 = None):
+        super().__init__(print_heavy_hitters, cache=cache, env=env, t0=t0)
+        self.ops_ordered = [op for op in topological_iterator(dag_sink)]
+
+    def evaluate(self, seed: int = 42, test_size = 0.2):
+        """Evaluate the pipeline with a train/test split and return predictions."""
+        try:
+            split_op = self.compute_xy()
+        except RuntimeError as e:
+            if "X and y nodes not found in the DAG" in str(e):
+                logger.warning("X and y nodes not found in the DAG, returning the last node")
+                return self.ops_ordered[-1].intermediate
+            else:
+                raise e
+
+        train_index, test_index = train_test_split(range(len(split_op.inputs[0].intermediate)), test_size=test_size, random_state=seed)
+        split_op.indices = train_index
+        for in_ in split_op.inputs:
+            self.intermediate_dependencies[in_] *= 2
+        self.compute(self.pos_split_op)
+        split_op.indices = test_index
+        pred, _ = self.compute(self.pos_split_op, mode="predict")
+        return pred["vals"][0]
+
+
     def compute(self, start_pos: int, mode="fit_transform"):
         """Compute the pipeline from start_pos onwards with given inputs."""
         ops_to_compute = self.ops_ordered[start_pos:]
@@ -103,15 +244,15 @@ def compute(self, start_pos: int, mode="fit_transform"):
             ops_to_compute = self.flagged_for_recomputation + ops_to_compute
         self.mode = mode
 
+        y_true = None
         for node in ops_to_compute:
             self.process_op(node)
+            if mode == "predict" and isinstance(node, SplitOp):
+                y_true = node.intermediate[1]
 
         if mode == "predict":
             pred = self.ops_ordered[-1].intermediate
-            if isinstance(pred, list):
-                return pl.DataFrame(pred)
-            else:
-                return pl.DataFrame({"vals": [pred], "id": ["default"]})
+            return self._format_predict_result(pred), y_true
         return None
 
     def compute_xy(self) -> SplitOp:
@@ -121,20 +262,124 @@ def compute_xy(self) -> SplitOp:
                 self.pos_split_op = i
                 return op
             self.process_op(op)
-            if isinstance(op, ImplOp) and isinstance(op.skrub_impl, EvalMode):
-                self.flagged_for_recomputation.append(op)
+            self._flag_op_for_recomputation_if_needed(op)
         raise RuntimeError("X and y nodes not found in the DAG")
 
-    def process_op(self, op: Op):
-        """Process a single DataOp node and return its output."""
-        logger.debug(f"Processing op: {op}")
-        t0 = perf_counter() if self.timings is not None else 0
+class ParallelScheduler(Scheduler):
+    def __init__(self, dag_sink: Op, parallel_groups: dict[int, tuple[int, list[Op]]], print_heavy_hitters=False, backend="threading", max_workers=None, cache=None, env=None):
+        """Build the block plan for `dag_sink`; `parallel_groups` is not read here, `linearize_dag` re-derives the groups from `op.parallel_group`."""
+        super().__init__(print_heavy_hitters, cache=cache, env=env)
+        self.linearize_dag(dag_sink)
+        self.backend = backend
+        # default to the CPU count, falling back to 8 when os.cpu_count() returns None
+        self.max_workers = max_workers if max_workers is not None else (os.cpu_count() or 8)
+
+    def linearize_dag(self, dag_sink: Op):
+        parallel_groups = {}
+        for op in topological_iterator(dag_sink):
+            if op.parallel_group is not None:
+                group = parallel_groups.get(op.parallel_group, [])
+                group.append(op)
+                parallel_groups[op.parallel_group] = group
+        groups_str = "\n".join("  ["+",".join(op.name for op in g) +"]" for g in parallel_groups.values()) #cant use f-string because of py3.11
+        logger.debug(f"Parallel groups:\n{groups_str}\n")
+        for group in parallel_groups.values():
+            inputs_union = set()
+            for op in group:
+                inputs_union.update(op.inputs)
+            for op in group:
+                # add additional dependencies s.t. all ops in the group are ready to compute
+                for in_ in inputs_union:
+                    if in_ not in op.inputs:
+                        if op.additional_inputs is None:
+                            op.additional_inputs = []
+                        op.additional_inputs.append(in_)
+                        if in_.additional_outputs is None:
+                            in_.additional_outputs = []
+                        in_.additional_outputs.append(op)
+        if FLAGS.DEBUG:
+            show_graph(dag_sink, "parallel_process_plan")
+
+        blocks = []
+        group_added = {}
+        for op in topological_iterator(dag_sink):
+
+            if op.parallel_group is None:
+                blocks.append(op)
+            else:
+                group = parallel_groups[op.parallel_group]
+                if not group_added.get(op.parallel_group, False):
+                    blocks.append(group)
+                    group_added[op.parallel_group] = True
+                
+                
+        self.blocks = blocks
+    
+    def compute(self, start_pos: int, mode="fit_transform"):
+        """Compute the pipeline from start_pos onwards with given inputs."""
+        blocks_to_compute = self.blocks[start_pos:]
+        if len(self.flagged_for_recomputation) != 0:
+            # Add flagged ops as individual blocks before the rest
+            blocks_to_compute = [op for op in self.flagged_for_recomputation] + blocks_to_compute
+        self.mode = mode
+
+        y_true = None
+        for block in blocks_to_compute:
+            self.process_block(block)
+            if mode == "predict" and isinstance(block, SplitOp):
+                y_true = block.intermediate[1]
+        if mode == "predict":
+            # Get the last block's output
+            last_block = self.blocks[-1]
+            return self._format_predict_result(last_block.intermediate), y_true
+        return None
+
+    def compute_xy(self) -> SplitOp:
+        """Process blocks until the split op is reached; record its position and return it unprocessed."""
+        for i, block in enumerate(self.blocks):
+            if not isinstance(block, list) and block.is_split_op:  # parallel groups are plain lists without `is_split_op`
+                self.pos_split_op = i
+                return block
+            self.process_block(block)
+            self._flag_op_for_recomputation_if_needed(block)
+        raise RuntimeError("X and y nodes not found in the DAG")
+
+    def process_block(self, block):
+        """Process a single block - either an Op or a list of Ops (parallel group)."""
+        if isinstance(block, list):
+            # Parallel group - process ops in parallel
+            ops = block
+            logger.debug(f"Processing parallel block with {len(ops)} ops")
+            t0 = perf_counter() if self.timings is not None else 0
+            # "auto" only switches to processes when every op of the group is an EstimatorOp
+            if self.backend == "process" or (self.backend == "auto" and all(isinstance(op, EstimatorOp) for op in ops)):
+                logger.debug("Using process-based parallel processing with joblib")
+                results = Parallel(n_jobs=len(ops), backend="loky")(
+                    delayed(op.get_process_task())(op.extract_args_from_inputs(self.mode))
+                    for op in ops
+                )
+                # each task yields a (result, fitted estimator) pair; copy both back onto the ops
+                for i, (result, fitted_estimator) in enumerate(results):
+                    ops[i].intermediate = result
+                    ops[i].estimator = fitted_estimator
+            else:
+                logger.debug("Using thread-based parallel processing with ThreadPoolExecutor")
+                with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                    futures = [executor.submit(self._process_op_task, op, self.mode, self.env) for op in ops]
+                for i, future in enumerate(futures):
+                    ops[i].intermediate = future.result()
+            # the whole group is recorded as a single timing entry
+            if self.timings is not None:
+                duration = perf_counter() - t0
+                self.timings.append((f"ParallelBlock({len(ops)} ops)", duration))
+        else:
+            # Single op - process sequentially
+            self.process_op(block)
+
+    def _process_op_task(self, op: Op, mode: str, environment: dict):
+        """Thread-pool task: process `op` with the given mode/environment and return its intermediate result."""
         try:
-            op.process(mode=self.mode, environment=self.env)
+            op.process(mode=mode, environment=environment)
+            return op.intermediate
         except Exception as e:
-            raise RuntimeError(f"[{self.mode}] Error processing '{op}': {e}")
-
-        if self.timings is not None:
-            duration = perf_counter() - t0
-            self.timings.append((str(op), duration))
-        return op
\ No newline at end of file
+            raise RuntimeError(f"[{mode}] Error processing '{op}': {e}") from e
\ No newline at end of file
diff --git a/stratum/tests/application/test_multi_level_choice_graph.py b/stratum/tests/application/test_multi_level_choice_graph.py
index bca02997..a04a6787 100644
--- a/stratum/tests/application/test_multi_level_choice_graph.py
+++ b/stratum/tests/application/test_multi_level_choice_graph.py
@@ -1,4 +1,5 @@
 import os
+import pickle
 import tempfile
 import unittest
 import uuid
@@ -13,10 +14,14 @@
 from xgboost import XGBRegressor
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.metrics import make_scorer, mean_squared_error, r2_score
+from sklearn.model_selection import KFold
 from stratum.logical_optimizer._optimize import optimize
-
-
+import polars as pl
+import logging
+logging.basicConfig(level=logging.DEBUG)
+from stratum.runtime._scheduler import SchedulerFlags
 class TargetEncoder(BaseEstimator, TransformerMixin):
+    
     def fit(self, X, y=None):
         print("fit target encoder")
         self.global_mean_ = y.mean()
@@ -87,44 +92,65 @@ def is_numeric_column(col):
     def df2(X):
         return X.skb.apply(TableVectorizer())
 
-    X_vec = skrub.choose_from({"1": df1(X,y), "2": df2(X)}, name = "data engineering").as_data_op()
+    X_vec = skrub.choose_from({"1": df1(X,y), "2": df2(X)}, name = "pre").as_data_op()
     models = {
         "Ridge": Ridge(random_state=42),
-        "XGBoost": XGBRegressor(random_state=42),
-        "LightGBM": LGBMRegressor(random_state=42),
-        "ElasticNet": ElasticNet(random_state=42),
+        "xgb": XGBRegressor(random_state=42),
+        "lgbm": LGBMRegressor(random_state=42),
+        "elastic": ElasticNet(random_state=42),
     }
     preds = {name: X_vec.skb.apply(m, y=y) for name, m in models.items()}
-    return skrub.choose_from(preds, name="models").as_data_op()
-    # model = skrub.choose_from(models, name="models").as_data_op()
-    # preds = X_vec.skb.apply(model, y=y)
-    return preds
+    return skrub.choose_from(preds, name="m").as_data_op()
 
-def make_data(n: int = 1000):
+def make_data(n: int = 1000, seed: int = 42):
+    np.random.seed(seed)
+    rng = np.random.default_rng(seed)
     df = pd.DataFrame({
         "Transaction unique identifier": [str(uuid.uuid4()) for _ in range(n)],
-        "Price": np.random.randint(50000, 2_000_000, size=n),
+        "Price": rng.integers(50000, 2_000_000, size=n),
         "Date of Transfer": pd.to_datetime(
-            np.random.choice(pd.date_range("2010-01-01", "2024-12-31"), size=n)
+            rng.choice(pd.date_range("2010-01-01", "2024-12-31"), size=n)
         ).astype(str),
-        "Property Type": np.random.choice(list("DSTFO"), size=n),
-        "Old/New": np.random.choice(["Y", "N"], size=n),
-        "Duration": np.random.choice(["F", "L"], size=n),
-        "Town/City": np.random.choice(
+        "Property Type": rng.choice(list("DSTFO"), size=n),
+        "Old/New": rng.choice(["Y", "N"], size=n),
+        "Duration": rng.choice(["F", "L"], size=n),
+        "Town/City": rng.choice(
             ["London", "Manchester", "Birmingham", "Leeds", "Bristol"], size=n
         ),
-        "District": np.random.choice(
+        "District": rng.choice(
             ["District A", "District B", "District C"], size=n
         ),
-        "County": np.random.choice(
+        "County": rng.choice(
             ["Greater London", "West Midlands", "Greater Manchester"], size=n
         ),
-        "PPDCategory Type": np.random.choice(["A", "B"], size=n),
-        "Record Status - monthly file only": np.random.choice(["A", "C"], size=n),
+        "PPDCategory Type": rng.choice(["A", "B"], size=n),
+        "Record Status - monthly file only": rng.choice(["A", "C"], size=n),
     })
     return df
 
 class TestMultiLevelChoiceGraph(unittest.TestCase):
+    expected_results = pl.DataFrame({
+        "id": [
+            "m:elastic, pre:2",
+            "m:elastic, pre:1",
+            "m:Ridge, pre:2",
+            "m:Ridge, pre:1",
+            "m:xgb, pre:2",
+            "m:lgbm, pre:2",
+            "m:lgbm, pre:1",
+            "m:xgb, pre:1"
+        ],
+        "scores": [
+            -0.000779,
+            -0.028774,
+            -0.021469,
+            -0.040625,
+            -0.156263,
+            -0.174555,
+            -0.172825,
+            -0.251869
+        ]
+    })
 
     def test_application(self):
         tmp_path = tempfile.mkdtemp()
@@ -133,6 +159,89 @@ def test_application(self):
         df.to_csv(os.path.join(tmp_path, "data.csv"), index=False)
         preds = define_pipeline(os.path.join(tmp_path, "data.csv"))
         scorer = make_scorer(r2_score)
-        with skrub.config(DEBUG=True, open_graph=False, scheduler=True, rust_backend=False):
-            search = preds.skb.make_grid_search(fitted=True, cv = 2, scoring=scorer)
-            print(search.results_)
+        cv = KFold(n_splits=2, shuffle=True, random_state=42)
+        with skrub.config(DEBUG=True, open_graph=False, scheduler=True, rust_backend=False, scheduler_parallelism=None, stats=20):
+            search = preds.skb.make_grid_search(fitted=True, cv = cv, scoring=scorer)
+        print(search.results_)
+
+
+    def run_application(self, sched_par: str = None):
+        tmp_path = tempfile.mkdtemp()
+        df = make_data()
+        df.to_csv(os.path.join(tmp_path, "data.csv"), index=False)
+        preds = define_pipeline(os.path.join(tmp_path, "data.csv"))
+        preds = preds.skb.apply_func(lambda a, m: a, m=skrub.eval_mode())
+        scorer = make_scorer(r2_score)
+        cv = KFold(n_splits=2, shuffle=True, random_state=42)
+        with skrub.config(DEBUG=True, open_graph=False, scheduler=True, rust_backend=False, scheduler_parallelism=sched_par, stats=20):
+            search = preds.skb.make_grid_search(fitted=True, cv = cv, scoring=scorer)
+        print(search.results_)
+        return search.results_
+
+    def test_application_no_parallelism(self):
+        actual_results = self.run_application()
+        # Convert to pandas for comparison
+        # TODO: pre:2 is non-deterministic right now, so we need to filter it out
+        filter_expr = pl.col("id").str.contains("pre:1") & ~pl.col("id").str.contains("xgb|lgbm")
+        actual_df = actual_results.sort("id").filter(filter_expr).to_pandas()
+        expected_df = self.expected_results.sort("id").filter(filter_expr).to_pandas()
+        print(actual_df)
+        pd.testing.assert_frame_equal(
+            actual_df,
+            expected_df,
+            atol=1e-6,
+            check_dtype=False
+        )
+
+    def test_application_threading(self):
+        # the GC flag is global state: restore it even when an assertion below fails
+        SchedulerFlags.stratum_gc = False
+        self.addCleanup(setattr, SchedulerFlags, "stratum_gc", True)
+        actual_results = self.run_application(sched_par="threading")
+        # Compare as pandas frames. TODO: pre:2 is non-deterministic right now, so we need to filter it out
+        filter_expr = pl.col("id").str.contains("pre:1") & ~pl.col("id").str.contains("xgb|lgbm")
+        actual_df = actual_results.sort("id").filter(filter_expr).to_pandas()
+        expected_df = self.expected_results.sort("id").filter(filter_expr).to_pandas()
+        print(actual_df)
+        pd.testing.assert_frame_equal(
+            actual_df,
+            expected_df,
+            atol=1e-6,
+            check_dtype=False
+        )
+    
+    def test_application_process(self):
+        # the GC flag is global state: restore it even when an assertion below fails
+        SchedulerFlags.stratum_gc = False
+        self.addCleanup(setattr, SchedulerFlags, "stratum_gc", True)
+        actual_results = self.run_application(sched_par="process")
+        # Compare as pandas frames. TODO: pre:2 is non-deterministic right now, so we need to filter it out
+        filter_expr = pl.col("id").str.contains("pre:1") & ~pl.col("id").str.contains("xgb|lgbm")
+        actual_df = actual_results.sort("id").filter(filter_expr).to_pandas()
+        expected_df = self.expected_results.sort("id").filter(filter_expr).to_pandas()
+        print(actual_df)
+        pd.testing.assert_frame_equal(
+            actual_df,
+            expected_df,
+            atol=1e-6,
+            check_dtype=False
+        )
+
+    def test_application_auto(self):
+        # the GC flag is global state: restore it even when an assertion below fails
+        SchedulerFlags.stratum_gc = False
+        self.addCleanup(setattr, SchedulerFlags, "stratum_gc", True)
+        actual_results = self.run_application(sched_par="auto")
+        # Compare as pandas frames. TODO: pre:2 is non-deterministic right now, so we need to filter it out
+        filter_expr = pl.col("id").str.contains("pre:1") & ~pl.col("id").str.contains("xgb|lgbm")
+        actual_df = actual_results.sort("id").filter(filter_expr).to_pandas()
+        expected_df = self.expected_results.sort("id").filter(filter_expr).to_pandas()
+        print(actual_df)
+        pd.testing.assert_frame_equal(
+            actual_df,
+            expected_df,
+            atol=1e-6,
+            check_dtype=False
+        )
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/stratum/tests/logical_optimizer/test_dataframe_ops.py b/stratum/tests/logical_optimizer/test_dataframe_ops.py
index 2857931d..d1201e4d 100644
--- a/stratum/tests/logical_optimizer/test_dataframe_ops.py
+++ b/stratum/tests/logical_optimizer/test_dataframe_ops.py
@@ -2,11 +2,15 @@
 import tempfile
 from stratum.logical_optimizer._dataframe_ops import AssignOp, DataSourceOp, DatetimeConversionOp, GetAttrProjectionOp, ProjectionOp
 from stratum.logical_optimizer._ops import GetItemOp, MethodCallOp
-from stratum.logical_optimizer._optimize import OptConfig, optimize
+from stratum.logical_optimizer._optimize import OptConfig, optimize as optimize_
+from stratum.logical_optimizer._op_utils import topological_iterator
 import stratum as skrub
 import pandas as pd
 import unittest
 
+def optimize(dag, conf=None):
+    return list(topological_iterator(optimize_(dag, conf)))
+
 class TestDataframeOps(unittest.TestCase):
     def setUp(self):
         self.df = pd.DataFrame({
@@ -53,8 +57,9 @@ def test_projection_fused_get_item_rewrite_df2(self):
         sub_dag2 = data
         sink = skrub.choose_from([sub_dag1, sub_dag2]).as_data_op()
         ops = optimize(sink)
+        print(ops)
         self.assertEqual(5, len(ops))
-        self.assertTrue(isinstance(ops[2], GetItemOp))
+        self.assertTrue(isinstance(ops[1], GetItemOp))
         self.assertTrue(isinstance(ops[3], ProjectionOp))
 
     def test_fused_get_attr_rewrite_df(self):
diff --git a/stratum/tests/logical_optimizer/test_op_utils.py b/stratum/tests/logical_optimizer/test_op_utils.py
index 1f44af8d..b57f1453 100644
--- a/stratum/tests/logical_optimizer/test_op_utils.py
+++ b/stratum/tests/logical_optimizer/test_op_utils.py
@@ -1,11 +1,14 @@
 #from curses import flash
 import unittest
 import stratum as skrub
-from stratum.logical_optimizer._optimize import optimize, OptConfig, choice_unrolling
-from stratum.logical_optimizer._op_utils import show_graph, clone_sub_dag
+from stratum.logical_optimizer._optimize import optimize as optimize_, OptConfig, choice_unrolling
+from stratum.logical_optimizer._op_utils import show_graph, clone_sub_dag, topological_iterator
 from stratum._config import config
 graph = False
 
+def optimize(dag, conf=None):
+    return list(topological_iterator(optimize_(dag, conf)))
+
 class TestOpUtils(unittest.TestCase):
     def setUp(self):
         pass
diff --git a/stratum/tests/logical_optimizer/test_ops.py b/stratum/tests/logical_optimizer/test_ops.py
index a276d9a4..388d566f 100644
--- a/stratum/tests/logical_optimizer/test_ops.py
+++ b/stratum/tests/logical_optimizer/test_ops.py
@@ -1,11 +1,11 @@
 import unittest
 import pandas as pd
 import stratum as skrub
+from stratum.logical_optimizer._op_utils import topological_iterator
 from stratum.logical_optimizer._ops import (
     ImplOp, Op, ChoiceOp, ValueOp, MethodCallOp, CallOp, GetAttrOp, GetItemOp, SearchEvalOp, as_op
 )
-from stratum.logical_optimizer._optimize import optimize
-from sklearn.ensemble import RandomForestRegressor
+from stratum.logical_optimizer._optimize import optimize as optimize_
 from sklearn.dummy import DummyRegressor
 
 class TestOpCloning(unittest.TestCase):
@@ -49,7 +49,7 @@ def test_clone_ops(self):
         pred = pred.skb.apply_func(lambda x,a, b: x, 1, b=1)
         choice = skrub.choose_from([pred], name="choice").as_data_op()
         out = choice.empty
-        ops = optimize(out)
+        ops = list(topological_iterator(optimize_(out)))
 
         try:
             ops[0].clone()
@@ -94,4 +94,10 @@ def test_replace_non_existing_output(self):
         try:
             op.replace_output(3, 4)
         except ValueError as e:
-            self.assertEqual(str(e), "Output 3 not found in Op.")
\ No newline at end of file
+            self.assertEqual(str(e), "Output 3 not found in Op.")
+
+
+    def test_var_ops(self):
+        var = skrub.var("test")
+        out = var.skb.apply_func(pd.read_csv)
+        ops = list(topological_iterator(optimize_(out)))
diff --git a/stratum/tests/logical_optimizer/test_optimize.py b/stratum/tests/logical_optimizer/test_optimize.py
index ad08fec8..5addc1ad 100644
--- a/stratum/tests/logical_optimizer/test_optimize.py
+++ b/stratum/tests/logical_optimizer/test_optimize.py
@@ -1,3 +1,4 @@
+from stratum.logical_optimizer._op_utils import topological_iterator
 from stratum.logical_optimizer._optimize import OptConfig, optimize
 import stratum as skrub
 import pandas as pd
@@ -27,8 +28,7 @@ def test_optimize(self):
         X2 = X1.assign(
             year=X1["datetime"].dt.year,
             month=X1["datetime"].dt.month)
-        out = optimize(X2, OptConfig(cse=True))
-        self.assertEqual(out[0].skrub_impl, data._skrub_impl)
+        out = list(topological_iterator(optimize(X2, OptConfig(cse=True))))
         self.assertTrue(out[0].outputs[0] is out[1])
         self.assertTrue(len(out[0].inputs) == 0)
         
diff --git a/stratum/tests/runtime/test_caching.py b/stratum/tests/runtime/test_caching.py
new file mode 100644
index 00000000..ef12d907
--- /dev/null
+++ b/stratum/tests/runtime/test_caching.py
@@ -0,0 +1,166 @@
+import unittest
+import os
+import sys
+from sklearn.dummy import DummyRegressor
+from sklearn.model_selection import KFold
+import stratum as skrub
+from stratum.logical_optimizer._op_utils import topological_iterator
+from stratum.runtime._scheduler import SchedulerFlags
+from stratum.tests.runtime.runtime_test_utils import RuntimeTest
+import logging
+from stratum.logical_optimizer._optimize import optimize
+import pandas as pd
+logging.basicConfig(level=logging.DEBUG)
+
+
+class SearchTest(RuntimeTest):
+    expected_simple_hashes = {
+        "local": [
+            14466646976231713574,
+            4283753923329093683,
+            11672455255761944456,
+            1,
+            3,
+            2,
+            17673706173561179344,
+            6346118744052152261,
+        ],
+        "linux": [
+            17843118638478979946,
+            4283753923329093683,
+            11672455255761944456,
+            1,
+            3,
+            2,
+            17673706173561179344,
+            6346118744052152261,
+        ],
+        "macos": [
+            17237841316323807291,
+            4283753923329093683,
+            11672455255761944456,
+            1,
+            3,
+            2,
+            17673706173561179344,
+            6346118744052152261,
+        ],
+        "windows": [
+            9534843511007154554,
+            4283753923329093683,
+            11672455255761944456,
+            1,
+            3,
+            2,
+            17673706173561179344,
+            6346118744052152261,
+        ],
+    }
+    expected_hashes = {
+        "local": [
+            17214955316726503821,
+            11824152000386466899,
+            18298532774759976535,
+            7513694150800269850,
+            5537892472318521177,
+            168195864670644233,
+            1997578848421863092,
+            14476433947220053316,
+        ],
+        "linux": [
+            7056806754431583388,
+            7639690250793122720,
+            17532383718078189923,
+            10707031619699354836,
+            7435966898669112865,
+            8941144976148573683,
+            16675763945336090482,
+            13801223252098341323,
+        ],
+        "macos": [
+            11800167861632073492,
+            13894009875302220469,
+            2903296657173264096,
+            4207835120194851649,
+            11109528315728706675,
+            17956785590977498290,
+            10919015601046973997,
+            12242410082145359458,
+        ],
+        "windows": [
+            6235675172187585043,
+            6926154378978508485,
+            3072755605188723418,
+            18085496009191016749,
+            15337519220874548500,
+            10614601562615527768,
+            15722255280919350770,
+            508214583577843344,
+        ],
+    }
+
+    @classmethod
+    def _detect_mode(cls) -> str | None:
+        """Detect environment to pick the right expected hash set.
+
+        - \"local\": developer machine at /Users/elias/PycharmProjects/stratum/
+        - \"linux\" / \"macos\" / \"windows\": GitHub runners on the respective OS.
+        """
+        file_path = os.path.abspath(__file__)
+        local_root = "/Users/elias/PycharmProjects/stratum/"
+        if file_path.startswith(local_root):
+            return "local"
+        if sys.platform.startswith("linux"):
+            return "linux"
+        if sys.platform.startswith("darwin"):
+            return "macos"
+        if sys.platform.startswith(("win32", "cygwin")):
+            return "windows"
+        return None
+
+    def compare_hashes(self, op, expected_hash, simple = False,):
+        hash_val = op.simple_hash() if simple else op.get_hash()
+        self.assertEqual(expected_hash, hash_val, f"Hash mismatch for {op}")
+
+
+
+    def test_hashes(self):
+        file_path = os.path.join(os.path.dirname(__file__), "data.csv")
+        mode = self._detect_mode()
+        if mode not in self.expected_simple_hashes or mode not in self.expected_hashes:
+            self.skipTest(f"No expected hashes defined for mode={mode!r}")
+        self.df.to_csv(file_path, index=False)
+        data = skrub.as_data_op(file_path).skb.apply_func(pd.read_csv)
+        X = data[["x", "datetime"]].skb.mark_as_X()
+        y = data["y"].skb.mark_as_y()
+
+        x_vec = X.skb.apply(skrub.TableVectorizer())
+        pred = x_vec.skb.apply(DummyRegressor(), y=y)
+        pred = optimize(pred)
+        ops = list(topological_iterator(pred))
+
+        for i, op in enumerate(ops):
+            self.compare_hashes(op, self.expected_simple_hashes[mode][i], simple=True)
+        for i, op in enumerate(ops):
+            self.compare_hashes(op, self.expected_hashes[mode][i])
+            
+    def test_search(self):
+        SchedulerFlags.stratum_gc = False
+        self.addCleanup(setattr, SchedulerFlags, "stratum_gc", True)  # restore the global flag even on skip or failure
+        file_path = os.path.join(os.path.dirname(__file__), "data.csv")
+        mode = self._detect_mode()
+        if mode not in self.expected_simple_hashes or mode not in self.expected_hashes:
+            self.skipTest(f"No expected hashes defined for mode={mode!r}")
+        self.df.to_csv(file_path, index=False)
+        data = skrub.as_data_op(file_path).skb.apply_func(pd.read_csv)
+        X = data[["x", "datetime"]].skb.mark_as_X()
+        y = data["y"].skb.mark_as_y()
+
+        x_vec = X.skb.apply(skrub.TableVectorizer())
+        pred = x_vec.skb.apply(DummyRegressor(), y=y)
+        cv = KFold(n_splits=3, shuffle=True, random_state=42)
+        with skrub.config(scheduler=True, stats=20, caching=True):
+            search = pred.skb.make_grid_search(cv=cv, fitted=True,scoring="neg_mean_squared_error")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/stratum/tests/runtime/test_evaluate.py b/stratum/tests/runtime/test_evaluate.py
index ddcff401..03df8afd 100644
--- a/stratum/tests/runtime/test_evaluate.py
+++ b/stratum/tests/runtime/test_evaluate.py
@@ -1,3 +1,5 @@
+from contextlib import redirect_stderr, redirect_stdout
+from io import StringIO
 import unittest
 from sklearn.datasets import make_regression
 from sklearn.preprocessing import StandardScaler
@@ -6,7 +8,7 @@
 import pandas as pd
 from stratum._api import evaluate
 from stratum.tests.runtime.runtime_test_utils import RuntimeTest, datetime_pipeline1
-import stratum
+from stratum.runtime._scheduler import logger
 import logging
 logging.basicConfig(level=logging.INFO)
 
@@ -76,5 +78,23 @@ def test_evaluate(self):
         pred = x_scaled.skb.apply(RandomForestRegressor(random_state=42), y=y)
         self.compare_evaluate(pred)
 
+    def test_evaluate_with_error(self):
+        """evaluate() must surface a failure inside a node as a RuntimeError naming the mode and op."""
+        data = skrub.as_data_op(self.df)
+        # The lambda passes its input through in "preview" mode and raises ValueError in any other mode.
+        data = data.skb.apply_func(lambda x,m: x if m == "preview" else int("not a number :P"), m=skrub.eval_mode())
+        # assertRaises reports a missing exception directly; the former try/except caught
+        # self.fail()'s own AssertionError in its `except Exception` arm and mis-reported it.
+        with self.assertRaises(RuntimeError) as ctx:
+            evaluate(data, seed=self.seed, test_size=self.test_size)
+        expected = "[fit_transform] Error processing 'CallOp(<lambda>)': invalid literal for int() with base 10: 'not a number :P'"
+        self.assertEqual(expected, str(ctx.exception))
+
+    def test_evaluate_no_X_y(self):
+        # No node is marked as X or y here; the test expects a WARNING on the scheduler's logger.
+        data = skrub.as_data_op(self.df)
+        with self.assertLogs(logger, level=logging.WARNING) as log:
+            evaluate(data, seed=self.seed, test_size=self.test_size)
+        self.assertIn("X and y nodes not found in the DAG", log.output[0])
 if __name__ == "__main__":
     unittest.main()
\ No newline at end of file
diff --git a/stratum/tests/runtime/test_search.py b/stratum/tests/runtime/test_search.py
index e49827cf..4df8f281 100644
--- a/stratum/tests/runtime/test_search.py
+++ b/stratum/tests/runtime/test_search.py
@@ -39,7 +39,7 @@ def test_search(self):
         search_stratum, preds = grid_search(y, cv=cv, scoring="neg_mean_squared_error", return_predictions=True)
 
         search = y.skb.make_grid_search(cv=cv, fitted=True,scoring="neg_mean_squared_error")
-        assert(np.allclose(search.results_["mean_test_score"], search_stratum.results_["scores"]))
+        assert(np.allclose(search.results_["mean_test_score"]*-1, search_stratum.results_["scores"]))
 
 
 
@@ -65,6 +65,17 @@ def test_search_with_no_y(self):
         except RuntimeError as e:
             self.assertEqual("X and y nodes not found in the DAG",str(e))
 
+    def test_search_with_no_y_parrel_scheduler(self):
+        start = skrub.as_data_op(True)
+        end = start.skb.apply_func(lambda a: a).skb.mark_as_X()
+        # Only X is marked (no y); expects a RuntimeError under scheduler_parallelism="threading". NOTE(review): "parrel" in the name looks like a typo for "parallel".
+        try:
+            with skrub.config(stats=20, scheduler_parallelism="threading"):
+                grid_search(end, return_predictions=True)
+            self.fail("Expected RuntimeError")
+        except RuntimeError as e:
+            self.assertEqual("X and y nodes not found in the DAG",str(e))
+
 
     def test_search_choice_not_at_the_end1(self):
         data = skrub.as_data_op(self.df)
@@ -122,8 +133,8 @@ def test_search_with_stats(self):
         out = stdout.getvalue()
         out = out.split("\n")
         self.assertIn("Heavy hitters", out[2])
-        self.assertIn("CallOp(<lambda>)", out[4])
-        assert(out[4].split(" ")[-1] == "10")
+        self.assertIn("CallOp(<lambda>)", out[5])
+        assert(out[5].split(" ")[-1] == "10")
 
 
     def test_fused_attr(self):
diff --git a/stratum/tests/test_init_module.py b/stratum/tests/test_init_module.py
index 407bb440..030a9eee 100644
--- a/stratum/tests/test_init_module.py
+++ b/stratum/tests/test_init_module.py
@@ -1,6 +1,7 @@
 import stratum
 import os
-from stratum._config import _env_bool
+from stratum._config import _env_bool, _env_str
+from stratum._config import FLAGS
 
 def test_versions_contains_strings():
     versions = stratum.versions()
@@ -23,4 +24,22 @@ def test_env_bool_false_values():
         assert _env_bool("TEST_BOOL", True) is False
         del os.environ["TEST_BOOL"]
 
+def test_config_scheduler_parallelism():
+    """Check config() accepts the valid modes, rejects others, and _env_str maps "none" to None."""
+    for mode in ("threading", "process", "auto"):
+        with stratum.config(scheduler_parallelism=mode):
+            assert FLAGS.scheduler_parallelism == mode
+    try:
+        with stratum.config(scheduler_parallelism="invalid"):
+            assert False, "expected ValueError for an invalid scheduler_parallelism"
+    except ValueError as e:
+        assert str(e) == "scheduler_parallelism must be None, 'threading', 'process', or 'auto', got invalid"
+    try:
+        os.environ["STRATUM_SCHEDULER_PARALLELISM"] = "threading"
+        assert _env_str("STRATUM_SCHEDULER_PARALLELISM") == "threading"
+        os.environ["STRATUM_SCHEDULER_PARALLELISM"] = "none"
+        assert _env_str("STRATUM_SCHEDULER_PARALLELISM") is None
+    finally:  # never leak the variable into later tests, even when an assertion above fails
+        os.environ.pop("STRATUM_SCHEDULER_PARALLELISM", None)
+
 
diff --git a/uv.lock b/uv.lock
index d93976b5..6e099521 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,9 +2,15 @@ version = 1
 revision = 3
 requires-python = ">=3.11"
 resolution-markers = [
-    "python_full_version >= '3.14'",
-    "python_full_version >= '3.12' and python_full_version < '3.14'",
-    "python_full_version < '3.12'",
+    "python_full_version >= '3.14' and sys_platform == 'win32'",
+    "python_full_version >= '3.14' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+    "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'",
+    "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+    "python_full_version < '3.12' and sys_platform == 'win32'",
+    "python_full_version < '3.12' and sys_platform == 'emscripten'",
+    "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'",
 ]
 
 [[package]]
@@ -1637,7 +1643,7 @@ name = "pexpect"
 version = "4.9.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "ptyprocess" },
+    { name = "ptyprocess", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" }
 wheels = [
@@ -1982,11 +1988,11 @@ wheels = [
 
 [[package]]
 name = "pytz"
-version = "2025.2"
+version = "2026.1.post1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
+    { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" },
 ]
 
 [[package]]
@@ -2491,7 +2497,10 @@ version = "0.0.0.dev0"
 source = { editable = "." }
 dependencies = [
     { name = "graphviz" },
+    { name = "joblib" },
+    { name = "pandas" },
     { name = "polars" },
+    { name = "psutil" },
     { name = "pyarrow" },
     { name = "scikit-learn" },
     { name = "skrub" },
@@ -2518,14 +2527,17 @@ dev = [
 requires-dist = [
     { name = "coverage", extras = ["toml"], marker = "extra == 'test'" },
     { name = "graphviz" },
+    { name = "joblib" },
     { name = "jupyter", marker = "extra == 'dev'" },
     { name = "lightgbm", marker = "extra == 'test'", specifier = ">=4.6.0" },
+    { name = "pandas", specifier = "==2.3.3" },
     { name = "polars" },
+    { name = "psutil" },
     { name = "pyarrow", specifier = ">=22.0.0" },
     { name = "pytest", marker = "extra == 'test'" },
     { name = "pytest-cov", marker = "extra == 'test'" },
     { name = "scikit-learn", specifier = "==1.8" },
-    { name = "skrub", specifier = ">=0.3" },
+    { name = "skrub", specifier = "==0.6.2" },
     { name = "xgboost", marker = "extra == 'test'", specifier = ">=3.1.1" },
 ]
 provides-extras = ["test", "benchmark", "dev"]