From 327e8bcee09e85ac0e359b2b73c3bd924910a65d Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 11:41:06 +0200
Subject: [PATCH 01/10] Added TabPFN as surrogate model

---
 .../5_tabPFN_surrogate_model.py               |  99 ++++++++++++
 setup.py                                      |   1 +
 smac/model/__init__.py                        |   7 +-
 smac/model/tabPFNv2.py                        | 132 ++++++++++++++++
 tests/test_model/test_tabpfn.py               | 145 ++++++++++++++++++
 5 files changed, 379 insertions(+), 5 deletions(-)
 create mode 100644 examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
 create mode 100644 smac/model/tabPFNv2.py
 create mode 100644 tests/test_model/test_tabpfn.py

diff --git a/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
new file mode 100644
index 0000000000..4370139390
--- /dev/null
+++ b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
@@ -0,0 +1,99 @@
+"""Support Vector Machine with Cross-Validation
+# Flags: doc-Runnable
+
+An example of optimizing a simple support vector machine on the IRIS dataset. We use the
+hyperparameter optimization facade, but replace its default random forest surrogate model with
+TabPFN (this requires the `tabpfn` package to be installed). The search space combines mixed
+data types with conditional hyperparameters.
+"""
+
+import numpy as np
+from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer
+from ConfigSpace.conditions import InCondition
+from sklearn import datasets, svm
+from sklearn.model_selection import cross_val_score
+from smac.model.tabPFNv2 import TabPFNModel
+
+from smac import HyperparameterOptimizationFacade, Scenario
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+
+# We load the iris-dataset (a widely used benchmark)
+iris = datasets.load_iris()
+
+
+class SVM:
+    @property
+    def configspace(self) -> ConfigurationSpace:
+        # Build Configuration Space which defines all parameters and their ranges
+        cs = ConfigurationSpace(seed=0)
+
+        # First we create our hyperparameters
+        kernel = Categorical("kernel", ["linear", "poly", "rbf", "sigmoid"], default="poly")
+        C = Float("C", (0.001, 1000.0), default=1.0, log=True)
+        shrinking = Categorical("shrinking", [True, False], default=True)
+        degree = Integer("degree", (1, 5), default=3)
+        coef = Float("coef0", (0.0, 10.0), default=0.0)
+        gamma = Categorical("gamma", ["auto", "value"], default="auto")
+        gamma_value = Float("gamma_value", (0.0001, 8.0), default=1.0, log=True)
+
+        # Then we create dependencies
+        use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
+        use_coef = InCondition(child=coef, parent=kernel, values=["poly", "sigmoid"])
+        use_gamma = InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"])
+        use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"])
+
+        # Add hyperparameters and conditions to our configspace
+        cs.add([kernel, C, shrinking, degree, coef, gamma, gamma_value])
+        cs.add([use_degree, use_coef, use_gamma, use_gamma_value])
+
+        return cs
+
+    def train(self, config: Configuration, seed: int = 0) -> float:
+        """Creates a SVM based on a configuration and evaluates it on the
+        iris-dataset using cross-validation."""
+        config_dict = dict(config)
+        if "gamma" in config:
+            config_dict["gamma"] = config_dict["gamma_value"] if config_dict["gamma"] == "value" else "auto"
+            config_dict.pop("gamma_value", None)
+
+        classifier = svm.SVC(**config_dict, random_state=seed)
+        scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
+        cost = 1 - np.mean(scores)
+
+        return cost
+
+
+if __name__ == "__main__":
+    classifier = SVM()
+
+    # Next, we create an object, holding general information about the run
+    scenario = Scenario(
+        classifier.configspace,
+        n_trials=50,  # We want to run max 50 trials (combination of config and seed)
+    )
+
+    # We want to run the facade's default initial design, but we want to change the number
+    # of initial configs to 5.
+    initial_design = HyperparameterOptimizationFacade.get_initial_design(scenario, n_configs=5)
+
+    # Now we use SMAC to find the best hyperparameters
+    smac = HyperparameterOptimizationFacade(
+        scenario,
+        classifier.train,
+        initial_design=initial_design,
+        overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
+        model=TabPFNModel(configspace=scenario.configspace, seed=scenario.seed), # use TabPFN as surrogate model
+    )
+
+    incumbent = smac.optimize()
+
+    # Get cost of default configuration
+    default_cost = smac.validate(classifier.configspace.get_default_configuration())
+    print(f"Default cost: {default_cost}")
+
+    # Let's calculate the cost of the incumbent
+    incumbent_cost = smac.validate(incumbent)
+    print(f"Incumbent cost: {incumbent_cost}")
diff --git a/setup.py b/setup.py
index 6ac234efe1..f31ad085ab 100644
--- a/setup.py
+++ b/setup.py
@@ -55,6 +55,7 @@ def read_file(filepath: str) -> str:
         "pytest",
         "pytest-coverage",
         "pytest-cases",
+        "tabpfn"
     ],
 }
 
diff --git a/smac/model/__init__.py b/smac/model/__init__.py
index fd33967912..648035e8b6 100644
--- a/smac/model/__init__.py
+++ b/smac/model/__init__.py
@@ -1,9 +1,6 @@
 from smac.model.abstract_model import AbstractModel
 from smac.model.multi_objective_model import MultiObjectiveModel
 from smac.model.random_model import RandomModel
+from smac.model.tabPFNv2 import TabPFNModel
 
-__all__ = [
-    "AbstractModel",
-    "MultiObjectiveModel",
-    "RandomModel",
-]
+__all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel", "TabPFNModel"]
diff --git a/smac/model/tabPFNv2.py b/smac/model/tabPFNv2.py
new file mode 100644
index 0000000000..da39b357e2
--- /dev/null
+++ b/smac/model/tabPFNv2.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+from ConfigSpace import ConfigurationSpace
+from ConfigSpace.hyperparameters import CategoricalHyperparameter
+from tabpfn import TabPFNRegressor
+
+from smac.model.abstract_model import AbstractModel
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class TabPFNModel(AbstractModel):
+    """TabPFNModel, for more details check: https://github.com/PriorLabs/TabPFN.
+
+    Parameters
+    ----------
+    instance_features : dict[str, list[int | float]] | None, defaults to None
+        Features (list of int or floats) of the instances (str). The features are incorporated into the X data,
+        on which the model is trained on.
+    pca_components : int | None, defaults to 7
+        Number of components to keep when using PCA to reduce dimensionality of instance features.
+    seed : int, defaults to 0
+    n_estimators : int, defaults to 8
+        The number of estimators in the TabPFN ensemble.
+    softmax_temperature : float, defaults to 0.9
+        The temperature for the softmax function.
+    """
+
+    def __init__(
+        self,
+        configspace: ConfigurationSpace,
+        instance_features: dict[str, list[int | float]] | None = None,
+        pca_components: int | None = 7,
+        seed: int = 0,
+        n_estimators: int = 8,
+        softmax_temperature: float = 0.9,
+    ) -> None:
+        super().__init__(
+            configspace=configspace,
+            instance_features=instance_features,
+            pca_components=pca_components,
+            seed=seed,
+        )
+
+        self._tabpfn = None
+        self.n_estimators = n_estimators
+        self.categorical_features_indices = [
+            i for i, hp in enumerate(list(configspace.values())) if isinstance(hp, CategoricalHyperparameter)
+        ]
+        self.softmax_temperature = softmax_temperature
+        self.random_state = seed
+
+    @property
+    def meta(self) -> dict[str, Any]:
+        """Returns the metadata of the model.
+
+        Returns
+        -------
+            dict[str, Any]: meta data
+        """
+        meta = super().meta
+        meta.update(
+            {
+                "pca_components": self._pca_components,
+            }
+        )
+        return meta
+
+    def _train(self, X: np.ndarray, y: np.ndarray) -> TabPFNModel:
+        y = y.flatten()
+
+        self._tabpfn = self._get_tabpfn()
+        if self._tabpfn is None:
+            raise AssertionError("TabPFNRegressor is not initialized properly!")
+        self._tabpfn.fit(X, y)
+
+        # Set the flag
+        self._is_trained = True
+
+        return self
+
+    def _predict(
+        self,
+        X: np.ndarray,
+        covariance_type: str | None = "diagonal",
+    ) -> tuple[np.ndarray, np.ndarray | None]:
+        if len(X.shape) != 2:
+            raise ValueError("Expected 2d array, got %dd array!" % len(X.shape))
+
+        if X.shape[1] != len(self._types):
+            raise ValueError("Rows in X should have %d entries but have %d!" % (len(self._types), X.shape[1]))
+
+        if covariance_type != "diagonal":
+            raise ValueError("`covariance_type` can only take `diagonal` for this model.")
+
+        assert self._tabpfn is not None
+        # X = self._impute_inactive(X)
+
+        out_dict = self._tabpfn.predict(X, output_type="full")
+
+        # Variance estimation is difficult with TabPFN, it can have very large variances
+        var = out_dict["criterion"].variance(out_dict["logits"]).cpu().detach().numpy()
+        var = var.flatten()
+        var = np.clip(var, np.percentile(var, 5), np.percentile(var, 95))
+        if np.isclose(var.min(), var.max()):
+            var = np.zeros_like(var)
+        else:
+            var = (var - var.min()) / (var.max() - var.min())
+        var = var + 1e-6  # Avoid zero variance
+        return out_dict["mean"], var
+
+    def _get_tabpfn(self) -> TabPFNRegressor:
+        """Return a TabPFNRegressor instance with the specified parameters.
+        The fit_mode is set to 'low_memory' because the model is often retrained.
+        The model's seed is forwarded as ``random_state`` so that runs are reproducible.
+
+        Returns: a new, not yet fitted TabPFNRegressor.
+        """
+        return TabPFNRegressor(
+            n_estimators=self.n_estimators,
+            categorical_features_indices=self.categorical_features_indices,
+            softmax_temperature=self.softmax_temperature,
+            fit_mode="low_memory",
+            random_state=self.random_state,
+        )
diff --git a/tests/test_model/test_tabpfn.py b/tests/test_model/test_tabpfn.py
new file mode 100644
index 0000000000..95bc5e433b
--- /dev/null
+++ b/tests/test_model/test_tabpfn.py
@@ -0,0 +1,145 @@
+import numpy as np
+import pytest
+from ConfigSpace import (
+    CategoricalHyperparameter,
+    ConfigurationSpace,
+    OrdinalHyperparameter,
+    UniformFloatHyperparameter,
+    UniformIntegerHyperparameter,
+)
+
+from smac.model.tabPFNv2 import TabPFNModel
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+
+def _get_cs(n_dimensions):
+    configspace = ConfigurationSpace(seed=0)
+    for i in range(n_dimensions):
+        configspace.add(UniformFloatHyperparameter("x%d" % i, 0, 1))
+
+    return configspace
+
+
+def test_predict_wrong_X_dimensions():
+    rs = np.random.RandomState(1)
+
+    model = TabPFNModel(
+        configspace=_get_cs(10),
+    )
+    X = rs.rand(10)
+    with pytest.raises(ValueError, match="Expected 2d array.*"):
+        model.predict(X)
+
+    X = rs.rand(10, 10, 10)
+    with pytest.raises(ValueError, match="Expected 2d array.*"):
+        model.predict(X)
+
+    X = rs.rand(10, 5)
+
+    with pytest.raises(ValueError, match="Feature mismatch: .*"):
+        model.predict(X)
+
+
+def test_predict():
+    rs = np.random.RandomState(1)
+    X = rs.rand(20, 10)
+    Y = rs.rand(10, 1)
+    model = TabPFNModel(configspace=_get_cs(10))
+    model.train(X[:10], Y[:10])
+    m_hat, v_hat = model.predict(X[10:])
+    assert m_hat.shape == (10, 1)
+    assert v_hat.shape == (10, 1)
+
+
+def test_train_with_pca():
+    rs = np.random.RandomState(1)
+    X = rs.rand(20, 20)
+    Y = rs.rand(20, 1)
+
+    F = {}
+    for i in range(10):
+        F[f"instance-{i}"] = list(rs.rand(10))
+
+    model = TabPFNModel(
+        configspace=_get_cs(10),
+        pca_components=2,
+        instance_features=F,
+    )
+    model.train(X, Y)
+
+    assert model._n_features == 10
+    assert model._n_hps == 10
+    assert model._pca is not None
+    assert model._scaler is not None
+
+
+def test_predict_with_actual_values():
+    X = np.array(
+        [
+            [0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0],
+            [0.0, 1.0, 0.0],
+            [0.0, 1.0, 1.0],
+            [1.0, 0.0, 0.0],
+            [1.0, 0.0, 1.0],
+            [1.0, 1.0, 0.0],
+            [1.0, 1.0, 1.0],
+        ],
+        dtype=np.float64,
+    )
+    y = np.array([[0.1], [0.2], [9], [9.2], [100.0], [100.2], [109.0], [109.2]], dtype=np.float64)
+    model = TabPFNModel(
+        configspace=_get_cs(3),
+        instance_features=None,
+        seed=12345,
+    )
+    model.train(np.vstack((X, X, X, X, X, X, X, X)), np.vstack((y, y, y, y, y, y, y, y)))
+
+    y_hat, _ = model.predict(X)
+    for y_i, y_hat_i in zip(y.reshape((1, -1)).flatten(), y_hat.reshape((1, -1)).flatten()):
+        assert pytest.approx(y_i, rel=0.5) == y_hat_i
+
+
+def test_with_ordinal():
+    cs = ConfigurationSpace(seed=0)
+    cs.add(CategoricalHyperparameter("a", [0, 1], default_value=0))
+    cs.add(OrdinalHyperparameter("b", [0, 1], default_value=1))
+    cs.add(UniformFloatHyperparameter("c", lower=0.0, upper=1.0, default_value=1))
+    cs.add(UniformIntegerHyperparameter("d", lower=0, upper=10, default_value=1))
+
+    F = {}
+    for i in range(1):
+        F[f"instance-{i}"] = [0, 0, 0]
+
+    model = TabPFNModel(
+        configspace=cs,
+        instance_features=F,
+        pca_components=9,
+    )
+
+    X = np.array(
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0, 9.0, 0.0, 0.0, 0.0],
+            [0.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0],
+        ],
+        dtype=np.float64,
+    )
+    y = np.array([0, 1, 2, 3], dtype=np.float64)
+
+    X_train = np.vstack((X, X, X, X, X, X, X, X, X, X))
+    y_train = np.vstack((y, y, y, y, y, y, y, y, y, y))
+
+    model.train(X_train, y_train.reshape((-1, 1)))
+    mean, _ = model.predict(X)
+    for idx, m in enumerate(mean):
+        assert pytest.approx(y[idx], abs=0.05) == m
+
+def test_predict_before_train_raises():
+    model = TabPFNModel(configspace=_get_cs(3))
+    X = np.random.rand(2, 3)
+    with pytest.raises(AssertionError):
+        model.predict(X)
\ No newline at end of file

From 40d084673e785a77d383a35bd5b2c31477265038 Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 13:57:40 +0200
Subject: [PATCH 02/10] added tabpfn to setup

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index f31ad085ab..53f3a6a833 100644
--- a/setup.py
+++ b/setup.py
@@ -87,6 +87,7 @@ def read_file(filepath: str) -> str:
         "emcee>=3.0.0",
         "regex",
         "pyyaml",
+        "tabpfn"
     ],
     extras_require=extras_require,
     test_suite="pytest",

From 521c43ba892cf28e23b8ffe73e4b07a355a47880 Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:03:44 +0200
Subject: [PATCH 03/10] add tabpfn as requirement

---
 setup.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 53f3a6a833..092a7b4957 100644
--- a/setup.py
+++ b/setup.py
@@ -54,9 +54,9 @@ def read_file(filepath: str) -> str:
         "black",                # This allows mkdocstrings to format signatures in the docs
         "pytest",
         "pytest-coverage",
-        "pytest-cases",
-        "tabpfn"
+        "pytest-cases"
     ],
+    "tabpfn":["tabpfn"]
 }
 
 setuptools.setup(
@@ -86,8 +86,7 @@ def read_file(filepath: str) -> str:
         "dask_jobqueue>=0.8.2",
         "emcee>=3.0.0",
         "regex",
-        "pyyaml",
-        "tabpfn"
+        "pyyaml"
     ],
     extras_require=extras_require,
     test_suite="pytest",

From 2c695301479c7768d8074c28635d837277a0bccc Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:08:19 +0200
Subject: [PATCH 04/10] add tabpfn to tests

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index c94e246bbf..6d2b980a3a 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,7 +28,7 @@ on:
 env:
   package-name: smac
   test-dir: tests
-  extra-requires: "[gpytorch,dev]"
+  extra-requires: "[gpytorch,dev,tabpfn]"
 
   # Arguments used for pytest
   pytest-args: >-

From edb99cb5e6fcfce2025fa5a92a51c3af22b5b197 Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:27:13 +0200
Subject: [PATCH 05/10] test

---
 .github/workflows/pytest.yml | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 6d2b980a3a..723320b9e9 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,7 +28,21 @@ on:
 env:
   package-name: smac
   test-dir: tests
-  extra-requires: "[gpytorch,dev,tabpfn]"
+  extra-requires: #"[gpytorch,dev,tabpfn]"
+  - name: Debug show extras requires
+    run: echo "EXTRA REQUIRES = .${{ env.extra-requires }}"
+
+  - name: Show directory
+    run: ls -la
+
+  - name: Show setup.py
+    run: cat setup.py
+
+  - name: Dry-run pip install
+    run: |
+      echo "RUNNING pip install -e \".${{ env.extra-requires }}\""
+      pip install -v -e ".${{ env.extra-requires }}"
+
 
   # Arguments used for pytest
   pytest-args: >-

From 0306bc347ab0901ef4a56d82763f191f48534c5a Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:28:55 +0200
Subject: [PATCH 06/10] testing

---
 .github/workflows/pytest.yml | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 723320b9e9..6d2b980a3a 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,21 +28,7 @@ on:
 env:
   package-name: smac
   test-dir: tests
-  extra-requires: #"[gpytorch,dev,tabpfn]"
-  - name: Debug show extras requires
-    run: echo "EXTRA REQUIRES = .${{ env.extra-requires }}"
-
-  - name: Show directory
-    run: ls -la
-
-  - name: Show setup.py
-    run: cat setup.py
-
-  - name: Dry-run pip install
-    run: |
-      echo "RUNNING pip install -e \".${{ env.extra-requires }}\""
-      pip install -v -e ".${{ env.extra-requires }}"
-
+  extra-requires: "[gpytorch,dev,tabpfn]"
 
   # Arguments used for pytest
   pytest-args: >-

From f3b2885c97a7f7a9125269b034d9d56d1fe3ea5f Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:42:53 +0200
Subject: [PATCH 07/10] tabpfn python version fix

---
 smac/model/__init__.py          | 12 ++++++++++--
 tests/test_model/test_tabpfn.py |  5 +++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/smac/model/__init__.py b/smac/model/__init__.py
index 648035e8b6..79438b621b 100644
--- a/smac/model/__init__.py
+++ b/smac/model/__init__.py
@@ -1,6 +1,14 @@
 from smac.model.abstract_model import AbstractModel
 from smac.model.multi_objective_model import MultiObjectiveModel
 from smac.model.random_model import RandomModel
-from smac.model.tabPFNv2 import TabPFNModel
 
-__all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel", "TabPFNModel"]
+__all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel"]
+
+try:
+    from smac.model.tabPFNv2 import TabPFNModel
+
+    __all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel", "TabPFNModel"]
+except ImportError as e:
+    raise ImportError(
+        "TabPFNModel requires tabpfn to be installed and Python >=3.9. " "Install with pip install tabpfn"
+    ) from e
diff --git a/tests/test_model/test_tabpfn.py b/tests/test_model/test_tabpfn.py
index 95bc5e433b..eb53ab2565 100644
--- a/tests/test_model/test_tabpfn.py
+++ b/tests/test_model/test_tabpfn.py
@@ -1,5 +1,10 @@
 import numpy as np
+import sys
 import pytest
+pytestmark = pytest.mark.skipif(
+    sys.version_info < (3, 9),
+    reason="tabpfn requires Python >=3.9"
+)
 from ConfigSpace import (
     CategoricalHyperparameter,
     ConfigurationSpace,

From b2486d43c18f410f01a56b1c7350c49ce203a13e Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 14:47:26 +0200
Subject: [PATCH 08/10] testing

---
 smac/model/__init__.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/smac/model/__init__.py b/smac/model/__init__.py
index 79438b621b..3129d36c16 100644
--- a/smac/model/__init__.py
+++ b/smac/model/__init__.py
@@ -8,7 +8,5 @@
     from smac.model.tabPFNv2 import TabPFNModel
 
     __all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel", "TabPFNModel"]
-except ImportError as e:
-    raise ImportError(
-        "TabPFNModel requires tabpfn to be installed and Python >=3.9. " "Install with pip install tabpfn"
-    ) from e
+except ImportError:
+    pass

From 0bc415ae206b6b73663eb2504accb0a21f7f4eda Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Tue, 15 Jul 2025 15:01:08 +0200
Subject: [PATCH 09/10] testing

---
 tests/test_model/test_tabpfn.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/test_model/test_tabpfn.py b/tests/test_model/test_tabpfn.py
index eb53ab2565..01fd2d68da 100644
--- a/tests/test_model/test_tabpfn.py
+++ b/tests/test_model/test_tabpfn.py
@@ -1,10 +1,7 @@
 import numpy as np
 import sys
 import pytest
-pytestmark = pytest.mark.skipif(
-    sys.version_info < (3, 9),
-    reason="tabpfn requires Python >=3.9"
-)
+
 from ConfigSpace import (
     CategoricalHyperparameter,
     ConfigurationSpace,
@@ -13,7 +10,15 @@
     UniformIntegerHyperparameter,
 )
 
-from smac.model.tabPFNv2 import TabPFNModel
+# Skip this module when tabpfn is not installed, instead of swallowing the
+# ImportError and letting every test fail later with a NameError.
+pytest.importorskip("tabpfn")
+from smac.model.tabPFNv2 import TabPFNModel  # noqa: E402
+
+pytestmark = pytest.mark.skipif(
+    sys.version_info < (3, 9),
+    reason="tabpfn requires Python >=3.9"
+)
 
 __copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
 __license__ = "3-clause BSD"

From ab11a4dc1235b51bdc0243c020d979d2ed467b07 Mon Sep 17 00:00:00 2001
From: Daphne12345 <daphne.theo@yahoo.de>
Date: Fri, 21 Nov 2025 14:59:21 +0100
Subject: [PATCH 10/10] Added TabPFN acquisition function and input
 preprocessing.

---
 .../5_tabPFN_surrogate_model.py               |  6 ++-
 smac/acquisition/function/tabpfn_acq_fun.py   | 48 +++++++++++++++++++
 smac/model/tabPFNv2.py                        | 48 +++++++++++++------
 3 files changed, 87 insertions(+), 15 deletions(-)
 create mode 100644 smac/acquisition/function/tabpfn_acq_fun.py

diff --git a/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
index 4370139390..a5bec161d9 100644
--- a/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
+++ b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
@@ -15,6 +15,7 @@
 from smac.model.tabPFNv2 import TabPFNModel
 
 from smac import HyperparameterOptimizationFacade, Scenario
+from smac.acquisition.function.tabpfn_acq_fun import RiemannExpectedImprovement
 
 __copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
 __license__ = "3-clause BSD"
@@ -78,7 +79,9 @@ def train(self, config: Configuration, seed: int = 0) -> float:
     # We want to run the facade's default initial design, but we want to change the number
     # of initial configs to 5.
     initial_design = HyperparameterOptimizationFacade.get_initial_design(scenario, n_configs=5)
-
+    
+    acq_fun = RiemannExpectedImprovement()  # we will set the runhistory later
+    
     # Now we use SMAC to find the best hyperparameters
     smac = HyperparameterOptimizationFacade(
         scenario,
@@ -86,6 +89,7 @@ def train(self, config: Configuration, seed: int = 0) -> float:
         initial_design=initial_design,
         overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
         model=TabPFNModel(configspace=scenario.configspace, seed=scenario.seed), # use TabPFN as surrogate model
+        acquisition_function=acq_fun,  # use TabPFN-based EI as acquisition function
     )
 
     incumbent = smac.optimize()
diff --git a/smac/acquisition/function/tabpfn_acq_fun.py b/smac/acquisition/function/tabpfn_acq_fun.py
new file mode 100644
index 0000000000..7de87f7202
--- /dev/null
+++ b/smac/acquisition/function/tabpfn_acq_fun.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from typing import Any, cast
+
+import numpy as np
+import torch
+
+from smac.acquisition.function import AbstractAcquisitionFunction
+from smac.model.tabPFNv2 import TabPFNModel  # <--- Adjust path as necessary
+
+
+class RiemannExpectedImprovement(AbstractAcquisitionFunction):
+    """Expected Improvement computed from a discrete (Riemann) predictive distribution.
+
+    This version is designed for TabPFN/PFNs4BO models that output discrete logits
+    rather than Gaussian mean/variance pairs. It must be paired with a `TabPFNModel`.
+    """
+
+    @property
+    def name(self) -> str:  # noqa: D102
+        return "RiemannExpectedImprovement"
+
+    def _update(self, **kwargs: Any) -> None:
+        """Store the incumbent cost `eta` (in the cost space the model is trained on)."""
+        if self.model is None:
+            raise ValueError("No model attached to acquisition function.")
+        assert "eta" in kwargs
+        self._eta = kwargs["eta"]
+
+    def _compute(self, X: np.ndarray) -> np.ndarray:
+        """Return the Riemann-based EI of every row of X as an array of shape (n, 1)."""
+        if self.model is None:
+            raise ValueError("Model not set for acquisition function.")
+        model = cast(TabPFNModel, self.model)
+        assert model._tabpfn is not None
+
+        # Apply the same input pipeline the model was fitted with (impute, transform, scale).
+        X_scaled = model._x_scaler.transform(model._x_pt.transform(model._x_imputer.transform(X)))
+        # TabPFN was fitted on power-transformed, standardised targets, so the incumbent has to
+        # be mapped into that space; both maps are monotone, so the ordering of costs is kept.
+        eta = model._y_pt.transform(np.asarray(self._eta, dtype=float).reshape(-1, 1))
+        eta = float(model._y_scaler.transform(eta)[0, 0])
+
+        with torch.no_grad():
+            pred = model._tabpfn.predict(X_scaled, output_type="full")
+        # Sign flipped because the criterion's EI maximises. NOTE(review): confirm this suffices for minimisation.
+        ei = pred["criterion"].ei(pred["logits"], (-1) * eta)
+        return ei.cpu().numpy().reshape(-1, 1)
diff --git a/smac/model/tabPFNv2.py b/smac/model/tabPFNv2.py
index da39b357e2..583f2e14d8 100644
--- a/smac/model/tabPFNv2.py
+++ b/smac/model/tabPFNv2.py
@@ -3,8 +3,11 @@
 from typing import Any
 
 import numpy as np
+import torch
 from ConfigSpace import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import PowerTransformer, StandardScaler
 from tabpfn import TabPFNRegressor
 
 from smac.model.abstract_model import AbstractModel
@@ -57,6 +60,13 @@ def __init__(
         self.softmax_temperature = softmax_temperature
         self.random_state = seed
 
+        self._x_imputer = SimpleImputer(strategy="mean")
+        self._x_pt = PowerTransformer(method="yeo-johnson", standardize=False)
+        self._x_scaler = StandardScaler()
+
+        self._y_pt = PowerTransformer(method="yeo-johnson", standardize=False)
+        self._y_scaler = StandardScaler()
+
     @property
     def meta(self) -> dict[str, Any]:
         """Returns the metadata of the model.
@@ -74,16 +84,22 @@ def meta(self) -> dict[str, Any]:
         return meta
 
     def _train(self, X: np.ndarray, y: np.ndarray) -> TabPFNModel:
-        y = y.flatten()
-
         self._tabpfn = self._get_tabpfn()
         if self._tabpfn is None:
             raise AssertionError("TabPFNRegressor is not initialized properly!")
-        self._tabpfn.fit(X, y)
 
-        # Set the flag
-        self._is_trained = True
+        # Impute, transform, scale
+        X_imputed = self._x_imputer.fit_transform(X)
+        X_transformed = self._x_pt.fit_transform(X_imputed)
+        X_scaled = self._x_scaler.fit_transform(X_transformed)
 
+        y = y.flatten()
+        y_transformed = self._y_pt.fit_transform(y.reshape(-1, 1))
+        y_scaled = self._y_scaler.fit_transform(y_transformed)
+        y_scaled = y_scaled.flatten()
+
+        self._tabpfn.fit(X_scaled, y_scaled)
+        self._is_trained = True
         return self
 
     def _predict(
@@ -101,20 +117,24 @@ def _predict(
             raise ValueError("`covariance_type` can only take `diagonal` for this model.")
 
         assert self._tabpfn is not None
-        # X = self._impute_inactive(X)
 
-        out_dict = self._tabpfn.predict(X, output_type="full")
+        # Impute, transform, scale
+        X_imputed = self._x_imputer.transform(X)
+        X_transformed = self._x_pt.transform(X_imputed)
+        X_scaled = self._x_scaler.transform(X_transformed)
+
+        with torch.no_grad():
+            out_dict = self._tabpfn.predict(X_scaled, output_type="full")
 
         # Variance estimation is difficult with TabPFN, it can have very large variances
         var = out_dict["criterion"].variance(out_dict["logits"]).cpu().detach().numpy()
         var = var.flatten()
-        var = np.clip(var, np.percentile(var, 5), np.percentile(var, 95))
-        if np.isclose(var.min(), var.max()):
-            var = np.zeros_like(var)
-        else:
-            var = (var - var.min()) / (var.max() - var.min())
-        var = var + 1e-6  # Avoid zero variance
-        return out_dict["mean"], var
+        var = np.maximum(var, 1e-6)
+
+        y_pred = self._y_scaler.inverse_transform(out_dict["mean"].reshape(-1, 1))
+        y_pred = self._y_pt.inverse_transform(y_pred)
+
+        return y_pred.flatten(), var
 
     def _get_tabpfn(self) -> TabPFNRegressor:
         """Return a TabPFNRegressor instance with the specified parameters.