[pre-commit.ci] pre-commit suggestions #525

Open · wants to merge 2 commits into base `main`
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -27,14 +27,14 @@ repos:
- id: detect-private-key

- repo: https://github.com/PyCQA/docformatter
rev: 06907d0267368b49b9180eed423fae5697c1e909 # todo: fix for docformatter after last 1.7.5
rev: v1.7.8-rc1 # todo: fix for docformatter after last 1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/executablebooks/mdformat
rev: 0.7.19
rev: 0.7.22
hooks:
- id: mdformat
additional_dependencies:
@@ -48,7 +48,7 @@ repos:
)

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.3
rev: v0.12.5
hooks:
- id: ruff
args: ["--fix"]
18 changes: 9 additions & 9 deletions examples/__only_for_dev__/to_test_regression_custom_models.py
@@ -83,15 +83,15 @@ class MultiStageModelConfig(ModelConfig):
threshold_init_beta: float = field(
default=1.0,
metadata={
"help": """
Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.
It initializes threshold to a q-th quantile of data points.
where q ~ Beta(:threshold_init_beta:, :threshold_init_beta:)
If this param is set to 1, initial thresholds will have the same distribution as data points
If greater than 1 (e.g. 10), thresholds will be closer to median data value
If less than 1 (e.g. 0.1), thresholds will approach min/max data values.
"""
"help": """Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.

It initializes threshold to a q-th quantile of data points. where q ~ Beta(:threshold_init_beta:,
:threshold_init_beta:) If this param is set to 1, initial thresholds will have the same distribution
as data points If greater than 1 (e.g. 10), thresholds will be closer to median data value If less
than 1 (e.g. 0.1), thresholds will approach min/max data values.

"""
},
)
threshold_init_cutoff: float = field(
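Note: the `threshold_init_beta` help text above describes quantile-based threshold initialization with q ~ Beta(beta, beta). The following is a minimal illustrative sketch (NumPy only, not code from this PR) of how that parameter shifts the sampled quantiles:

```python
import numpy as np

rng = np.random.default_rng(0)
feature_values = rng.normal(size=1_000)  # stand-in for one feature column in the first batch

def init_thresholds(values, threshold_init_beta, n_thresholds=5):
    # Draw quantile levels q ~ Beta(beta, beta) and map them onto the data.
    q = rng.beta(threshold_init_beta, threshold_init_beta, size=n_thresholds)
    return np.quantile(values, q)

print(init_thresholds(feature_values, 1.0))   # spread roughly like the data itself
print(init_thresholds(feature_values, 10.0))  # clustered around the median
print(init_thresholds(feature_values, 0.1))   # pushed toward the min/max values
```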
12 changes: 6 additions & 6 deletions src/pytorch_tabular/categorical_encoders.py
@@ -58,9 +58,9 @@ def transform(self, X):
raise ValueError("`fit` method must be called before `transform`.")
assert all(c in X.columns for c in self.cols)
if self.handle_missing == "error":
assert (
not X[self.cols].isnull().any().any()
), "`handle_missing` = `error` and missing values found in columns to encode."
assert not X[self.cols].isnull().any().any(), (
"`handle_missing` = `error` and missing values found in columns to encode."
)
X_encoded = X.copy(deep=True)
category_cols = X_encoded.select_dtypes(include="category").columns
X_encoded[category_cols] = X_encoded[category_cols].astype("object")
@@ -153,9 +153,9 @@ def fit(self, X, y=None):
"""
self._before_fit_check(X, y)
if self.handle_missing == "error":
assert (
not X[self.cols].isnull().any().any()
), "`handle_missing` = `error` and missing values found in columns to encode."
assert not X[self.cols].isnull().any().any(), (
"`handle_missing` = `error` and missing values found in columns to encode."
)
for col in self.cols:
map = Series(unique(X[col].fillna(NAN_CATEGORY)), name=col).reset_index().rename(columns={"index": "value"})
map["value"] += 1
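Note: the change above is purely a formatting one; the assertion message moves into parentheses after the comma, which is the layout newer ruff-format releases produce. A small standalone sketch of the same check (hypothetical data, not this module's encoder):

```python
import pandas as pd

X = pd.DataFrame({"a": ["x", "y"], "b": ["u", "v"]})
cols = ["a", "b"]  # hypothetical list of columns to encode

# Same behaviour in old and new layout: raise if any column to encode has missing values.
assert not X[cols].isnull().any().any(), (
    "`handle_missing` = `error` and missing values found in columns to encode."
)
```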
56 changes: 30 additions & 26 deletions src/pytorch_tabular/config/config.py
@@ -26,12 +26,15 @@ def _read_yaml(filename):
"tag:yaml.org,2002:float",
re.compile(
"""^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$""",

[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$

""",
re.X,
),
list("-+0123456789."),
@@ -192,9 +195,9 @@ class DataConfig:
)

def __post_init__(self):
assert (
len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
), "There should be at-least one feature defined in categorical, continuous, or date columns"
assert len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0, (
"There should be at-least one feature defined in categorical, continuous, or date columns"
)
_validate_choices(self)
if os.name == "nt" and self.num_workers != 0:
print("Windows does not support num_workers > 0. Setting num_workers to 0")
@@ -255,9 +258,9 @@ class InferredConfig:

def __post_init__(self):
if self.embedding_dims is not None:
assert all(
(isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims
), "embedding_dims must be a list of tuples (cardinality, embedding_dim)"
assert all((isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims), (
"embedding_dims must be a list of tuples (cardinality, embedding_dim)"
)
self.embedded_cat_dim = sum([t[1] for t in self.embedding_dims])
else:
self.embedded_cat_dim = 0
@@ -581,24 +584,25 @@ def __post_init__(self):

@dataclass
class ExperimentConfig:
"""Experiment configuration. Experiment Tracking with WandB and Tensorboard.
"""Experiment configuration.

Args:
project_name (str): The name of the project under which all runs will be logged. For Tensorboard
this defines the folder under which the logs will be saved and for W&B it defines the project name
Experiment Tracking with WandB and Tensorboard.
Args:
project_name (str): The name of the project under which all runs will be logged. For Tensorboard
this defines the folder under which the logs will be saved and for W&B it defines the project name

run_name (Optional[str]): The name of the run; a specific identifier to recognize the run. If left
blank, will be assigned an auto-generated name
run_name (Optional[str]): The name of the run; a specific identifier to recognize the run. If left
blank, will be assigned an auto-generated name

exp_watch (Optional[str]): The level of logging required. Can be `gradients`, `parameters`, `all`
or `None`. Defaults to None. Choices are: [`gradients`,`parameters`,`all`,`None`].
exp_watch (Optional[str]): The level of logging required. Can be `gradients`, `parameters`, `all`
or `None`. Defaults to None. Choices are: [`gradients`,`parameters`,`all`,`None`].

log_target (str): Determines where logging happens - Tensorboard or W&B. Choices are:
[`wandb`,`tensorboard`].
log_target (str): Determines where logging happens - Tensorboard or W&B. Choices are:
[`wandb`,`tensorboard`].

log_logits (bool): Turn this on to log the logits as a histogram in W&B
log_logits (bool): Turn this on to log the logits as a histogram in W&B

exp_log_freq (int): step count between logging of gradients and parameters.
exp_log_freq (int): step count between logging of gradients and parameters.

"""

@@ -730,8 +734,8 @@ def __init__(
self,
exp_version_manager: str = ".pt_tmp/exp_version_manager.yml",
) -> None:
"""The manages the versions of the experiments based on the name. It is a simple dictionary(yaml) based lookup.
Primary purpose is to avoid overwriting of saved models while running the training without changing the
"""The manages the versions of the experiments based on the name. Primary purpose is to avoid overwriting of
saved models while running the training without changing the It is a simple dictionary(yaml) based lookup.
experiment name.

Args:
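Note: the regex hunk near the top of this file is the usual PyYAML workaround for scientific-notation numbers such as `1e-3` being read as strings. A self-contained sketch of the same idea (not the project's exact loader setup):

```python
import re

import yaml

loader = yaml.SafeLoader
loader.add_implicit_resolver(
    "tag:yaml.org,2002:float",
    re.compile(
        r"""^(?:
         [-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+]?[0-9]+)?
        |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
        |\.[0-9_]+(?:[eE][-+][0-9]+)?
        |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
        |[-+]?\.(?:inf|Inf|INF)
        |\.(?:nan|NaN|NAN))$""",
        re.X,
    ),
    list("-+0123456789."),
)

print(yaml.load("lr: 1e-3", Loader=loader))  # {'lr': 0.001} instead of {'lr': '1e-3'}
```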
1 change: 0 additions & 1 deletion src/pytorch_tabular/feature_extractor.py
@@ -59,7 +59,6 @@ def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
pd.DataFrame: The encoded dataframe

"""

X_encoded = X.copy(deep=True)
orig_features = X_encoded.columns
self.tabular_model.model.eval()
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/category_embedding/config.py
@@ -98,7 +98,7 @@ class CategoryEmbeddingModelConfig(ModelConfig):
)
use_batch_norm: bool = field(
default=False,
metadata={"help": ("Flag to include a BatchNorm layer after each Linear Layer+DropOut." " Defaults to False")},
metadata={"help": ("Flag to include a BatchNorm layer after each Linear Layer+DropOut. Defaults to False")},
)
initialization: str = field(
default="kaiming",
4 changes: 2 additions & 2 deletions src/pytorch_tabular/models/common/heads/config.py
@@ -6,8 +6,8 @@

@dataclass
class LinearHeadConfig:
"""A model class for Linear Head configuration; serves as a template and documentation. The models take a
dictionary as input, but if there are keys which are not present in this model class, it'll throw an exception.
"""A model class for Linear Head configuration; serves as a template and documentation. dictionary as input, but if
there are keys which are not present in this model class, it'll throw an exception. The models take a.

Args:
layers (str): Hyphen-separated number of layers and units in the classification/regression head.
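Note: as a usage illustration for the docstring touched above, a hedged sketch of constructing the head config (the `layers` format comes from the docstring; the import path and other field names are assumptions, not part of this diff):

```python
from pytorch_tabular.models.common.heads import LinearHeadConfig  # assumed import path

head_config = LinearHeadConfig(
    layers="32-16",        # two layers with 32 and 16 units in the head
    dropout=0.1,           # assumed field name
    use_batch_norm=False,  # assumed field name
)
print(head_config)
```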
11 changes: 5 additions & 6 deletions src/pytorch_tabular/models/common/layers/activations.py
@@ -151,8 +151,8 @@ def _sparsemax_threshold_and_support(X, dim=-1, k=None):
the threshold value for each vector
support_size : torch LongTensor, shape like `tau`
the number of nonzeros in each vector.
"""

"""
if k is None or k >= X.shape[dim]: # do full sort
topk, _ = torch.sort(X, dim=dim, descending=True)
else:
@@ -204,7 +204,6 @@ def _entmax_threshold_and_support(X, dim=-1, k=None):
the number of nonzeros in each vector.

"""

if k is None or k >= X.shape[dim]: # do full sort
Xsrt, _ = torch.sort(X, dim=dim, descending=True)
else:
@@ -288,7 +287,7 @@ def backward(cls, ctx, dY):


def sparsemax(X, dim=-1, k=None):
"""sparsemax: normalizing sparse transform (a la softmax).
"""Sparsemax: normalizing sparse transform (a la softmax).

Solves the projection:

@@ -313,8 +312,8 @@ def sparsemax(X, dim=-1, k=None):
-------
P : torch tensor, same shape as X
The projection result, such that P.sum(dim=dim) == 1 elementwise.
"""

"""
return SparsemaxFunction.apply(X, dim, k)


@@ -347,13 +346,12 @@ def entmax15(X, dim=-1, k=None):
P : torch tensor, same shape as X
The projection result, such that P.sum(dim=dim) == 1 elementwise.
"""

return Entmax15Function.apply(X, dim, k)


class Sparsemax(nn.Module):
def __init__(self, dim=-1, k=None):
"""sparsemax: normalizing sparse transform (a la softmax).
"""Sparsemax: normalizing sparse transform (a la softmax).

Solves the projection:

@@ -370,6 +368,7 @@ def __init__(self, dim=-1, k=None):
nonzeros in the solution. If the solution is more than k-sparse,
this function is recursively called with a 2*k schedule.
If `None`, full sorting is performed from the beginning.

"""
self.dim = dim
self.k = k
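Note: the sparsemax/entmax docstrings touched above describe sparse alternatives to softmax whose outputs still sum to 1 but may contain exact zeros. A minimal numeric sketch, assuming these functions are importable from this module:

```python
import torch

from pytorch_tabular.models.common.layers.activations import entmax15, sparsemax

x = torch.tensor([[2.0, 1.0, 0.1, -1.0]])

p_soft = torch.softmax(x, dim=-1)  # strictly positive everywhere
p_sparse = sparsemax(x, dim=-1)    # sums to 1, small logits become exactly 0
p_ent = entmax15(x, dim=-1)        # sparsity between softmax and sparsemax

for name, p in [("softmax", p_soft), ("sparsemax", p_sparse), ("entmax15", p_ent)]:
    print(name, p, p.sum(dim=-1))
```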
36 changes: 18 additions & 18 deletions src/pytorch_tabular/models/common/layers/embeddings.py
@@ -84,12 +84,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert (
categorical_data.shape[1] == self.categorical_dim
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == self.categorical_dim, (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
if self.batch_norm_continuous_input:
@@ -141,12 +141,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert categorical_data.shape[1] == len(
self.cat_embedding_layers
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == len(self.cat_embedding_layers), (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
if self.batch_norm_continuous_input:
@@ -273,12 +273,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert categorical_data.shape[1] == len(
self.cat_embedding_layers
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == len(self.cat_embedding_layers), (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
cont_idx = torch.arange(self.continuous_dim, device=continuous_data.device).expand(
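Note: every assertion reformatted in this file enforces the same input contract: `forward` receives a dict whose `continuous` and `categorical` tensors match the configured widths. An illustrative standalone sketch of that contract (the shapes are made up; this is not the repository's layer):

```python
import torch

batch_size, continuous_dim, categorical_dim = 32, 4, 3

x = {
    "continuous": torch.randn(batch_size, continuous_dim),
    "categorical": torch.randint(0, 10, (batch_size, categorical_dim)),
}

# The embedding layers above check exactly these two widths before embedding.
assert x["categorical"].shape[1] == categorical_dim, (
    "categorical_data must have same number of columns as categorical embedding layers"
)
assert x["continuous"].shape[1] == continuous_dim, (
    "continuous_data must have same number of columns as continuous dim"
)
```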
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/gate/config.py
@@ -173,7 +173,7 @@ def __post_init__(self):
assert self.tree_depth > 0, "tree_depth should be greater than 0"
# Either gflu_stages or num_trees should be greater than 0
assert self.num_trees > 0, (
"`num_trees` must be greater than 0." "If you want a lighter model which performs better, use GANDALF."
"`num_trees` must be greater than 0.If you want a lighter model which performs better, use GANDALF."
)
super().__post_init__()

12 changes: 6 additions & 6 deletions src/pytorch_tabular/models/gate/gate_model.py
@@ -51,12 +51,12 @@ def __init__(
embedding_dropout: float = 0.0,
):
super().__init__()
assert (
binning_activation in self.BINARY_ACTIVATION_MAP.keys()
), f"`binning_activation should be one of {self.BINARY_ACTIVATION_MAP.keys()}"
assert (
feature_mask_function in self.ACTIVATION_MAP.keys()
), f"`feature_mask_function should be one of {self.ACTIVATION_MAP.keys()}"
assert binning_activation in self.BINARY_ACTIVATION_MAP.keys(), (
f"`binning_activation should be one of {self.BINARY_ACTIVATION_MAP.keys()}"
)
assert feature_mask_function in self.ACTIVATION_MAP.keys(), (
f"`feature_mask_function should be one of {self.ACTIVATION_MAP.keys()}"
)

self.gflu_stages = gflu_stages
self.num_trees = num_trees
6 changes: 3 additions & 3 deletions src/pytorch_tabular/models/mixture_density/config.py
@@ -87,9 +87,9 @@ class MDNConfig(ModelConfig):
_probabilistic: bool = field(default=True)

def __post_init__(self):
assert (
self.backbone_config_class not in INCOMPATIBLE_BACKBONES
), f"{self.backbone_config_class} is not a supported backbone for MDN head"
assert self.backbone_config_class not in INCOMPATIBLE_BACKBONES, (
f"{self.backbone_config_class} is not a supported backbone for MDN head"
)
assert self.head == "MixtureDensityHead"
return super().__post_init__()

18 changes: 9 additions & 9 deletions src/pytorch_tabular/models/node/config.py
@@ -168,15 +168,15 @@ class NodeConfig(ModelConfig):
threshold_init_beta: float = field(
default=1.0,
metadata={
"help": """
Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.
It initializes threshold to a q-th quantile of data points.
where q ~ Beta(:threshold_init_beta:, :threshold_init_beta:)
If this param is set to 1, initial thresholds will have the same distribution as data points
If greater than 1 (e.g. 10), thresholds will be closer to median data value
If less than 1 (e.g. 0.1), thresholds will approach min/max data values.
"""
"help": """Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.

It initializes threshold to a q-th quantile of data points. where q ~ Beta(:threshold_init_beta:,
:threshold_init_beta:) If this param is set to 1, initial thresholds will have the same distribution
as data points If greater than 1 (e.g. 10), thresholds will be closer to median data value If less
than 1 (e.g. 0.1), thresholds will approach min/max data values.

"""
},
)
threshold_init_cutoff: float = field(
4 changes: 2 additions & 2 deletions src/pytorch_tabular/models/stacking/config.py
@@ -5,8 +5,8 @@

@dataclass
class StackingModelConfig(ModelConfig):
"""StackingModelConfig is a configuration class for the StackingModel. It is used to stack multiple models
together. Now, CategoryEmbeddingModel, TabNetModel, FTTransformerModel, GatedAdditiveTreeEnsembleModel, DANetModel,
"""StackingModelConfig is a configuration class for the StackingModel. together. Now, CategoryEmbeddingModel,
TabNetModel, FTTransformerModel, GatedAdditiveTreeEnsembleModel, DANetModel, It is used to stack multiple models
AutoIntModel, GANDALFModel, NodeModel are supported.

Args: