[pre-commit.ci] pre-commit suggestions #525

Open · wants to merge 2 commits into base `main`
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -27,14 +27,14 @@ repos:
- id: detect-private-key

- repo: https://github.com/PyCQA/docformatter
rev: 06907d0267368b49b9180eed423fae5697c1e909 # todo: fix for docformatter after last 1.7.5
rev: v1.7.8-rc1 # todo: fix for docformatter after last 1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/executablebooks/mdformat
rev: 0.7.19
rev: 0.7.22
hooks:
- id: mdformat
additional_dependencies:
@@ -48,7 +48,7 @@ repos:
)

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.3
rev: v0.12.5
hooks:
- id: ruff
args: ["--fix"]
18 changes: 9 additions & 9 deletions examples/__only_for_dev__/to_test_regression_custom_models.py
@@ -83,15 +83,15 @@ class MultiStageModelConfig(ModelConfig):
threshold_init_beta: float = field(
default=1.0,
metadata={
"help": """
Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.
It initializes threshold to a q-th quantile of data points.
where q ~ Beta(:threshold_init_beta:, :threshold_init_beta:)
If this param is set to 1, initial thresholds will have the same distribution as data points
If greater than 1 (e.g. 10), thresholds will be closer to median data value
If less than 1 (e.g. 0.1), thresholds will approach min/max data values.
"""
"help": """Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.

It initializes threshold to a q-th quantile of data points. where q ~ Beta(:threshold_init_beta:,
:threshold_init_beta:) If this param is set to 1, initial thresholds will have the same distribution
as data points If greater than 1 (e.g. 10), thresholds will be closer to median data value If less
than 1 (e.g. 0.1), thresholds will approach min/max data values.

"""
},
)
threshold_init_cutoff: float = field(
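Note: the `threshold_init_beta` help text above describes quantile-based threshold initialization with q ~ Beta(beta, beta). The following is a minimal illustrative sketch (NumPy only, not code from this PR) of how that parameter shifts the sampled quantiles:

```python
import numpy as np

rng = np.random.default_rng(0)
feature_values = rng.normal(size=1_000)  # stand-in for one feature column in the first batch

def init_thresholds(values, threshold_init_beta, n_thresholds=5):
    # Draw quantile levels q ~ Beta(beta, beta) and map them onto the data.
    q = rng.beta(threshold_init_beta, threshold_init_beta, size=n_thresholds)
    return np.quantile(values, q)

print(init_thresholds(feature_values, 1.0))   # spread roughly like the data itself
print(init_thresholds(feature_values, 10.0))  # clustered around the median
print(init_thresholds(feature_values, 0.1))   # pushed toward the min/max values
```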
12 changes: 6 additions & 6 deletions src/pytorch_tabular/categorical_encoders.py
@@ -58,9 +58,9 @@ def transform(self, X):
raise ValueError("`fit` method must be called before `transform`.")
assert all(c in X.columns for c in self.cols)
if self.handle_missing == "error":
assert (
not X[self.cols].isnull().any().any()
), "`handle_missing` = `error` and missing values found in columns to encode."
assert not X[self.cols].isnull().any().any(), (
"`handle_missing` = `error` and missing values found in columns to encode."
)
X_encoded = X.copy(deep=True)
category_cols = X_encoded.select_dtypes(include="category").columns
X_encoded[category_cols] = X_encoded[category_cols].astype("object")
@@ -153,9 +153,9 @@ def fit(self, X, y=None):
"""
self._before_fit_check(X, y)
if self.handle_missing == "error":
assert (
not X[self.cols].isnull().any().any()
), "`handle_missing` = `error` and missing values found in columns to encode."
assert not X[self.cols].isnull().any().any(), (
"`handle_missing` = `error` and missing values found in columns to encode."
)
for col in self.cols:
map = Series(unique(X[col].fillna(NAN_CATEGORY)), name=col).reset_index().rename(columns={"index": "value"})
map["value"] += 1
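Note: the change above is purely a formatting one; the assertion message moves into parentheses after the comma, which is the layout newer ruff-format releases produce. A small standalone sketch of the same check (hypothetical data, not this module's encoder):

```python
import pandas as pd

X = pd.DataFrame({"a": ["x", "y"], "b": ["u", "v"]})
cols = ["a", "b"]  # hypothetical list of columns to encode

# Same behaviour in old and new layout: raise if any column to encode has missing values.
assert not X[cols].isnull().any().any(), (
    "`handle_missing` = `error` and missing values found in columns to encode."
)
```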
56 changes: 30 additions & 26 deletions src/pytorch_tabular/config/config.py
@@ -26,12 +26,15 @@ def _read_yaml(filename):
"tag:yaml.org,2002:float",
re.compile(
"""^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$""",

[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$

""",
re.X,
),
list("-+0123456789."),
@@ -192,9 +195,9 @@ class DataConfig:
)

def __post_init__(self):
assert (
len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
), "There should be at-least one feature defined in categorical, continuous, or date columns"
assert len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0, (
"There should be at-least one feature defined in categorical, continuous, or date columns"
)
_validate_choices(self)
if os.name == "nt" and self.num_workers != 0:
print("Windows does not support num_workers > 0. Setting num_workers to 0")
@@ -255,9 +258,9 @@ class InferredConfig:

def __post_init__(self):
if self.embedding_dims is not None:
assert all(
(isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims
), "embedding_dims must be a list of tuples (cardinality, embedding_dim)"
assert all((isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims), (
"embedding_dims must be a list of tuples (cardinality, embedding_dim)"
)
self.embedded_cat_dim = sum([t[1] for t in self.embedding_dims])
else:
self.embedded_cat_dim = 0
@@ -581,24 +584,25 @@ def __post_init__(self):

@dataclass
class ExperimentConfig:
"""Experiment configuration. Experiment Tracking with WandB and Tensorboard.
"""Experiment configuration.

Args:
project_name (str): The name of the project under which all runs will be logged. For Tensorboard
this defines the folder under which the logs will be saved and for W&B it defines the project name
Experiment Tracking with WandB and Tensorboard.
Args:
project_name (str): The name of the project under which all runs will be logged. For Tensorboard
this defines the folder under which the logs will be saved and for W&B it defines the project name

run_name (Optional[str]): The name of the run; a specific identifier to recognize the run. If left
blank, will be assigned an auto-generated name
run_name (Optional[str]): The name of the run; a specific identifier to recognize the run. If left
blank, will be assigned an auto-generated name

exp_watch (Optional[str]): The level of logging required. Can be `gradients`, `parameters`, `all`
or `None`. Defaults to None. Choices are: [`gradients`,`parameters`,`all`,`None`].
exp_watch (Optional[str]): The level of logging required. Can be `gradients`, `parameters`, `all`
or `None`. Defaults to None. Choices are: [`gradients`,`parameters`,`all`,`None`].

log_target (str): Determines where logging happens - Tensorboard or W&B. Choices are:
[`wandb`,`tensorboard`].
log_target (str): Determines where logging happens - Tensorboard or W&B. Choices are:
[`wandb`,`tensorboard`].

log_logits (bool): Turn this on to log the logits as a histogram in W&B
log_logits (bool): Turn this on to log the logits as a histogram in W&B

exp_log_freq (int): step count between logging of gradients and parameters.
exp_log_freq (int): step count between logging of gradients and parameters.

"""

@@ -730,8 +734,8 @@ def __init__(
self,
exp_version_manager: str = ".pt_tmp/exp_version_manager.yml",
) -> None:
"""The manages the versions of the experiments based on the name. It is a simple dictionary(yaml) based lookup.
Primary purpose is to avoid overwriting of saved models while running the training without changing the
"""The manages the versions of the experiments based on the name. Primary purpose is to avoid overwriting of
saved models while running the training without changing the It is a simple dictionary(yaml) based lookup.
experiment name.

Args:
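Note: the regex hunk near the top of this file is the usual PyYAML workaround for scientific-notation numbers such as `1e-3` being read as strings. A self-contained sketch of the same idea (not the project's exact loader setup):

```python
import re

import yaml

loader = yaml.SafeLoader
loader.add_implicit_resolver(
    "tag:yaml.org,2002:float",
    re.compile(
        r"""^(?:
         [-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+]?[0-9]+)?
        |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
        |\.[0-9_]+(?:[eE][-+][0-9]+)?
        |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
        |[-+]?\.(?:inf|Inf|INF)
        |\.(?:nan|NaN|NAN))$""",
        re.X,
    ),
    list("-+0123456789."),
)

print(yaml.load("lr: 1e-3", Loader=loader))  # {'lr': 0.001} instead of {'lr': '1e-3'}
```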
1 change: 0 additions & 1 deletion src/pytorch_tabular/feature_extractor.py
@@ -59,7 +59,6 @@ def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
pd.DataFrame: The encoded dataframe

"""

X_encoded = X.copy(deep=True)
orig_features = X_encoded.columns
self.tabular_model.model.eval()
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/category_embedding/config.py
@@ -98,7 +98,7 @@ class CategoryEmbeddingModelConfig(ModelConfig):
)
use_batch_norm: bool = field(
default=False,
metadata={"help": ("Flag to include a BatchNorm layer after each Linear Layer+DropOut." " Defaults to False")},
metadata={"help": ("Flag to include a BatchNorm layer after each Linear Layer+DropOut. Defaults to False")},
)
initialization: str = field(
default="kaiming",
4 changes: 2 additions & 2 deletions src/pytorch_tabular/models/common/heads/config.py
@@ -6,8 +6,8 @@

@dataclass
class LinearHeadConfig:
"""A model class for Linear Head configuration; serves as a template and documentation. The models take a
dictionary as input, but if there are keys which are not present in this model class, it'll throw an exception.
"""A model class for Linear Head configuration; serves as a template and documentation. dictionary as input, but if
there are keys which are not present in this model class, it'll throw an exception. The models take a.

Args:
layers (str): Hyphen-separated number of layers and units in the classification/regression head.
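Note: as a usage illustration for the docstring touched above, a hedged sketch of constructing the head config (the `layers` format comes from the docstring; the import path and other field names are assumptions, not part of this diff):

```python
from pytorch_tabular.models.common.heads import LinearHeadConfig  # assumed import path

head_config = LinearHeadConfig(
    layers="32-16",        # two layers with 32 and 16 units in the head
    dropout=0.1,           # assumed field name
    use_batch_norm=False,  # assumed field name
)
print(head_config)
```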
11 changes: 5 additions & 6 deletions src/pytorch_tabular/models/common/layers/activations.py
@@ -151,8 +151,8 @@ def _sparsemax_threshold_and_support(X, dim=-1, k=None):
the threshold value for each vector
support_size : torch LongTensor, shape like `tau`
the number of nonzeros in each vector.
"""

"""
if k is None or k >= X.shape[dim]: # do full sort
topk, _ = torch.sort(X, dim=dim, descending=True)
else:
@@ -204,7 +204,6 @@ def _entmax_threshold_and_support(X, dim=-1, k=None):
the number of nonzeros in each vector.

"""

if k is None or k >= X.shape[dim]: # do full sort
Xsrt, _ = torch.sort(X, dim=dim, descending=True)
else:
@@ -288,7 +287,7 @@ def backward(cls, ctx, dY):


def sparsemax(X, dim=-1, k=None):
"""sparsemax: normalizing sparse transform (a la softmax).
"""Sparsemax: normalizing sparse transform (a la softmax).

Solves the projection:

@@ -313,8 +312,8 @@ def sparsemax(X, dim=-1, k=None):
-------
P : torch tensor, same shape as X
The projection result, such that P.sum(dim=dim) == 1 elementwise.
"""

"""
return SparsemaxFunction.apply(X, dim, k)


@@ -347,13 +346,12 @@ def entmax15(X, dim=-1, k=None):
P : torch tensor, same shape as X
The projection result, such that P.sum(dim=dim) == 1 elementwise.
"""

return Entmax15Function.apply(X, dim, k)


class Sparsemax(nn.Module):
def __init__(self, dim=-1, k=None):
"""sparsemax: normalizing sparse transform (a la softmax).
"""Sparsemax: normalizing sparse transform (a la softmax).

Solves the projection:

@@ -370,6 +368,7 @@ def __init__(self, dim=-1, k=None):
nonzeros in the solution. If the solution is more than k-sparse,
this function is recursively called with a 2*k schedule.
If `None`, full sorting is performed from the beginning.

"""
self.dim = dim
self.k = k
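Note: the sparsemax/entmax docstrings touched above describe sparse alternatives to softmax whose outputs still sum to 1 but may contain exact zeros. A minimal numeric sketch, assuming these functions are importable from this module:

```python
import torch

from pytorch_tabular.models.common.layers.activations import entmax15, sparsemax

x = torch.tensor([[2.0, 1.0, 0.1, -1.0]])

p_soft = torch.softmax(x, dim=-1)  # strictly positive everywhere
p_sparse = sparsemax(x, dim=-1)    # sums to 1, small logits become exactly 0
p_ent = entmax15(x, dim=-1)        # sparsity between softmax and sparsemax

for name, p in [("softmax", p_soft), ("sparsemax", p_sparse), ("entmax15", p_ent)]:
    print(name, p, p.sum(dim=-1))
```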
36 changes: 18 additions & 18 deletions src/pytorch_tabular/models/common/layers/embeddings.py
@@ -84,12 +84,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert (
categorical_data.shape[1] == self.categorical_dim
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == self.categorical_dim, (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
if self.batch_norm_continuous_input:
@@ -141,12 +141,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert categorical_data.shape[1] == len(
self.cat_embedding_layers
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == len(self.cat_embedding_layers), (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
if self.batch_norm_continuous_input:
@@ -273,12 +273,12 @@ def forward(self, x: Dict[str, Any]) -> torch.Tensor:
x.get("continuous", torch.empty(0, 0)),
x.get("categorical", torch.empty(0, 0)),
)
assert categorical_data.shape[1] == len(
self.cat_embedding_layers
), "categorical_data must have same number of columns as categorical embedding layers"
assert (
continuous_data.shape[1] == self.continuous_dim
), "continuous_data must have same number of columns as continuous dim"
assert categorical_data.shape[1] == len(self.cat_embedding_layers), (
"categorical_data must have same number of columns as categorical embedding layers"
)
assert continuous_data.shape[1] == self.continuous_dim, (
"continuous_data must have same number of columns as continuous dim"
)
embed = None
if continuous_data.shape[1] > 0:
cont_idx = torch.arange(self.continuous_dim, device=continuous_data.device).expand(
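Note: every assertion reformatted in this file enforces the same input contract: `forward` receives a dict whose `continuous` and `categorical` tensors match the configured widths. An illustrative standalone sketch of that contract (the shapes are made up; this is not the repository's layer):

```python
import torch

batch_size, continuous_dim, categorical_dim = 32, 4, 3

x = {
    "continuous": torch.randn(batch_size, continuous_dim),
    "categorical": torch.randint(0, 10, (batch_size, categorical_dim)),
}

# The embedding layers above check exactly these two widths before embedding.
assert x["categorical"].shape[1] == categorical_dim, (
    "categorical_data must have same number of columns as categorical embedding layers"
)
assert x["continuous"].shape[1] == continuous_dim, (
    "continuous_data must have same number of columns as continuous dim"
)
```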
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/gate/config.py
@@ -173,7 +173,7 @@ def __post_init__(self):
assert self.tree_depth > 0, "tree_depth should be greater than 0"
# Either gflu_stages or num_trees should be greater than 0
assert self.num_trees > 0, (
"`num_trees` must be greater than 0." "If you want a lighter model which performs better, use GANDALF."
"`num_trees` must be greater than 0.If you want a lighter model which performs better, use GANDALF."
)
super().__post_init__()

12 changes: 6 additions & 6 deletions src/pytorch_tabular/models/gate/gate_model.py
@@ -51,12 +51,12 @@ def __init__(
embedding_dropout: float = 0.0,
):
super().__init__()
assert (
binning_activation in self.BINARY_ACTIVATION_MAP.keys()
), f"`binning_activation should be one of {self.BINARY_ACTIVATION_MAP.keys()}"
assert (
feature_mask_function in self.ACTIVATION_MAP.keys()
), f"`feature_mask_function should be one of {self.ACTIVATION_MAP.keys()}"
assert binning_activation in self.BINARY_ACTIVATION_MAP.keys(), (
f"`binning_activation should be one of {self.BINARY_ACTIVATION_MAP.keys()}"
)
assert feature_mask_function in self.ACTIVATION_MAP.keys(), (
f"`feature_mask_function should be one of {self.ACTIVATION_MAP.keys()}"
)

self.gflu_stages = gflu_stages
self.num_trees = num_trees
6 changes: 3 additions & 3 deletions src/pytorch_tabular/models/mixture_density/config.py
@@ -87,9 +87,9 @@ class MDNConfig(ModelConfig):
_probabilistic: bool = field(default=True)

def __post_init__(self):
assert (
self.backbone_config_class not in INCOMPATIBLE_BACKBONES
), f"{self.backbone_config_class} is not a supported backbone for MDN head"
assert self.backbone_config_class not in INCOMPATIBLE_BACKBONES, (
f"{self.backbone_config_class} is not a supported backbone for MDN head"
)
assert self.head == "MixtureDensityHead"
return super().__post_init__()

18 changes: 9 additions & 9 deletions src/pytorch_tabular/models/node/config.py
@@ -168,15 +168,15 @@ class NodeConfig(ModelConfig):
threshold_init_beta: float = field(
default=1.0,
metadata={
"help": """
Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.
It initializes threshold to a q-th quantile of data points.
where q ~ Beta(:threshold_init_beta:, :threshold_init_beta:)
If this param is set to 1, initial thresholds will have the same distribution as data points
If greater than 1 (e.g. 10), thresholds will be closer to median data value
If less than 1 (e.g. 0.1), thresholds will approach min/max data values.
"""
"help": """Used in the Data-aware initialization of thresholds where the threshold is initialized randomly
(with a beta distribution) to feature values in the first batch.

It initializes threshold to a q-th quantile of data points. where q ~ Beta(:threshold_init_beta:,
:threshold_init_beta:) If this param is set to 1, initial thresholds will have the same distribution
as data points If greater than 1 (e.g. 10), thresholds will be closer to median data value If less
than 1 (e.g. 0.1), thresholds will approach min/max data values.

"""
},
)
threshold_init_cutoff: float = field(
4 changes: 2 additions & 2 deletions src/pytorch_tabular/models/stacking/config.py
@@ -5,8 +5,8 @@

@dataclass
class StackingModelConfig(ModelConfig):
"""StackingModelConfig is a configuration class for the StackingModel. It is used to stack multiple models
together. Now, CategoryEmbeddingModel, TabNetModel, FTTransformerModel, GatedAdditiveTreeEnsembleModel, DANetModel,
"""StackingModelConfig is a configuration class for the StackingModel. together. Now, CategoryEmbeddingModel,
TabNetModel, FTTransformerModel, GatedAdditiveTreeEnsembleModel, DANetModel, It is used to stack multiple models
AutoIntModel, GANDALFModel, NodeModel are supported.

Args: