Reformat all code

Winter Deng · Winter Deng · commit 10c2d9e82adc · 2025-10-09T14:09:31.000+08:00
diff --git a/libmultilabel/common_utils.py b/libmultilabel/common_utils.py
@@ -82,7 +82,7 @@ def argsort_top_k(vals, k, axis=-1):
         k: Consider only the top k elements for each query
         axis: Axis along which to sort. The default is -1 (the last axis).
 
-    Returns: 
+    Returns:
         Array of indices that sort vals along the specified axis.
     """
     unsorted_top_k_idx = np.argpartition(vals, -k, axis=axis)[:, -k:]
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
@@ -56,7 +56,7 @@ def __init__(
         self.flat_model = flat_model
         self.node_ptr = node_ptr
         self.multiclass = False
-        self._model_separated = False # Indicates whether the model has been separated for pruning tree.
+        self._model_separated = False  # Indicates whether the model has been separated for pruning tree.
 
     def predict_values(
         self,
@@ -75,44 +75,42 @@ def predict_values(
         if beam_width >= len(self.root.children):
             # Beam_width is sufficiently large; pruning not applied.
             # Calculates decision values for all nodes.
-            all_preds = linear.predict_values(self.flat_model, x) # number of instances * (number of labels + total number of metalabels)
+            all_preds = linear.predict_values(
+                self.flat_model, x
+            )  # number of instances * (number of labels + total number of metalabels)
         else:
             # Beam_width is small; pruning applied to reduce computation.
             if not self._model_separated:
                 self._separate_model_for_pruning_tree()
                 self._model_separated = True
-            all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels)
+            all_preds = self._prune_tree_and_predict_values(
+                x, beam_width
+            )  # number of instances * (number of labels + total number of metalabels)
         return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])])
 
     def _separate_model_for_pruning_tree(self):
         """
         This function separates the weights for the root node and its children into (K+1) FlatModel
         for efficient beam search traversal in Python.
         """
-        tree_flat_model_params = {
-            'bias': self.root.model.bias,
-            'thresholds': 0,
-            'multiclass': False
-        }
+        tree_flat_model_params = {"bias": self.root.model.bias, "thresholds": 0, "multiclass": False}
         slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
         self.root_model = linear.FlatModel(
-            name="root-flattened-tree",
-            weights=self.flat_model.weights[slice].tocsr(),
-            **tree_flat_model_params
+            name="root-flattened-tree", weights=self.flat_model.weights[slice].tocsr(), **tree_flat_model_params
         )
 
         self.subtree_models = []
         for i in range(len(self.root.children)):
             subtree_weights_start = self.node_ptr[self.root.children[i].index]
-            subtree_weights_end = self.node_ptr[self.root.children[i+1].index] if i+1 < len(self.root.children) else self.node_ptr[-1]
+            subtree_weights_end = (
+                self.node_ptr[self.root.children[i + 1].index] if i + 1 < len(self.root.children) else self.node_ptr[-1]
+            )
             slice = np.s_[:, subtree_weights_start:subtree_weights_end]
             subtree_flatmodel = linear.FlatModel(
-                name="subtree-flattened-tree",
-                weights=self.flat_model.weights[slice].tocsr(),
-                **tree_flat_model_params
+                name="subtree-flattened-tree", weights=self.flat_model.weights[slice].tocsr(), **tree_flat_model_params
             )
             self.subtree_models.append(subtree_flatmodel)
-        
+
     def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray:
         """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees.
 
@@ -143,7 +141,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int)
         # Build a mask where mask[i, j] is True if the j-th subtree is among the top beam_width subtrees for the i-th instance
         mask = np.zeros_like(children_scores, dtype=np.bool_)
         np.put_along_axis(mask, top_beam_width_indices, True, axis=1)
-        
+
         # Calculate predictions for each subtree with its corresponding instances
         for subtree_idx in range(len(self.root.children)):
             subtree_model = self.subtree_models[subtree_idx]
@@ -427,7 +425,7 @@ def train_ensemble_tree(
     seed: int = None,
 ) -> EnsembleTreeModel:
     """Trains an ensemble of tree models (Parabel/Bonsai-style).
-    
+
     Args:
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
         x (sparse.csr_matrix): A matrix with dimensions number of instances * number of features.
@@ -443,7 +441,7 @@ def train_ensemble_tree(
     """
     if seed is None:
         seed = 42
-        
+
     tree_models = []
     for i in range(n_trees):
         np.random.seed(seed + i)
diff --git a/libmultilabel/linear/utils.py b/libmultilabel/linear/utils.py
@@ -76,7 +76,13 @@ class MultiLabelEstimator(sklearn.base.BaseEstimator):
         scoring_metric (str, optional): The scoring metric. Defaults to 'P@1'.
     """
 
-    def __init__(self, options: str = "", linear_technique: str = "1vsrest", scoring_metric: str = "P@1", multiclass: bool = False):
+    def __init__(
+        self,
+        options: str = "",
+        linear_technique: str = "1vsrest",
+        scoring_metric: str = "P@1",
+        multiclass: bool = False,
+    ):
         super().__init__()
         self.options = options
         self.linear_technique = linear_technique
@@ -97,9 +103,7 @@ def predict(self, X: sparse.csr_matrix) -> np.ndarray:
 
     def score(self, X: sparse.csr_matrix, y: sparse.csr_matrix) -> float:
         metrics = linear.get_metrics(
-            monitor_metrics=[self.scoring_metric],
-            num_classes=y.shape[1],
-            multiclass=self.multiclass
+            monitor_metrics=[self.scoring_metric], num_classes=y.shape[1], multiclass=self.multiclass
         )
         preds = self.predict(X)
         metrics.update(preds, y.toarray())
diff --git a/main.py b/main.py
@@ -41,7 +41,11 @@ def add_all_arguments(parser):
     parser.add_argument("--checkpoint_path", help="The checkpoint to warm-up with (default: %(default)s)")
 
     # data
-    parser.add_argument("--data_name", default="unnamed_data", help="Dataset name for generating the output directory (default: %(default)s)")
+    parser.add_argument(
+        "--data_name",
+        default="unnamed_data",
+        help="Dataset name for generating the output directory (default: %(default)s)",
+    )
     parser.add_argument("--training_file", help="Path to training data (default: %(default)s)")
     parser.add_argument("--val_file", help="Path to validation data (default: %(default)s)")
     parser.add_argument("--test_file", help="Path to test data (default: %(default)s)")
@@ -104,7 +108,9 @@ def add_all_arguments(parser):
     # pretrained vocab / embeddings
     parser.add_argument("--vocab_file", type=str, help="Path to a file holding vocabuaries (default: %(default)s)")
     parser.add_argument(
-        "--embed_file", type=str, help="Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding (default: %(default)s)"
+        "--embed_file",
+        type=str,
+        help="Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding (default: %(default)s)",
     )
 
     # train
@@ -235,7 +241,10 @@ def add_all_arguments(parser):
         "--tree_max_depth", type=int, default=10, help="Maximum depth of the tree (default: %(default)s)"
     )
     parser.add_argument(
-        "--tree_ensemble_models", type=int, default=1, help="Number of models in the tree ensemble (default: %(default)s)"
+        "--tree_ensemble_models",
+        type=int,
+        default=1,
+        help="Number of models in the tree ensemble (default: %(default)s)",
     )
     parser.add_argument(
         "--beam_width",
diff --git a/search_params.py b/search_params.py
@@ -191,16 +191,18 @@ def load_static_data(config):
     )
     return {
         "datasets": datasets,
-        "word_dict": None
-        if config.embed_file is None
-        else data_utils.load_or_build_text_dict(
-            dataset=datasets["train"],
-            vocab_file=config.vocab_file,
-            min_vocab_freq=config.min_vocab_freq,
-            embed_file=config.embed_file,
-            embed_cache_dir=config.embed_cache_dir,
-            silent=config.silent,
-            normalize_embed=config.normalize_embed,
+        "word_dict": (
+            None
+            if config.embed_file is None
+            else data_utils.load_or_build_text_dict(
+                dataset=datasets["train"],
+                vocab_file=config.vocab_file,
+                min_vocab_freq=config.min_vocab_freq,
+                embed_file=config.embed_file,
+                embed_cache_dir=config.embed_cache_dir,
+                silent=config.silent,
+                normalize_embed=config.normalize_embed,
+            )
         ),
         "classes": data_utils.load_or_build_label(datasets, config.label_file, config.include_test_labels),
     }