Commit b04e1cc

Author: Winter Deng
Commit message: reformat all codes
1 parent ff9003b commit b04e1cc

File tree

6 files changed: +77 -67 lines changed


docs/cli/classifier.py

Lines changed: 30 additions & 18 deletions
@@ -11,27 +11,37 @@
 
 import subprocess
 
+
 def load_exclude_authors(file_path):
     path = Path(file_path)
     if not path.exists():
         return []
     with open(path, "r", encoding="utf-8") as f:
         return [line.strip() for line in f if line.strip()]
 
+
 def get_last_commit_excluding(exclude_file):
     exclude_authors = set(load_exclude_authors(exclude_file))
-
-    logs = subprocess.check_output([
-        "git", "log",
-        "--pretty=format:%H|%ae",  # commit hash | author email
-    ]).decode("utf-8").splitlines()
-
+
+    logs = (
+        subprocess.check_output(
+            [
+                "git",
+                "log",
+                "--pretty=format:%H|%ae",  # commit hash | author email
+            ]
+        )
+        .decode("utf-8")
+        .splitlines()
+    )
+
     for line in logs:
         commit, email = line.split("|", 1)
         if email not in exclude_authors:
             return commit
     return None
 
+
 def classify_file_category(path):
 
     relative_path = Path(path).relative_to(lib_path)

@@ -52,12 +62,12 @@ def fetch_option_flags(flags):
 
     for flag in flags:
         flag_list.append(
-            {
-                "name": flag["name"].replace("\\", ""),
-                "instruction": flag["name"].split("-")[-1],
-                "description": flag["description"]
-            }
-        )
+            {
+                "name": flag["name"].replace("\\", ""),
+                "instruction": flag["name"].split("-")[-1],
+                "description": flag["description"],
+            }
+        )
 
     return flag_list
 

@@ -66,15 +76,15 @@ def fetch_all_files():
     main_files = [
         os.path.join(lib_path, "main.py"),
         os.path.join(lib_path, "linear_trainer.py"),
-        os.path.join(lib_path, "torch_trainer.py")
+        os.path.join(lib_path, "torch_trainer.py"),
     ]
     lib_files = glob.glob(os.path.join(lib_path, "libmultilabel/**/*.py"), recursive=True)
     file_set = set(map(os.path.abspath, main_files + lib_files))
     return file_set
 
 
 def find_config_usages_in_file(file_path, allowed_keys):
-    pattern = re.compile(r'\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)')
+    pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
     detailed_results = {}
     try:
         with open(file_path, "r", encoding="utf-8") as f:

@@ -114,7 +124,7 @@ def move_duplicates_together(data, keep):
     duplicates = set()
 
     for i, key1 in enumerate(all_keys):
-        for key2 in all_keys[i+1:]:
+        for key2 in all_keys[i + 1 :]:
             duplicates |= data[key1] & data[key2]
 
     data[keep] |= duplicates

@@ -136,7 +146,7 @@ def classify(raw_flags):
     collected = {}
 
     for file_path in file_set:
-        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
+        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
         if detailed_results:
             usage_map[file_path] = set(detailed_results.keys())
             for k, v in detailed_results.items():

@@ -163,7 +173,9 @@ def classify(raw_flags):
         if flag["category"] not in result:
             result[flag["category"]] = []
 
-        result[flag["category"]].append({"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]})
+        result[flag["category"]].append(
+            {"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
+        )
 
     result["details"] = []
     for k, v in collected.items():

@@ -172,4 +184,4 @@ def classify(raw_flags):
         for i in v[1:]:
             result["details"].append({"name": "", "file": i["file"], "location": ", ".join(i["lines"])})
 
-    return result
+    return result
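
For orientation, get_last_commit_excluding scans "git log --pretty=format:%H|%ae" output and returns the newest commit whose author email is not on the exclude list. A condensed, self-contained sketch of that lookup, to be run inside a git checkout; the exclude entry is hypothetical (classifier.py loads real entries from a file, one email per line):

import subprocess

# Hypothetical exclude entry; see load_exclude_authors above for the real source.
exclude_authors = {"ci-bot@example.com"}

logs = (
    subprocess.check_output(["git", "log", "--pretty=format:%H|%ae"])
    .decode("utf-8")
    .splitlines()
)
# Newest commit whose author email is not excluded, or None if all are excluded.
last_commit = next(
    (commit for commit, email in (line.split("|", 1) for line in logs) if email not in exclude_authors),
    None,
)
print(last_commit)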

docs/cli/genflags.py

Lines changed: 7 additions & 3 deletions
@@ -6,6 +6,7 @@
 import main
 from classifier import classify
 
+
 class FakeParser(dict):
     def __init__(self):
         self.flags = []

@@ -32,9 +33,11 @@ def add_argument(
 
 classified = classify(parser.flags)
 
+
 def width_title(key, title):
     return max(map(lambda f: len(f[key]), classified[title]))
 
+
 def print_table(title, flags, intro):
     print()
     print(intro)

@@ -51,21 +54,22 @@ def print_table(title, flags, intro):
     print("=" * wn, "=" * wd)
     print()
 
+
 print_table(
     "general",
     classified["general"],
     intro="**General options**:\n\
-Common configurations shared across both linear and neural network trainers."
+Common configurations shared across both linear and neural network trainers.",
 )
 print_table(
     "linear",
     classified["linear"],
     intro="**Linear options**:\n\
-Configurations specific to linear trainer."
+Configurations specific to linear trainer.",
 )
 print_table(
     "nn",
     classified["nn"],
     intro="**Neural network options**:\n\
-Configurations specific to torch (neural networks) trainer."
+Configurations specific to torch (neural networks) trainer.",
 )
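
One subtlety in the last hunk: the intro arguments are backslash-continued string literals, so the trailing comma added by the reformat has to sit on the continuation line. A minimal sketch of how that continuation behaves:

# The trailing backslash joins the two physical lines into one string
# literal; the continuation starts at column 0 so no indentation leaks
# into the string.
intro = "**General options**:\n\
Common configurations shared across both linear and neural network trainers."
print(intro)  # two output lines, split at the \n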

docs/examples/plot_linear_feature_gen.py

Lines changed: 2 additions & 6 deletions
@@ -13,11 +13,7 @@
 from libmultilabel import linear
 
 datasets = linear.load_dataset("txt", "data/rcv1/train.txt", "data/rcv1/test.txt")
-tfidf_params = {
-    "max_features": 20000,
-    "min_df": 3,
-    "ngram_range": (1, 3)
-}
+tfidf_params = {"max_features": 20000, "min_df": 3, "ngram_range": (1, 3)}
 preprocessor = linear.Preprocessor(tfidf_params=tfidf_params)
 preprocessor.fit(datasets)
 datasets = preprocessor.transform(datasets)

@@ -30,4 +26,4 @@
 #
 # Finally, we use the generated numerical features to train and evaluate the model.
 # The rest of the steps is the same in the quickstarts.
-# Please refer to them for details.
+# Please refer to them for details.
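
The collapsed tfidf_params literal is unchanged in meaning. Its keys appear to mirror scikit-learn's TfidfVectorizer arguments (an assumption about how libmultilabel's Preprocessor forwards them); a standalone sketch under that assumption:

from sklearn.feature_extraction.text import TfidfVectorizer

# max_features caps the vocabulary size, min_df drops rare terms,
# ngram_range=(1, 3) adds bigrams and trigrams alongside unigrams.
vectorizer = TfidfVectorizer(max_features=20000, min_df=3, ngram_range=(1, 3))
X = vectorizer.fit_transform([
    "a tiny example document",
    "another tiny example document",
    "a third document",
])
print(X.shape)  # only terms in at least min_df=3 documents survive, so (3, 1) here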

docs/examples/plot_linear_gridsearch_tutorial.py

Lines changed: 3 additions & 3 deletions
@@ -61,13 +61,13 @@
 ######################################################################
 # The best parameters are::
 #
-#   clf__options: -s 2 -c 0.5 -m 1
-#   tfidf__max_features: 10000
+#   clf__options: -s 2 -c 0.5 -m 1
+#   tfidf__max_features: 10000
 #   tfidf__min_df: 5
 #
 # Note that in the above code, the ``refit`` argument of ``GridSearchCV`` is enabled by default, meaning that the best configuration will be trained on the whole dataset after hyperparameter search.
 # We refer to this as the retrain strategy.
-# After fitting ``GridSearchCV``, the retrained model is stored in ``clf``.
+# After fitting ``GridSearchCV``, the retrained model is stored in ``clf``.
 #
 # We can apply the ``predict`` function of ``GridSearchCV`` object to use the estimator trained under the best hyperparameters for prediction.
 # Then use ``linear.compute_metrics`` to calculate the test performance.
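
The refit note touched by this hunk is generic GridSearchCV behavior rather than anything libmultilabel-specific. A minimal sketch, with a plain scikit-learn classifier standing in for the tutorial's pipeline:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
search = GridSearchCV(LogisticRegression(max_iter=1000), {"C": [0.5, 1.0]})
search.fit(X, y)  # refit=True by default: the best config is retrained on all of X, y
print(search.best_params_)
print(search.predict(X[:5]))  # served by the refit best estimator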

docs/examples/plot_linear_tree_tutorial.py

Lines changed: 1 addition & 4 deletions
@@ -129,9 +129,7 @@ def metrics_in_batches(model):
 # We have already trained a single tree model as a baseline.
 # Now, let's train an ensemble of 3 tree models.
 training_start = time.time()
-ensemble_model = linear.train_ensemble_tree(
-    datasets["train"]["y"], datasets["train"]["x"], n_trees=3
-)
+ensemble_model = linear.train_ensemble_tree(datasets["train"]["y"], datasets["train"]["x"], n_trees=3)
 training_end = time.time()
 print("Training time of ensemble tree: {:10.2f}".format(training_end - training_start))
 

@@ -179,4 +177,3 @@ def metrics_in_batches(model):
 # | +-----------------+-------+-------+-------+
 # | | Ensemble-15     | 91.25 | 81.31 | 68.34 |
 # +---------------+-----------------+-------+-------+-------+
-
