making dmax and K as global default value

shenkha · shenkha · commit 0669c9fce4fd · 2025-08-04T11:21:09.000+04:00
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
@@ -13,8 +13,8 @@
 
 __all__ = ["train_tree", "TreeModel", "train_ensemble_tree", "EnsembleTreeModel"]
 
-K = 100
-DMAX = 10
+DEFAULT_K = 100
+DEFAULT_DMAX = 10
 
 
 class Node:
@@ -201,6 +201,8 @@ def train_tree(
     y: sparse.csr_matrix,
     x: sparse.csr_matrix,
     options: str = "",
+    K=DEFAULT_K,
+    dmax=DEFAULT_DMAX,
     verbose: bool = True,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
@@ -210,14 +212,16 @@ def train_tree(
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
         x (sparse.csr_matrix): A matrix with dimensions number of instances * number of features.
         options (str): The option string passed to liblinear.
+        K (int, optional): Maximum degree of nodes in the tree. Defaults to 100.
+        dmax (int, optional): Maximum depth of the tree. Defaults to 10.
         verbose (bool, optional): Output extra progress information. Defaults to True.
 
     Returns:
         TreeModel: A model which can be used in predict_values.
     """
     label_representation = (y.T * x).tocsr()
     label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
-    root = _build_tree(label_representation, np.arange(y.shape[1]), 0)
+    root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
     root.is_root = True
 
     num_nodes = 0
@@ -260,18 +264,20 @@ def visit(node):
     return TreeModel(root, flat_model, node_ptr)
 
 
-def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int) -> Node:
+def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
     """Build the tree recursively by kmeans clustering.
 
     Args:
         label_representation (sparse.csr_matrix): A matrix with dimensions number of classes under this node * number of features.
         label_map (np.ndarray): Maps 0..label_representation.shape[0] to the original label indices.
         d (int): Current depth.
+        K (int): Maximum degree of nodes in the tree.
+        dmax (int): Maximum depth of the tree.
 
     Returns:
         Node: Root of the (sub)tree built from label_representation.
     """
-    if d >= DMAX or label_representation.shape[0] <= K:
+    if d >= dmax or label_representation.shape[0] <= K:
         return Node(label_map=label_map, children=[])
 
     metalabels = (
@@ -291,7 +297,7 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
     for i in range(K):
         child_representation = label_representation[metalabels == i]
         child_map = label_map[metalabels == i]
-        child = _build_tree(child_representation, child_map, d + 1)
+        child = _build_tree(child_representation, child_map, d + 1, K, dmax)
         children.append(child)
 
     return Node(label_map=label_map, children=children)
@@ -413,6 +419,8 @@ def train_ensemble_tree(
     y: sparse.csr_matrix,
     x: sparse.csr_matrix,
     options: str = "",
+    K: int = DEFAULT_K,
+    dmax: int = DEFAULT_DMAX,
     n_trees: int = 3,
     seed: int = 42,
     verbose: bool = True,
@@ -422,6 +430,8 @@ def train_ensemble_tree(
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
         x (sparse.csr_matrix): A matrix with dimensions number of instances * number of features.
         options (str, optional): The option string passed to liblinear. Defaults to ''.
+        K (int, optional): Maximum degree of nodes in the tree. Defaults to 100.
+        dmax (int, optional): Maximum depth of the tree. Defaults to 10.
         n_trees (int, optional): Number of trees in the ensemble. Defaults to 3.
         seed (int, optional): The base random seed for the ensemble. Defaults to 42.
         verbose (bool, optional): Output extra progress information. Defaults to True.
@@ -433,10 +443,9 @@ def train_ensemble_tree(
     for i in range(n_trees):
         np.random.seed(seed + i)
 
-        tree_model = train_tree(y, x, options, verbose=False)
+        tree_model = train_tree(y, x, options, K, dmax, verbose)
         tree_models.append(tree_model)
 
-    if verbose:
-        print("Ensemble training completed.")
+    print("Ensemble training completed.")
 
     return EnsembleTreeModel(tree_models)
diff --git a/linear_trainer.py b/linear_trainer.py
@@ -48,13 +48,14 @@ def linear_train(datasets, config):
     if config.linear_technique == "tree":
         if multiclass:
             raise ValueError("Tree model should only be used with multilabel datasets.")
-        linear.tree.K = config.tree_degree
-        linear.tree.DMAX = config.tree_max_depth
+
         if config.tree_ensemble_models > 1:
             model = train_ensemble_tree(
                 datasets["train"]["y"],
                 datasets["train"]["x"],
                 options=config.liblinear_options,
+                K=config.tree_degree,
+                dmax=config.tree_max_depth,
                 n_trees=config.tree_ensemble_models,
                 seed=config.seed if config.seed is not None else 42,
             )
@@ -63,6 +64,8 @@ def linear_train(datasets, config):
                 datasets["train"]["y"],
                 datasets["train"]["x"],
                 options=config.liblinear_options,
+                K=config.tree_degree,
+                dmax=config.tree_max_depth,
             )
     else:
         model = LINEAR_TECHNIQUES[config.linear_technique](