Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/amlro/generate_reaction_conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
def get_reaction_scope(
config: Dict,
sampling: str = "random",
training_size: int = 20,
write_files: bool = False,
exp_dir: str = None,
) -> pd.DataFrame:
Expand All @@ -36,9 +35,6 @@ def get_reaction_scope(
:param sampling: Sampling methods for generating training reaction conditions,
defaults to 'random'
:type sampling: str, optional
:param training_size: Training set size required for initial experiments,
defaults to 20
:type training_size: int, optional
:param write_files: Option to enable writing files,
defaults to False
:type write_files: bool, optional
Expand All @@ -56,6 +52,8 @@ def get_reaction_scope(
generate_reaction_grid(config)
)

training_size = config.get("training_size", 15)

if sampling == "random":
training_conditions_df = random_sampling(
reaction_conditions_df, training_size
Expand Down
44 changes: 21 additions & 23 deletions src/amlro/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

from amlro.const import FULL_COMBO_FILENAME, REACTION_DATA_FILENAME
from amlro.ml_models import get_regressor_model
from amlro.pareto import identify_pareto_front
from amlro.pareto import (
calculate_frontier_depth,
get_ranked_pareto_fronts,
select_batch_from_ranked_fronts,
)
from amlro.validations import validate_optimizer_config

# random seed for define random state of ML models
Expand Down Expand Up @@ -134,7 +138,7 @@ def mo_model_training(


def predict_next_parameters(
regr, data: pd.DataFrame, config: Dict, batch_size: int = 1
regr, data: pd.DataFrame, n_curr: int, config: Dict, batch_size: int = 1
) -> pd.DataFrame:
"""Predicts the yield from all the combination data using a trained
regressor model and return the best combinations.
Expand All @@ -150,6 +154,8 @@ def predict_next_parameters(
:type regr: model object with a `predict` method (e.g., sklearn regressor)
:param data: test dataset that contains full reaction space
:type data: pd.Dataframe
:param n_curr: current size of the training data
:type n_curr: int
:param config: Dictionary of optimizer parameters
:type config: Dict
:param batch_size: Number of reaction conditions needed as predictions
Expand Down Expand Up @@ -177,32 +183,22 @@ def predict_next_parameters(

elif len(config["directions"]) > 1:
nfeatures = len(data.columns)
feature_columns = data.columns

weights = np.array(
[-1 if dir == "min" else 1 for dir in config["directions"]]
# calculate the max fronts or frontier depth for stratified sampling
frontier_depth = calculate_frontier_depth(
config, nfeatures, n_curr, batch_size
)

# Identify Pareto front
pareto_front = identify_pareto_front(
prediction_df.values, config["directions"], nfeatures
ranked_pareto_fronts = get_ranked_pareto_fronts(
config, nfeatures, prediction_df, frontier_depth
)

# finding the weighted sum of objective values for pareto solutions
normalized_front = (pareto_front - pareto_front.min()) / (
pareto_front.max() - pareto_front.min()
# find the best set of solutions by exploring the frontier depth
best_parameters = select_batch_from_ranked_fronts(
ranked_pareto_fronts, config, nfeatures, batch_size, feature_columns
)
weighted_sums = np.sum(
normalized_front[:, nfeatures:] * weights, axis=1
)

num_solutions = min(len(pareto_front), batch_size)

# Get the indices of the top `num_solutions` weighted sums
best_indexs = np.argsort(weighted_sums)[-num_solutions:][::-1]
best_solutions = pareto_front[best_indexs]
best_parameters = best_solutions[:, :nfeatures]

best_parameters = pd.DataFrame(best_parameters, columns=data.columns)

return best_parameters

Expand Down Expand Up @@ -280,7 +276,7 @@ def get_optimized_parameters(
x_train, y_train, data = load_data(
reaction_data_path, full_combo_path, config
)

current_dataset_size = len(y_train)
print("Data Loading for Machine Learning Model...")
print("Training ML model " + model + " ...")

Expand All @@ -291,7 +287,9 @@ def get_optimized_parameters(
regr = mo_model_training(x_train, y_train, model)

# Predict the next best reaction conditions.
best_combo = predict_next_parameters(regr, data, config, batch_size)
best_combo = predict_next_parameters(
regr, data, current_dataset_size, config, batch_size
)

# Decode the best reaction conditions.
next_best_conditions = []
Expand Down
Loading