Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/amlro/generate_reaction_conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
def get_reaction_scope(
config: Dict,
sampling: str = "random",
training_size: int = 20,
write_files: bool = False,
exp_dir: str = None,
) -> pd.DataFrame:
Expand All @@ -36,9 +35,6 @@ def get_reaction_scope(
:param sampling: Sampling methods for generating training reaction conditions,
defaults to 'random'
:type sampling: str, optional
:param training_size: Training set size required for initial experiments,
defaults to 20
:type training_size: int, optional
:param write_files: Option to enable writing files,
defaults to False
:type write_files: bool, optional
Expand All @@ -56,6 +52,8 @@ def get_reaction_scope(
generate_reaction_grid(config)
)

training_size = config.get("training_size", 15)

if sampling == "random":
training_conditions_df = random_sampling(
reaction_conditions_df, training_size
Expand Down
44 changes: 21 additions & 23 deletions src/amlro/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

from amlro.const import FULL_COMBO_FILENAME, REACTION_DATA_FILENAME
from amlro.ml_models import get_regressor_model
from amlro.pareto import identify_pareto_front
from amlro.pareto import (
calculate_frontier_depth,
get_ranked_pareto_fronts,
select_batch_from_ranked_fronts,
)
from amlro.validations import validate_optimizer_config

# random seed for define random state of ML models
Expand Down Expand Up @@ -134,7 +138,7 @@ def mo_model_training(


def predict_next_parameters(
regr, data: pd.DataFrame, config: Dict, batch_size: int = 1
regr, data: pd.DataFrame, n_curr: int, config: Dict, batch_size: int = 1
) -> pd.DataFrame:
"""Predicts the yield from all the combination data using a trained
regressor model and return the best combinations.
Expand All @@ -150,6 +154,8 @@ def predict_next_parameters(
:type regr: model object with a `predict` method (e.g., sklearn regressor)
:param data: test dataset that contains full reaction space
:type data: pd.Dataframe
:param n_curr: current size of the training data
:type n_curr: int
:param config: Dictionary of optimizer parameters
:type config: Dict
:param batch_size: Number of reaction conditions needed as predictions
Expand Down Expand Up @@ -177,32 +183,22 @@ def predict_next_parameters(

elif len(config["directions"]) > 1:
nfeatures = len(data.columns)
feature_columns = data.columns

weights = np.array(
[-1 if dir == "min" else 1 for dir in config["directions"]]
# calculate the max fronts or frontier depth for stratified sampling
frontier_depth = calculate_frontier_depth(
config, nfeatures, n_curr, batch_size
)

# Identify Pareto front
pareto_front = identify_pareto_front(
prediction_df.values, config["directions"], nfeatures
ranked_pareto_fronts = get_ranked_pareto_fronts(
config, nfeatures, prediction_df, frontier_depth
)

# finding the weighted sum of objective values for pareto solutions
normalized_front = (pareto_front - pareto_front.min()) / (
pareto_front.max() - pareto_front.min()
# find the best set of solutions by exploring the frontier depth
best_parameters = select_batch_from_ranked_fronts(
ranked_pareto_fronts, config, nfeatures, batch_size, feature_columns
)
weighted_sums = np.sum(
normalized_front[:, nfeatures:] * weights, axis=1
)

num_solutions = min(len(pareto_front), batch_size)

# Get the indices of the top `num_solutions` weighted sums
best_indexs = np.argsort(weighted_sums)[-num_solutions:][::-1]
best_solutions = pareto_front[best_indexs]
best_parameters = best_solutions[:, :nfeatures]

best_parameters = pd.DataFrame(best_parameters, columns=data.columns)

return best_parameters

Expand Down Expand Up @@ -280,7 +276,7 @@ def get_optimized_parameters(
x_train, y_train, data = load_data(
reaction_data_path, full_combo_path, config
)

current_dataset_size = len(y_train)
print("Data Loading for Machine Learning Model...")
print("Training ML model " + model + " ...")

Expand All @@ -291,7 +287,9 @@ def get_optimized_parameters(
regr = mo_model_training(x_train, y_train, model)

# Predict the next best reaction conditions.
best_combo = predict_next_parameters(regr, data, config, batch_size)
best_combo = predict_next_parameters(
regr, data, current_dataset_size, config, batch_size
)

# Decode the best reaction conditions.
next_best_conditions = []
Expand Down
Loading