diff --git a/docs/make.bat b/docs/make.bat
old mode 100644
new mode 100755
diff --git a/examol/score/base.py b/examol/score/base.py
index 83850510b..8e37b5956 100644
--- a/examol/score/base.py
+++ b/examol/score/base.py
@@ -77,24 +77,24 @@ def prepare_message(self, model: object, training: bool = False) -> object:
         """
         raise NotImplementedError()

-    def score(self, model_msg: object, inputs: list, **kwargs) -> np.ndarray:
+    def score(self, model_msg: object, input_data: list, **kwargs) -> np.ndarray:
         """Assign a score to molecules

         Args:
             model_msg: Model in a transmittable format, may need to be deserialized
-            inputs: Batch of inputs ready for the model, as generated by :meth:`transform_inputs`
+            input_data: Batch of inputs ready for the model, as generated by :meth:`transform_inputs`
         Returns:
             The scores to a set of records
         """
         raise NotImplementedError()

-    def retrain(self, model_msg: object, inputs: list, outputs: list, **kwargs) -> object:
+    def retrain(self, model_msg: object, input_data: list, output_data: list, **kwargs) -> object:
         """Retrain the scorer based on new training records

         Args:
             model_msg: Model to be retrained
-            inputs: Training set inputs, as generated by :meth:`transform_inputs`
-            outputs: Training Set outputs, as generated by :meth:`transform_outputs`
+            input_data: Training set inputs, as generated by :meth:`transform_inputs`
+            output_data: Training Set outputs, as generated by :meth:`transform_outputs`
         Returns:
             Message defining how to update the model
         """
@@ -121,8 +121,8 @@ class MultiFidelityScorer(Scorer):
     training and use the lower-fidelity data to enhance prediction accuracy during scoring.
     """

-    def score(self, model_msg: object, inputs: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
+    def score(self, model_msg: object, input_data: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
         raise NotImplementedError()

-    def retrain(self, model_msg: object, inputs: list, outputs: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> object:
+    def retrain(self, model_msg: object, input_data: list, output_data: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> object:
         raise NotImplementedError()
diff --git a/examol/score/nfp.py b/examol/score/nfp.py
index 802837010..7849a9715 100644
--- a/examol/score/nfp.py
+++ b/examol/score/nfp.py
@@ -321,7 +321,7 @@ def transform_inputs(self, record_batch: list[MoleculeRecord]) -> list[dict]:

     def score(self,
               model_msg: NFPMessage,
-              inputs: list[dict | tuple[dict, np.ndarray]],
+              input_data: list[dict | tuple[dict, np.ndarray]],
               batch_size: int = 64,
               lower_fidelities: np.ndarray | None = None,
               **kwargs) -> np.ndarray:
@@ -329,7 +329,7 @@ def score(self,

         Args:
             model_msg: Model in a transmittable format
-            inputs: Batch of inputs ready for the model (in dictionary format)
+            input_data: Batch of inputs ready for the model (in dictionary format)
             batch_size: Number of molecules to evaluate at each time
             lower_fidelities: Properties of the molecule at lower levels, if known
         Returns:
@@ -338,7 +338,7 @@ def score(self,
         model = model_msg.get_model()  # Unpack the model

         # Run inference
-        loader = make_data_loader(inputs, batch_size=batch_size)
+        loader = make_data_loader(input_data, batch_size=batch_size)
         ml_outputs = np.squeeze(model.predict(loader, verbose=False))
         if ml_outputs.ndim == 1:  # Single-fidelity learning
             return ml_outputs
@@ -351,8 +351,8 @@ def score(self,

     def retrain(self,
                 model_msg: dict | NFPMessage,
-                inputs: list,
-                outputs: np.ndarray,
+                input_data: list,
+                output_data: np.ndarray,
                 lower_fidelities: None | np.ndarray = None,
                 num_epochs: int = 4,
                 batch_size: int = 32,
@@ -367,8 +367,8 @@ def retrain(self,

         Args:
             model_msg: Model to be retrained
-            inputs: Training set inputs, as generated by :meth:`transform_inputs`
-            outputs: Training Set outputs, as generated by :meth:`transform_outputs`
+            input_data: Training set inputs, as generated by :meth:`transform_inputs`
+            output_data: Training Set outputs, as generated by :meth:`transform_outputs`
             lower_fidelities: Lower-fidelity data, if available
             num_epochs: Maximum number of epochs to run
             batch_size: Number of molecules per training batch
@@ -405,12 +405,12 @@ def loss(y_true, y_pred):
                 return tf.keras.losses.mean_squared_error(y_true[is_known], y_pred[is_known])

             # Prepare the outputs
-            outputs = np.concatenate([lower_fidelities, outputs[:, None]], axis=1)
-            outputs = compute_deltas(outputs)
-            value_spec = tf.TensorSpec((outputs.shape[1],), dtype=tf.float32)
+            output_data = np.concatenate([lower_fidelities, output_data[:, None]], axis=1)
+            output_data = compute_deltas(output_data)
+            value_spec = tf.TensorSpec((output_data.shape[1],), dtype=tf.float32)

         # Split off a validation set
-        train_x, valid_x, train_y, valid_y = train_test_split(inputs, outputs, test_size=validation_split)
+        train_x, valid_x, train_y, valid_y = train_test_split(input_data, output_data, test_size=validation_split)

         # Make the loaders
         steps_per_epoch = len(train_x) // batch_size
@@ -422,9 +422,9 @@ def loss(y_true, y_pred):

         # Define initial guesses for the "scaling" later
         try:
-            outputs = np.array(outputs)
-            output_mean = np.nanmean(outputs, axis=0)
-            outputs_std = np.clip(np.nanstd(outputs, axis=0), 1e-6, a_max=None)
+            output_data = np.array(output_data)
+            output_mean = np.nanmean(output_data, axis=0)
+            outputs_std = np.clip(np.nanstd(output_data, axis=0), 1e-6, a_max=None)
             for i, (m, s) in enumerate(zip(np.atleast_1d(output_mean), np.atleast_1d(outputs_std))):
                 scale_layer = model.get_layer(f'scale_{i}')
                 scale_layer.set_weights([np.atleast_2d(s), np.atleast_1d(m)])
diff --git a/examol/score/rdkit/__init__.py b/examol/score/rdkit/__init__.py
index 028eb0dee..d53cded56 100644
--- a/examol/score/rdkit/__init__.py
+++ b/examol/score/rdkit/__init__.py
@@ -103,53 +103,53 @@ def prepare_message(self, model: ModelType, training: bool = True) -> ModelType:
         # Send the whole list for inference
         return model

-    def score(self, model_msg: ModelType, inputs: InputType, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
+    def score(self, model_msg: ModelType, input_data: InputType, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
         if not isinstance(model_msg, list):
             # Single objective
-            return model_msg.predict(inputs)
+            return model_msg.predict(input_data)
         elif len(model_msg) == 1:
-            return np.squeeze(model_msg[0].predict(inputs))
+            return np.squeeze(model_msg[0].predict(input_data))
         else:
             # Get the known deltas then append a NaN to the end (we don't know the last delta)
             if lower_fidelities is None:
-                deltas = np.empty((len(inputs), len(model_msg))) * np.nan
+                deltas = np.empty((len(input_data), len(model_msg))) * np.nan
             else:
                 known_deltas = compute_deltas(lower_fidelities)
                 deltas = np.concatenate((known_deltas, np.empty_like(known_deltas[:, :1]) * np.nan), axis=1)

             # Run the model at each level
             for my_level, my_model in enumerate(model_msg):
-                my_preds = my_model.predict(inputs)
+                my_preds = my_model.predict(input_data)
                 is_unknown = np.isnan(deltas[:, my_level])
                 deltas[is_unknown, my_level] = my_preds[is_unknown]

             # Sum up the deltas
             return np.sum(deltas, axis=1)

-    def retrain(self, model_msg: Pipeline, inputs: InputType, outputs: np.ndarray,
+    def retrain(self, model_msg: Pipeline, input_data: InputType, output_data: np.ndarray,
                 bootstrap: bool = False,
                 lower_fidelities: np.ndarray | None = None) -> ModelType:
         if bootstrap:
-            samples = np.random.random_integers(0, len(inputs) - 1, size=(len(inputs),))
-            inputs = [inputs[i] for i in samples]
-            outputs = outputs[samples]
+            samples = np.random.random_integers(0, len(input_data) - 1, size=(len(input_data),))
+            input_data = [input_data[i] for i in samples]
+            output_data = output_data[samples]
             if lower_fidelities is not None:
                 lower_fidelities = lower_fidelities[samples, :]

         if lower_fidelities is None:
             # For single level, train a single model
-            model_msg.fit(inputs, outputs)
+            model_msg.fit(input_data, output_data)
             return model_msg
         else:
             # Compute the delta and then train a different model for each delta
-            outputs = np.concatenate([lower_fidelities, outputs[:, None]], axis=1)  # Append target level to end
-            deltas = compute_deltas(outputs)
+            output_data = np.concatenate([lower_fidelities, output_data[:, None]], axis=1)  # Append target level to end
+            deltas = compute_deltas(output_data)

             models = []
             for y in deltas.T:
                 # Remove the missing values
                 mask = np.isfinite(y)
-                my_smiles = [i for m, i in zip(mask, inputs) if m]
+                my_smiles = [i for m, i in zip(mask, input_data) if m]
                 y = y[mask]

                 # Fit a fresh copy of the model
diff --git a/examples/redoxmers/run/report.md b/examples/redoxmers/run/report.md
index 7e567afbd..4a87207a5 100644
--- a/examples/redoxmers/run/report.md
+++ b/examples/redoxmers/run/report.md
@@ -1,12 +1,14 @@
 # Run Report
-Report time: 2023-10-18 17:38:06.910593
+Report time: 2025-02-12 09:02:54.732377

 ## Task Summary
 Measures how many tasks have run as part of the application

 | Task Type   | Count   | Node Hours   | Failures   |
 |-------------|---------|--------------|------------|
-| simulation  | 2       | 0.00041      | 0 (0.0%)   |
+| inference   | 5       | 0.0044       | 0 (0.0%)   |
+| train       | 5       | 0.0017       | 0 (0.0%)   |
+| simulation  | 37      | 0.02         | 0 (0.0%)   |

 ## Outcomes over Time
 The property of the molecules over time.
diff --git a/examples/redoxmers/run/simulation-outputs_recipe-0.png b/examples/redoxmers/run/simulation-outputs_recipe-0.png
index ff18e8239..94a06949d 100644
Binary files a/examples/redoxmers/run/simulation-outputs_recipe-0.png and b/examples/redoxmers/run/simulation-outputs_recipe-0.png differ
diff --git a/tests/steer/conftest.py b/tests/steer/conftest.py
index 403d81c6b..6865432f3 100644
--- a/tests/steer/conftest.py
+++ b/tests/steer/conftest.py
@@ -74,7 +74,7 @@ def queues(recipe, scorer, simulator, tmp_path) -> ColmenaQueues:
     # Make parsl configuration
     config = Config(
         run_dir=str(tmp_path),
-        executors=[HighThroughputExecutor(max_workers=1, address='127.0.0.1')]
+        executors=[HighThroughputExecutor(max_workers_per_node=1, address='127.0.0.1')]
     )

     doer = ParslTaskServer(
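
Reviewer note: the multi-fidelity paths in both scorers hinge on compute_deltas, which turns per-level targets into stepwise corrections before training and is inverted at score time by summing the predicted steps. Below is a minimal sketch of that bookkeeping, assuming compute_deltas keeps the lowest-fidelity value in column 0 and stores successive level-to-level differences afterward; compute_deltas_sketch and the sample arrays are illustrative stand-ins, not code from this diff.

import numpy as np

def compute_deltas_sketch(values: np.ndarray) -> np.ndarray:
    """Turn per-level values of shape (n_samples, n_levels) into deltas.

    Column 0 keeps the lowest-fidelity value; column i holds the step from
    level i-1 to level i. NaNs propagate, marking steps that are unknown.
    """
    deltas = values.copy()
    deltas[:, 1:] = values[:, 1:] - values[:, :-1]
    return deltas

# Two molecules with a property at three fidelity levels; the second is
# missing its highest-fidelity value (NaN), as happens mid-campaign.
values = np.array([[1.0, 1.5, 1.7],
                   [0.8, 1.1, np.nan]])
deltas = compute_deltas_sketch(values)

# Training fits one model per delta column (rows with NaN are masked out).
# Scoring keeps any known deltas, fills the unknown ones with model
# predictions, and sums across columns: 1.0 + 0.5 + 0.2 recovers 1.7.
assert np.isclose(deltas[0].sum(), 1.7)

Read this way, the changes above are mechanical: retrain appends the target level to lower_fidelities before calling compute_deltas so the last column is the step being learned, and score seeds the delta matrix with NaNs when no lower-fidelity data is supplied so every step falls back to a model prediction.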