Empty file modified: docs/make.bat (mode changed 100644 → 100755)
14 changes: 7 additions & 7 deletions examol/score/base.py
@@ -77,24 +77,24 @@ def prepare_message(self, model: object, training: bool = False) -> object:
         """
         raise NotImplementedError()
 
-    def score(self, model_msg: object, inputs: list, **kwargs) -> np.ndarray:
+    def score(self, model_msg: object, input_data: list, **kwargs) -> np.ndarray:
         """Assign a score to molecules
 
         Args:
             model_msg: Model in a transmittable format, may need to be deserialized
-            inputs: Batch of inputs ready for the model, as generated by :meth:`transform_inputs`
+            input_data: Batch of inputs ready for the model, as generated by :meth:`transform_inputs`
         Returns:
             The scores for a set of records
         """
         raise NotImplementedError()
 
-    def retrain(self, model_msg: object, inputs: list, outputs: list, **kwargs) -> object:
+    def retrain(self, model_msg: object, input_data: list, output_data: list, **kwargs) -> object:
         """Retrain the scorer based on new training records
 
         Args:
             model_msg: Model to be retrained
-            inputs: Training set inputs, as generated by :meth:`transform_inputs`
-            outputs: Training set outputs, as generated by :meth:`transform_outputs`
+            input_data: Training set inputs, as generated by :meth:`transform_inputs`
+            output_data: Training set outputs, as generated by :meth:`transform_outputs`
         Returns:
             Message defining how to update the model
         """
@@ -121,8 +121,8 @@ class MultiFidelityScorer(Scorer):
     training and use the lower-fidelity data to enhance prediction accuracy during scoring.
     """
 
-    def score(self, model_msg: object, inputs: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
+    def score(self, model_msg: object, input_data: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
         raise NotImplementedError()
 
-    def retrain(self, model_msg: object, inputs: list, outputs: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> object:
+    def retrain(self, model_msg: object, input_data: list, output_data: list, lower_fidelities: np.ndarray | None = None, **kwargs) -> object:
         raise NotImplementedError()
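For context, the rename is mechanical: every `inputs`/`outputs` argument becomes `input_data`/`output_data` across the scorer interface and its implementations. Below is a minimal sketch of the calling convention after this change; `TrivialScorer` is a stand-in written for illustration, not examol's API, and only the method names and the renamed keywords are taken from the diff.

```python
import numpy as np


class TrivialScorer:
    """Stand-in mirroring the Scorer calling convention from the diff."""

    def prepare_message(self, model, training=False):
        # Nothing to serialize for a toy float-valued model
        return model

    def retrain(self, model_msg, input_data, output_data, **kwargs):
        # "Training" reduces to remembering the mean of the outputs
        return float(np.mean(output_data))

    def score(self, model_msg, input_data, **kwargs):
        # Predict the stored mean for every input
        return np.full(len(input_data), model_msg)


scorer = TrivialScorer()
msg = scorer.prepare_message(0.0, training=True)
msg = scorer.retrain(msg, input_data=[[1.0], [2.0]], output_data=[0.5, 1.5])
scores = scorer.score(msg, input_data=[[3.0]])
assert scores.shape == (1,)  # one score per molecule
```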
28 changes: 14 additions & 14 deletions examol/score/nfp.py
@@ -321,15 +321,15 @@ def transform_inputs(self, record_batch: list[MoleculeRecord]) -> list[dict]:
 
     def score(self,
               model_msg: NFPMessage,
-              inputs: list[dict | tuple[dict, np.ndarray]],
+              input_data: list[dict | tuple[dict, np.ndarray]],
               batch_size: int = 64,
               lower_fidelities: np.ndarray | None = None,
               **kwargs) -> np.ndarray:
         """Assign a score to molecules
 
         Args:
             model_msg: Model in a transmittable format
-            inputs: Batch of inputs ready for the model (in dictionary format)
+            input_data: Batch of inputs ready for the model (in dictionary format)
             batch_size: Number of molecules to evaluate at a time
             lower_fidelities: Properties of the molecule at lower levels, if known
         Returns:
@@ -338,7 +338,7 @@ def score(self,
         model = model_msg.get_model()  # Unpack the model
 
         # Run inference
-        loader = make_data_loader(inputs, batch_size=batch_size)
+        loader = make_data_loader(input_data, batch_size=batch_size)
         ml_outputs = np.squeeze(model.predict(loader, verbose=False))
         if ml_outputs.ndim == 1:  # Single-fidelity learning
             return ml_outputs
@@ -351,8 +351,8 @@ def score(self,
 
     def retrain(self,
                 model_msg: dict | NFPMessage,
-                inputs: list,
-                outputs: np.ndarray,
+                input_data: list,
+                output_data: np.ndarray,
                 lower_fidelities: None | np.ndarray = None,
                 num_epochs: int = 4,
                 batch_size: int = 32,
@@ -367,8 +367,8 @@ def retrain(self,
 
         Args:
             model_msg: Model to be retrained
-            inputs: Training set inputs, as generated by :meth:`transform_inputs`
-            outputs: Training set outputs, as generated by :meth:`transform_outputs`
+            input_data: Training set inputs, as generated by :meth:`transform_inputs`
+            output_data: Training set outputs, as generated by :meth:`transform_outputs`
             lower_fidelities: Lower-fidelity data, if available
             num_epochs: Maximum number of epochs to run
             batch_size: Number of molecules per training batch
@@ -405,12 +405,12 @@ def loss(y_true, y_pred):
             return tf.keras.losses.mean_squared_error(y_true[is_known], y_pred[is_known])
 
         # Prepare the outputs
-        outputs = np.concatenate([lower_fidelities, outputs[:, None]], axis=1)
-        outputs = compute_deltas(outputs)
-        value_spec = tf.TensorSpec((outputs.shape[1],), dtype=tf.float32)
+        output_data = np.concatenate([lower_fidelities, output_data[:, None]], axis=1)
+        output_data = compute_deltas(output_data)
+        value_spec = tf.TensorSpec((output_data.shape[1],), dtype=tf.float32)
 
         # Split off a validation set
-        train_x, valid_x, train_y, valid_y = train_test_split(inputs, outputs, test_size=validation_split)
+        train_x, valid_x, train_y, valid_y = train_test_split(input_data, output_data, test_size=validation_split)
 
         # Make the loaders
         steps_per_epoch = len(train_x) // batch_size
@@ -422,9 +422,9 @@ def loss(y_true, y_pred):
 
         # Define initial guesses for the "scaling" later
         try:
-            outputs = np.array(outputs)
-            output_mean = np.nanmean(outputs, axis=0)
-            outputs_std = np.clip(np.nanstd(outputs, axis=0), 1e-6, a_max=None)
+            output_data = np.array(output_data)
+            output_mean = np.nanmean(output_data, axis=0)
+            outputs_std = np.clip(np.nanstd(output_data, axis=0), 1e-6, a_max=None)
             for i, (m, s) in enumerate(zip(np.atleast_1d(output_mean), np.atleast_1d(outputs_std))):
                 scale_layer = model.get_layer(f'scale_{i}')
                 scale_layer.set_weights([np.atleast_2d(s), np.atleast_1d(m)])
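Both the NFP and RDKit scorers route multi-fidelity targets through `compute_deltas` before training. That helper is not shown in the diff, so the sketch below is an assumption about its behavior, inferred from how it is used: the lowest-fidelity column passes through unchanged, each later column becomes the increment from the previous level, and NaNs propagate so unknown levels can be masked with `np.isnan`.

```python
import numpy as np


def compute_deltas(levels: np.ndarray) -> np.ndarray:
    """Hypothetical sketch of examol's delta transform (assumed behavior).

    Column 0 keeps the base-level value; column i becomes the change from
    level i-1 to level i. NaNs propagate, matching the masking done above.
    """
    deltas = np.array(levels, dtype=float)  # copy so the input is untouched
    deltas[:, 1:] = deltas[:, 1:] - deltas[:, :-1]
    return deltas


# Two molecules with properties at three fidelity levels
levels = np.array([[1.0, 1.5, 2.5],
                   [2.0, np.nan, 3.0]])
print(compute_deltas(levels))
# [[1.  0.5 1. ]
#  [2.  nan nan]]
```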
26 changes: 13 additions & 13 deletions examol/score/rdkit/__init__.py
@@ -103,53 +103,53 @@ def prepare_message(self, model: ModelType, training: bool = True) -> ModelType:
         # Send the whole list for inference
         return model
 
-    def score(self, model_msg: ModelType, inputs: InputType, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
+    def score(self, model_msg: ModelType, input_data: InputType, lower_fidelities: np.ndarray | None = None, **kwargs) -> np.ndarray:
         if not isinstance(model_msg, list):
             # Single objective
-            return model_msg.predict(inputs)
+            return model_msg.predict(input_data)
         elif len(model_msg) == 1:
-            return np.squeeze(model_msg[0].predict(inputs))
+            return np.squeeze(model_msg[0].predict(input_data))
         else:
             # Get the known deltas then append a NaN to the end (we don't know the last delta)
             if lower_fidelities is None:
-                deltas = np.empty((len(inputs), len(model_msg))) * np.nan
+                deltas = np.empty((len(input_data), len(model_msg))) * np.nan
             else:
                 known_deltas = compute_deltas(lower_fidelities)
                 deltas = np.concatenate((known_deltas, np.empty_like(known_deltas[:, :1]) * np.nan), axis=1)
 
             # Run the model at each level
             for my_level, my_model in enumerate(model_msg):
-                my_preds = my_model.predict(inputs)
+                my_preds = my_model.predict(input_data)
                 is_unknown = np.isnan(deltas[:, my_level])
                 deltas[is_unknown, my_level] = my_preds[is_unknown]
 
             # Sum up the deltas
             return np.sum(deltas, axis=1)
 
-    def retrain(self, model_msg: Pipeline, inputs: InputType, outputs: np.ndarray,
+    def retrain(self, model_msg: Pipeline, input_data: InputType, output_data: np.ndarray,
                 bootstrap: bool = False,
                 lower_fidelities: np.ndarray | None = None) -> ModelType:
         if bootstrap:
-            samples = np.random.random_integers(0, len(inputs) - 1, size=(len(inputs),))
-            inputs = [inputs[i] for i in samples]
-            outputs = outputs[samples]
+            samples = np.random.random_integers(0, len(input_data) - 1, size=(len(input_data),))
+            input_data = [input_data[i] for i in samples]
+            output_data = output_data[samples]
             if lower_fidelities is not None:
                 lower_fidelities = lower_fidelities[samples, :]
 
         if lower_fidelities is None:
             # For single level, train a single model
-            model_msg.fit(inputs, outputs)
+            model_msg.fit(input_data, output_data)
             return model_msg
         else:
             # Compute the delta and then train a different model for each delta
-            outputs = np.concatenate([lower_fidelities, outputs[:, None]], axis=1)  # Append target level to end
-            deltas = compute_deltas(outputs)
+            output_data = np.concatenate([lower_fidelities, output_data[:, None]], axis=1)  # Append target level to end
+            deltas = compute_deltas(output_data)
 
             models = []
             for y in deltas.T:
                 # Remove the missing values
                 mask = np.isfinite(y)
-                my_smiles = [i for m, i in zip(mask, inputs) if m]
+                my_smiles = [i for m, i in zip(mask, input_data) if m]
                 y = y[mask]
 
                 # Fit a fresh copy of the model
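One caveat on the bootstrap branch above, which this PR leaves untouched: `np.random.random_integers` has been deprecated in NumPy since 1.11. A sketch of the same resampling with the modern Generator API follows; the seed and data are arbitrary placeholders for illustration.

```python
import numpy as np

rng = np.random.default_rng(seed=42)  # arbitrary seed for the example

input_data = [[0.0], [1.0], [2.0]]
output_data = np.array([0.1, 1.1, 2.1])

# rng.integers uses an exclusive upper bound, so len(input_data) here
# matches random_integers(0, len(input_data) - 1), which is inclusive
samples = rng.integers(0, len(input_data), size=len(input_data))
input_data = [input_data[i] for i in samples]
output_data = output_data[samples]
```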
6 changes: 4 additions & 2 deletions examples/redoxmers/run/report.md
@@ -1,12 +1,14 @@
 # Run Report
-Report time: 2023-10-18 17:38:06.910593
+Report time: 2025-02-12 09:02:54.732377
 
 ## Task Summary
 Measures how many tasks have run as part of the application
 
 | Task Type   | Count   | Node Hours   | Failures   |
 |-------------|---------|--------------|------------|
-| simulation  | 2       | 0.00041      | 0 (0.0%)   |
+| inference   | 5       | 0.0044       | 0 (0.0%)   |
+| train       | 5       | 0.0017       | 0 (0.0%)   |
+| simulation  | 37      | 0.02         | 0 (0.0%)   |
 
 ## Outcomes over Time
 The property of the molecules over time.
Binary file modified examples/redoxmers/run/simulation-outputs_recipe-0.png
2 changes: 1 addition & 1 deletion tests/steer/conftest.py
@@ -74,7 +74,7 @@ def queues(recipe, scorer, simulator, tmp_path) -> ColmenaQueues:
     # Make parsl configuration
     config = Config(
         run_dir=str(tmp_path),
-        executors=[HighThroughputExecutor(max_workers=1, address='127.0.0.1')]
+        executors=[HighThroughputExecutor(max_workers_per_node=1, address='127.0.0.1')]
     )
 
     doer = ParslTaskServer(
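The fixture change tracks a rename in Parsl itself, where `HighThroughputExecutor`'s `max_workers` argument was deprecated in favor of `max_workers_per_node`. A minimal standalone sketch of the updated configuration; the `run_dir` path is a placeholder, whereas the test fixture uses pytest's `tmp_path`.

```python
from parsl.config import Config
from parsl.executors import HighThroughputExecutor

# `max_workers_per_node` replaces the deprecated `max_workers` keyword
config = Config(
    run_dir='/tmp/parsl-test',  # placeholder path for illustration
    executors=[HighThroughputExecutor(max_workers_per_node=1, address='127.0.0.1')],
)
```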