From 037f4ce5983bbd6811c706cb13e865f6519c11f0 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 15:21:24 +0200 Subject: [PATCH 1/8] Add pathfinder and laplace commands --- maud/cli.py | 174 ++++++++++++++++++++++++++++++++- maud/data_model/maud_config.py | 4 + maud/running_stan.py | 58 ++++++++++- 3 files changed, 234 insertions(+), 2 deletions(-) diff --git a/maud/cli.py b/maud/cli.py index 4679a445..d8562465 100644 --- a/maud/cli.py +++ b/maud/cli.py @@ -28,7 +28,15 @@ from maud.data.example_inputs import linear, methionine from maud.getting_idatas import get_idata from maud.loading_maud_inputs import load_maud_input -from maud.running_stan import optimize, predict, sample, simulate, variational +from maud.running_stan import ( + laplace, + optimize, + pathfinder, + predict, + sample, + simulate, + variational, +) AVAILABLE_EXAMPLE_INPUTS = {"linear": linear, "methionine": methionine} @@ -497,3 +505,167 @@ def do_variational(data_path, output_dir): shutil.copytree(data_path, ui_dir) variational(mi, samples_path) return output_path + + +@cli.command("pathfinder") +@click.option("--output_dir", default=".", help="Where to save Maud's output") +@click.argument( + "data_path", + type=click.Path(exists=True, dir_okay=True, file_okay=False), +) +def pathfinder_command(data_path, output_dir): + """Generate pathfinder samples given a user input directory.""" + click.echo(do_pathfinder(data_path, output_dir)) + + +def do_pathfinder(data_path, output_dir): + """Generate pathfinder samples given a user input directory.""" + mi = load_maud_input(data_path) + now = datetime.now().strftime("%Y%m%d%H%M%S") + output_name = f"maud_output_pf-{mi.config.name}-{now}" + output_path = os.path.join(output_dir, output_name) + samples_path = os.path.join(output_path, "samples") + ui_dir = os.path.join(output_path, "user_input") + print("Creating output directory: " + output_path) + os.mkdir(output_path) + os.mkdir(samples_path) + print(f"Copying user input from {data_path} to {ui_dir}") + shutil.copytree(data_path, ui_dir) + pathfinder(mi, samples_path) + return output_path + + +@cli.command("laplace") +@click.option("--output_dir", default=".", help="Where to save the output") +@click.argument( + "data_path", + type=click.Path(exists=True, dir_okay=True, file_okay=False), +) +def laplace_command(data_path, output_dir): + """Do Laplace approximation.""" + click.echo(do_laplace(data_path, output_dir)) + + +def do_laplace(data_path, output_dir): + """Do Laplace approximation.""" + mi = load_maud_input(data_path=data_path) + now = datetime.now().strftime("%Y%m%d%H%M%S") + output_name = f"maud_output_opt-{mi.config.name}-{now}" + output_path = os.path.join(output_dir, output_name) + samples_path = os.path.join(output_path, "samples") + ui_dir = os.path.join(output_path, "user_input") + print("Creating output directory: " + output_path) + os.mkdir(output_path) + os.mkdir(samples_path) + print(f"Copying user input from {data_path} to {ui_dir}") + shutil.copytree(data_path, ui_dir) + laplace_fit = laplace(mi, samples_path) + idata = get_idata(laplace_fit._runset.csv_files, mi, "train") + idata.to_json(os.path.join(output_path, "idata.json")) + print("\n\nSimulated concentrations:") + print( + idata.posterior["conc_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated fluxes:") + print( + idata.posterior["flux_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated kms:") + print(idata.posterior["km"].mean(dim=["chain", "draw"]).to_series()) + print("\n\nSimulated enzyme concentrations:") + print( + idata.posterior["conc_enzyme_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated reaction delta Gs:") + print(idata.posterior["dgr_train"].mean(dim=["chain", "draw"]).to_series()) + print("\n\nSimulated measurements:") + print( + idata.posterior_predictive["yrep_conc_train"] + .mean(dim=["chain", "draw"]) + .to_series() + ) + print("\n\nSimulated measurements:") + for var in ["yrep_conc_train", "yrep_flux_train"]: + if var in idata.posterior_predictive.data_vars: + print( + idata.posterior_predictive[var] + .mean(dim=["chain", "draw"]) + .to_series() + ) + print("\n\nSimulated log likelihoods:") + for var in ["llik_conc_train", "llik_flux_train"]: + if var in idata.log_likelihood.data_vars: + print( + idata.log_likelihood[var] + .mean(dim=["chain", "draw"]) + .to_series() + ) + print("\n\nSimulated allostery terms:") + print( + idata.posterior["allostery_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated reversibility terms:") + print( + idata.posterior["reversibility_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated saturation terms:") + print( + idata.posterior["saturation_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + if mi.kinetic_model.phosphorylations is not None: + print("\n\nSimulated phosphorylation terms:") + print( + idata.posterior["phosphorylation_train"] + .mean(dim=["chain", "draw"]) + .to_series() + .unstack() + .T + ) + print("\n\nSimulated membrane potential:") + print( + idata.posterior["psi_train"].mean(dim=["chain", "draw"]).to_series().T + ) + print("\n\nSimulated steady state deviation:") + # TODO: put the dimensions directly in the idata object + steady_dev = ( + idata.posterior["steady_dev"].mean(dim=["chain", "draw"]).to_series().T + ) + steady_dev.index = steady_dev.index.set_names("experiment", level=0) + steady_dev.index = steady_dev.index.set_names("metabolite", level=1) + balanced_metabolites = [m.id for m in mi.kinetic_model.mics if m.balanced] + steady_dev.index = steady_dev.index.set_levels( + [ + idata.posterior.experiments, + [ + m + for m in idata.posterior.mics.to_series() + if str(m) in balanced_metabolites + ], + ] + ) + print(steady_dev) + return output_path diff --git a/maud/data_model/maud_config.py b/maud/data_model/maud_config.py index 22a85052..b578493d 100644 --- a/maud/data_model/maud_config.py +++ b/maud/data_model/maud_config.py @@ -38,6 +38,8 @@ class MaudConfig(BaseModel): :param stanc_options: Options for CmdStanModel argument `stanc_options`. :param cpp_options: Options for CmdStanModel `cpp_options`. :param variational_options: Arguments for CmdStanModel.variational. + :param pathfinder_options: Arguments for CmdStanModel.pathfinder. + :param laplace_options: Arguments for CmdStanModel.laplace. :param optimize_options: Arguments for CmdStanModel.optimize. :param user_inits_file: path to a csv file of initial values. :param steady_state_threshold_abs: abs threshold for Sv=0 be at steady state @@ -60,6 +62,8 @@ class MaudConfig(BaseModel): stanc_options: Optional[dict] = None cpp_options: Optional[dict] = None variational_options: Optional[dict] = None + pathfinder_options: Optional[dict] = None + laplace_options: Optional[dict] = None optimize_options: Optional[dict] = None user_inits_file: Optional[str] = None ode_solver_config: ODESolverConfig = Field(default_factory=ODESolverConfig) diff --git a/maud/running_stan.py b/maud/running_stan.py index 0a051d57..b7ea04c3 100644 --- a/maud/running_stan.py +++ b/maud/running_stan.py @@ -7,8 +7,9 @@ import arviz as az import cmdstanpy -from cmdstanpy import CmdStanMLE +from cmdstanpy import CmdStanLaplace, CmdStanMLE from cmdstanpy.stanfit.mcmc import CmdStanMCMC +from cmdstanpy.stanfit.pathfinder import CmdStanPathfinder from cmdstanpy.stanfit.vb import CmdStanVB from maud.data_model.maud_input import MaudInput @@ -41,6 +42,8 @@ "output_samples": 10, "require_converged": True, } +DEFAULT_PATHFINDER_CONFIG = {"num_paths": 4, "draws": 10, "show_console": True} +DEFAULT_LAPLACE_CONFIG = {"draws": 10, "show_console": True} SIM_CONFIG = { "chains": 1, "fixed_param": True, @@ -140,6 +143,59 @@ def variational(mi: MaudInput, output_dir: str) -> CmdStanVB: ) +def pathfinder(mi: MaudInput, output_dir: str) -> CmdStanPathfinder: + """Run the pathfinder algorithm via cmdstanpy. + + :param mi: a MaudInput object + :param output_dir: a string specifying where to save the output. + """ + mi_options = ( + {} + if mi.config.pathfinder_options is None + else mi.config.pathfinder_options + ) + model = load_stan_model( + "model", mi.config.cpp_options, mi.config.stanc_options + ) + set_up_output_dir(output_dir, mi) + return model.pathfinder( + data=os.path.join(output_dir, "input_data_train.json"), + inits=os.path.join(output_dir, "inits.json"), + **{ + **DEFAULT_PATHFINDER_CONFIG, + **mi_options, + **{"output_dir": output_dir}, + }, + ) + + +def laplace(mi: MaudInput, output_dir: str) -> CmdStanLaplace: + """Run Laplace approximation. + + :param mi: a MaudInput object + :param output_dir: a string specifying where to save the output. + """ + mi_options_laplace = ( + {} if mi.config.laplace_options is None else mi.config.laplace_options + ) + mi_options_optimize = ( + {} if mi.config.optimize_options is None else mi.config.optimize_options + ) + fixed_args = {"output_dir": output_dir} + fixed_args_opt = {"inits": os.path.join(output_dir, "inits.json")} + laplace_args = DEFAULT_LAPLACE_CONFIG | mi_options_laplace | fixed_args + opt_args = DEFAULT_OPTIMIZE_CONFIG | mi_options_optimize | fixed_args_opt + model = load_stan_model( + "model", mi.config.cpp_options, mi.config.stanc_options + ) + set_up_output_dir(output_dir, mi) + return model.laplace_sample( + data=os.path.join(output_dir, "input_data_train.json"), + opt_args=opt_args, + **laplace_args, + ) + + def simulate(mi: MaudInput, output_dir: str, n: int) -> CmdStanMCMC: """Generate simulations from the prior mean. From 5ec4feb5c047ca57544b65d207939e35506424c6 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 15:25:32 +0200 Subject: [PATCH 2/8] Update docs --- docs/inputting.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/inputting.md b/docs/inputting.md index 6857064c..2cb90ec6 100644 --- a/docs/inputting.md +++ b/docs/inputting.md @@ -39,6 +39,9 @@ The following optional fields can also be specified: * `stanc_options` Table of valid choices for [CmdStanModel](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel) argument `stanc_options` * `cpp_options` Table of valid choices for [`CmdStanModel](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel) argument `cpp_options` * `variational_options` Arguments for the cmdstanpy method [`CmdStanModel.variational](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel.variational) +* `optimize_options` Arguments for the cmdstanpy method [`CmdStanModel.optimize](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel.optimize) +* `pathfinder_options` Arguments for the cmdstanpy method [`CmdStanModel.pathfinder](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel.pathfinder) +* `laplace_options` Arguments for the cmdstanpy method [`CmdStanModel.laplace_sample](https://cmdstanpy.readthedocs.io/en/v1.1.0/api.html#cmdstanpy.CmdStanModel.laplace_sample) * `user_inits_file` path to a toml file of initial values * `steady_state_threshold_abs` absolute threshold for Sv=0 be at steady state * `steady_state_threshold_rel` relative threshold for Sv=0 be at steady state From 106fcf546b108b5d1c909b50de80c2c3f3da6733 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 15:25:48 +0200 Subject: [PATCH 3/8] Update docstrings --- maud/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/maud/cli.py b/maud/cli.py index d8562465..3a67b9c8 100644 --- a/maud/cli.py +++ b/maud/cli.py @@ -514,12 +514,12 @@ def do_variational(data_path, output_dir): type=click.Path(exists=True, dir_okay=True, file_okay=False), ) def pathfinder_command(data_path, output_dir): - """Generate pathfinder samples given a user input directory.""" + """Generate draws using the pathfinder algorithm.""" click.echo(do_pathfinder(data_path, output_dir)) def do_pathfinder(data_path, output_dir): - """Generate pathfinder samples given a user input directory.""" + """Generate draws using the pathfinder algorithm.""" mi = load_maud_input(data_path) now = datetime.now().strftime("%Y%m%d%H%M%S") output_name = f"maud_output_pf-{mi.config.name}-{now}" @@ -542,12 +542,12 @@ def do_pathfinder(data_path, output_dir): type=click.Path(exists=True, dir_okay=True, file_okay=False), ) def laplace_command(data_path, output_dir): - """Do Laplace approximation.""" + """Generate approximate posterior draws using the Laplace method.""" click.echo(do_laplace(data_path, output_dir)) def do_laplace(data_path, output_dir): - """Do Laplace approximation.""" + """Generate approximate posterior draws using the Laplace method.""" mi = load_maud_input(data_path=data_path) now = datetime.now().strftime("%Y%m%d%H%M%S") output_name = f"maud_output_opt-{mi.config.name}-{now}" From 59db57a4b279e50a95b901c33a7775663c80c9cd Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 15:43:37 +0200 Subject: [PATCH 4/8] save pathfinder inits --- maud/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/maud/cli.py b/maud/cli.py index 3a67b9c8..a9262358 100644 --- a/maud/cli.py +++ b/maud/cli.py @@ -531,7 +531,10 @@ def do_pathfinder(data_path, output_dir): os.mkdir(samples_path) print(f"Copying user input from {data_path} to {ui_dir}") shutil.copytree(data_path, ui_dir) - pathfinder(mi, samples_path) + pf = pathfinder(mi, samples_path) + inits_pf = pf.create_inits() + with open(os.path.join(samples_path, "inits_pathfinder.json", "w")) as f: + json.dump(inits_pf, f) return output_path From dd609ee4c2cc7010906fea7e1979ab80d99aeaa3 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 15:53:36 +0200 Subject: [PATCH 5/8] Save pathfinder inits properly --- maud/cli.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/maud/cli.py b/maud/cli.py index a9262358..6c3d3a43 100644 --- a/maud/cli.py +++ b/maud/cli.py @@ -24,6 +24,7 @@ import arviz as az import click import importlib_resources +from stanio import write_stan_json from maud.data.example_inputs import linear, methionine from maud.getting_idatas import get_idata @@ -533,8 +534,11 @@ def do_pathfinder(data_path, output_dir): shutil.copytree(data_path, ui_dir) pf = pathfinder(mi, samples_path) inits_pf = pf.create_inits() - with open(os.path.join(samples_path, "inits_pathfinder.json", "w")) as f: - json.dump(inits_pf, f) + for i, inits_dict in enumerate(inits_pf): + write_stan_json( + os.path.join(samples_path, f"inits_pathfinder-{str(i)}.json"), + inits_dict, + ) return output_path @@ -553,7 +557,7 @@ def do_laplace(data_path, output_dir): """Generate approximate posterior draws using the Laplace method.""" mi = load_maud_input(data_path=data_path) now = datetime.now().strftime("%Y%m%d%H%M%S") - output_name = f"maud_output_opt-{mi.config.name}-{now}" + output_name = f"maud_output_laplace-{mi.config.name}-{now}" output_path = os.path.join(output_dir, output_name) samples_path = os.path.join(output_path, "samples") ui_dir = os.path.join(output_path, "user_input") From d0abfdd5c2976b4a9ec8575e4a520564206c8e48 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Thu, 19 Oct 2023 20:30:02 +0200 Subject: [PATCH 6/8] Handle mode argument to laplace --- maud/running_stan.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/maud/running_stan.py b/maud/running_stan.py index b7ea04c3..5fbc4471 100644 --- a/maud/running_stan.py +++ b/maud/running_stan.py @@ -184,7 +184,15 @@ def laplace(mi: MaudInput, output_dir: str) -> CmdStanLaplace: fixed_args = {"output_dir": output_dir} fixed_args_opt = {"inits": os.path.join(output_dir, "inits.json")} laplace_args = DEFAULT_LAPLACE_CONFIG | mi_options_laplace | fixed_args - opt_args = DEFAULT_OPTIMIZE_CONFIG | mi_options_optimize | fixed_args_opt + if "mode" in laplace_args.keys(): + if laplace_args["mode"] is not None: + opt_args = None + else: + raise ValueError("'mode' must not be None!") + else: + opt_args = ( + DEFAULT_OPTIMIZE_CONFIG | mi_options_optimize | fixed_args_opt + ) model = load_stan_model( "model", mi.config.cpp_options, mi.config.stanc_options ) From 109f194f114594cf559c018c473ef18b46ebc9f7 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Fri, 20 Oct 2023 13:13:44 +0200 Subject: [PATCH 7/8] fix bug in variational --- maud/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maud/cli.py b/maud/cli.py index 6c3d3a43..c55aa941 100644 --- a/maud/cli.py +++ b/maud/cli.py @@ -493,7 +493,7 @@ def do_variational(data_path, output_dir): of cmdstanpy's diagnose and summary methods. """ - mi = load_maud_input(data_path, mode="sample") + mi = load_maud_input(data_path) now = datetime.now().strftime("%Y%m%d%H%M%S") output_name = f"maud_output_vi-{mi.config.name}-{now}" output_path = os.path.join(output_dir, output_name) From 81935a7e59c5f54639386604dbdae679e6a55031 Mon Sep 17 00:00:00 2001 From: teddygroves Date: Fri, 20 Oct 2023 13:22:21 +0200 Subject: [PATCH 8/8] gitignore some things --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 88028c94..e89cee6a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +maud/stan/model +maud/stan/out_of_sample_model +*.exe +*.hpp + build/ dist/ *__pycache__*