-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_experiment.py
More file actions
134 lines (110 loc) · 4.7 KB
/
run_experiment.py
File metadata and controls
134 lines (110 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import itertools
import json
import logging
import os
import sys
import warnings
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from subprocess import check_call
from typing import Any, Dict

import click
import pandas as pd
import yaml
# Silence third-party warnings so experiment logs stay readable.
warnings.filterwarnings("ignore")
# Launch child training runs with the same interpreter running this script.
PYTHON = sys.executable
# Setup logging: module-level logger, INFO verbosity.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_config(config_path: str) -> Dict[str, Any]:
    """Parse the YAML experiment configuration at *config_path* into a dict."""
    with open(config_path, "r") as fh:
        parsed = yaml.safe_load(fh)
    return parsed
def format_lr(lr):
    """Render a learning rate as a compact scientific tag, e.g. 0.001 -> '1e-3'."""
    tag = format(lr, ".0e")
    # Strip the zero-padding Python puts in two-digit exponents ('1e-03' -> '1e-3').
    tag = tag.replace("e-0", "e-")
    return tag.replace("e+0", "e")
def launch_experiment(experiment_config: Dict[str, Any]) -> Dict[str, Any]:
    """Run one training job as a blocking subprocess and collect its test metrics.

    Builds the CLI for ``cloud_shadows_segmentation/train.py`` from
    *experiment_config*, waits for it to finish, then loads the
    ``metrics_test.json`` written under the job's run directory and returns
    it merged with the input config.

    Raises:
        subprocess.CalledProcessError: if the training subprocess exits non-zero.
        FileNotFoundError: if the expected metrics file was not produced.
    """
    # (CLI option name, config key) pairs for value-carrying arguments.
    option_keys = [
        ("data_dir", "data_dir"),
        ("model_name", "model_name"),
        ("run_dir", "run_dir"),
        ("batch_size", "batch_size"),
        ("n_workers", "n_workers"),
        ("lr", "lr"),
        ("in_dim", "in_dim"),
        ("fold", "fold"),
        ("norm_type", "norm"),
        ("hidden_dims", "hidden_dims"),
    ]
    cmd = [PYTHON, "cloud_shadows_segmentation/train.py"]
    cmd.extend(f"--{opt}={experiment_config[key]}" for opt, key in option_keys)
    # Boolean switches are passed as bare flags only when enabled.
    for flag in ("weighted", "pretrained", "finetune", "use_amp"):
        if experiment_config[flag]:
            cmd.append(f"--{flag}")
    logger.info(f"Starting experiment with command: {' '.join(cmd)}")
    check_call(cmd)
    # Load and return results: train.py writes metrics under <run_dir>/<job_name>/.
    lr_str = format_lr(experiment_config["lr"])
    job_name = f"{experiment_config['model_name']}_lr{lr_str}_{experiment_config['norm']}_w{experiment_config['weighted']}_f{experiment_config['fold']}"
    directory = Path(experiment_config["run_dir"]) / job_name
    with open(Path(directory, "metrics_test.json"), "r") as fh:
        results = json.load(fh)
    return {**experiment_config, **results}
def resume_experiment(experiment_config: Dict[str, Any]) -> Dict[str, Any]:
    """Skip, resume, or freshly start the experiment described by *experiment_config*.

    Three cases, decided by which metrics files already exist in the job
    directory:
      * test metrics present  -> the run finished; return its saved results.
      * val metrics present   -> a checkpoint exists; relaunch with pretrained=True.
      * neither present       -> launch from scratch with pretrained=False.
    """
    lr_str = format_lr(experiment_config["lr"])
    job_name = f"{experiment_config['model_name']}_lr{lr_str}_{experiment_config['norm']}_w{experiment_config['weighted']}_f{experiment_config['fold']}"
    directory = Path(experiment_config["run_dir"]) / job_name
    pth_test = Path(directory, "metrics_test.json")
    pth_val = Path(directory, "metrics_val.json")

    # Completed run: short-circuit with the stored test metrics.
    if pth_test.is_file():
        experiment_config["pretrained"] = True
        logger.info(f"Experiment {job_name} already completed. Skipping.")
        with open(pth_test, "r") as fh:
            results = json.load(fh)
        return {**experiment_config, **results}

    if pth_val.is_file():
        # Partial run: a checkpoint exists, so resume from it.
        logger.info(f"Resuming experiment {job_name} from checkpoint.")
        experiment_config["pretrained"] = True
    else:
        logger.info(f"Starting new experiment {job_name}.")
        experiment_config["pretrained"] = False
    return launch_experiment(experiment_config)
@click.command()
@click.option(
    "--config", default="experiments_config.yaml", help="Path to experiment configuration file"
)
def run_cli(config):
    """Main function to run experiments for the HCSR algorithms.

    Expands the hyper-parameter grid declared in the YAML config
    (model x norm x weighting x learning rate x fold), runs each combination
    through resume_experiment — in parallel when `n_parallel_training` > 1 —
    and writes the aggregated results to a CSV in the run directory.
    """
    cfg = load_config(config)
    ft = cfg["finetune"]
    # itertools.product flattens the former five-level nested loop.
    grid = itertools.product(
        cfg["model_names"],
        cfg["norm_types"],
        cfg["weights"],
        cfg["lrs"],
        range(cfg["n_folds"]),
    )
    experiments = [
        {
            "model_name": model_name,
            "norm": norm,
            "weighted": weighted,
            "lr": float(lr),
            "fold": fold,
            "finetune": ft,
            **cfg["fixed_params"],
        }
        for model_name, norm, weighted, lr, fold in grid
    ]
    logger.info(f"Launching {len(experiments)} experiments")
    # Read the worker count once; the original mixed cfg["..."] (KeyError if
    # absent) with cfg.get(..., 1) — default to sequential when unspecified.
    n_parallel = cfg.get("n_parallel_training", 1)
    if n_parallel > 1:
        with ProcessPoolExecutor(max_workers=n_parallel) as executor:
            results = list(executor.map(resume_experiment, experiments))
    else:
        results = list(map(resume_experiment, experiments))
    results_df = pd.DataFrame(results)
    out_path = Path(cfg["fixed_params"]["run_dir"]) / "experiment_results.csv"
    results_df.to_csv(out_path, index=False)
    logger.info("All experiments completed. Results saved.")
    logger.info(f"Best experiment: {results_df.loc[results_df['f1'].idxmax()].to_dict()}")
# Script entry point: delegate argument parsing and execution to the click CLI.
if __name__ == "__main__":
    run_cli()