train.py
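"""Train an agent on a single level, configured via Hydra and logged to MLflow."""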
import logging
import hydra
import mlflow
from dataclasses import asdict
from hydra.utils import instantiate
from tqdm import tqdm
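# Project-local imports (defined elsewhere in this repository).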
from agents import get_agent
from config import NeurioConfig, flatten
from environment import get_env_info, get_multiprocess_environment
from game_loop import GameLoop
from reward_trackers import (
    BestReward,
    MovingAvgReward,
    MovingMaxReward,
    MovingMinReward,
    SumReward,
)

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


@hydra.main(version_base="1.3", config_path="../configs", config_name="neurio_config")
def main(config: NeurioConfig) -> float:
    config = instantiate(config)
    study_name = f"Neurio-lev-{config.level}"

    # Point MLflow at the local file store *before* resolving the experiment,
    # so that lookup and creation both use the same backend.
    mlflow.set_tracking_uri(f"file://{hydra.utils.get_original_cwd()}/mlruns")
    if (experiment := mlflow.get_experiment_by_name(study_name)) is not None:
        exp_id = experiment.experiment_id
    else:
        exp_id = mlflow.create_experiment(study_name)

    with mlflow.start_run(experiment_id=exp_id):
        mlflow.log_params(flatten(asdict(config)))
        level = config.level

        # Create env
        train_env = get_multiprocess_environment(
            config.num_workers,
            config=config.environment,
            level=level,
            render_mode="human" if config.render else None,
        )

        # Set up agent
        agent = get_agent(config.agent, get_env_info(train_env))
        # Run game loop
        log.info(f"Training on level {level}.")

        # Track reward extremes over the last 500 entries, plus moving
        # averages over several window sizes (None presumably meaning an
        # average over the full history).
        max_reward = MovingMaxReward(history_size=500)
        min_reward = MovingMinReward(history_size=500)
        avg_rewards = {}
        for hist_size in [100, 200, 500, 1000, 2000, 5000, 10000, None]:
            if hist_size is not None:
                name = f"avg_reward_{hist_size:05d}"
            else:
                name = "avg_reward"
            avg_rewards[name] = MovingAvgReward(history_size=hist_size)
        sum_avg_rewards = SumReward(list(avg_rewards.values()))
        best_sum_avg_rewards = BestReward(sum_avg_rewards)
        loop = GameLoop(
            train_env,
            agent,
            reward_trackers={
                "min_reward": min_reward,
                "max_reward": max_reward,
                **avg_rewards,
                "sum_avg_rewards": sum_avg_rewards,
                "best_sum_avg_rewards": best_sum_avg_rewards,
            },
        )
log.info(f"Run {config.num_iters} iters.")
for train_iter in tqdm(range(config.num_iters)):
loop.run_train_iter(config.steps_per_iter)
if train_iter % config.save_frequency == 0:
agent.save(train_iter)
agent.save(config.num_iters)
return best_sum_avg_rewards.get_value()
if __name__ == "__main__":
main() # type: ignore
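
# Example invocation via Hydra command-line overrides (hypothetical values;
# the available keys and defaults are defined in configs/neurio_config.yaml):
#   python train.py level=1 num_workers=8 render=false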