# main.py
# Origin: GitHub repository forked from Bick95/PPO.
import ast
import torch
from constants import FINAL
from ppo import ProximalPolicyOptimization as PPO
from utils_main import get_unique_save_path, save_ppo, get_argparser, copy_config_to_results_folder
# Results of this run are written to a sub-folder of ./train_results/ whose
# name is supplied by get_unique_save_path().
save_dir = ''.join(('./train_results/', get_unique_save_path(), '/'))

# Build the command line parser; it is given the results folder as save_dir.
parser = get_argparser(save_dir=save_dir)

# Read the options the user entered on the command line.
args = parser.parse_args()
def main(args):
    """Train a PPO agent, or replay a previously trained one.

    The configuration file at ``args.config_path`` must contain a single
    Python literal; it is parsed with ``ast.literal_eval`` and unpacked as
    keyword arguments into the ``PPO`` constructor.

    * If ``args.demo_path`` is falsy, the agent is trained. A copy of the
      config file is placed in the module-level ``save_dir`` first, and the
      agent is passed to ``save_ppo`` afterwards. A ``KeyboardInterrupt``
      during training triggers a final evaluation instead of aborting, so
      the progress made so far is still saved.
    * Otherwise the model stored at ``args.demo_path`` is loaded and
      evaluated for 5000 time steps with rendering enabled.

    Args:
        args: Parsed command line arguments. This function reads
            ``config_path`` and ``demo_path``; the whole object is also
            forwarded to ``save_ppo``.
    """
    print('Args:\n', args)

    # Load configurations from file. The context manager guarantees that the
    # file handle is closed again, even if reading fails.
    with open(args.config_path, 'r') as config_file:
        config_text = config_file.read()

    # literal_eval only accepts Python literals, so the file cannot execute code
    config = ast.literal_eval(config_text)

    # Print config for feedback purposes
    print('Config:\n', config)

    if not args.demo_path:
        # Training mode

        # Make a copy of the config file for documentation purposes
        copy_config_to_results_folder(src_path=args.config_path, save_path=save_dir)

        # Set up PPO agent as specified in configurations file
        ppo = PPO(**config)

        try:
            # Train the PPO agent
            ppo.learn()
        except KeyboardInterrupt:
            # In case of keyboard interrupt, don't discard the run so far, but
            # make a final evaluation so that the outcome so far can be saved
            ppo.eval_and_log(eval_type=FINAL)

        # Save as requested (reached after normal completion and after interrupt)
        save_ppo(ppo=ppo, args=args, save_dir=save_dir, config=config)

    else:
        # Demo mode - Replay trained agent
        print('Demo mode. Model used for running a performance demonstration:', args.demo_path)

        # Set up PPO agent as specified in configurations file
        ppo = PPO(**config)

        # Load trained model
        ppo.load(args.demo_path)

        # Do the visual evaluation
        ppo.eval(time_steps=5000, render=True)

    print('Done.')

    # Clean up cuda session
    torch.cuda.empty_cache()
# Start the program only when this file is executed as a script; `args` is
# the module-level namespace parsed from the command line.
if __name__ == '__main__':
    main(args)