# PPO/main.py at main · wenming-ma/PPO · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import ast
import torch
from constants import FINAL
from ppo import ProximalPolicyOptimization as PPO
from utils_main import get_unique_save_path, save_ppo, get_argparser, copy_config_to_results_folder

# Results of this run go below ./train_results/, in a sub-directory named by get_unique_save_path()
save_dir = ''.join(('./train_results/', get_unique_save_path(), '/'))

# Build the command line parser; it is handed the results directory
parser = get_argparser(save_dir=save_dir)

# Read the arguments the user supplied on the command line
args = parser.parse_args()


def main(args):
    """Train a PPO agent or replay a trained one, depending on ``args.demo_path``.

    The configuration file at ``args.config_path`` is read and parsed with
    ``ast.literal_eval``. The result is unpacked as keyword arguments into the
    PPO constructor, so the file must contain a Python literal that evaluates
    to a mapping with string keys.

    If ``args.demo_path`` is falsy, the agent is trained and then saved to the
    module-level ``save_dir``. A ``KeyboardInterrupt`` during training does not
    discard the run: a final evaluation is performed and the agent is still
    saved. Otherwise the model at ``args.demo_path`` is loaded and evaluated
    with rendering enabled.

    Args:
        args: Parsed command line arguments. This function reads
            ``config_path`` and ``demo_path`` and forwards the whole object
            to ``save_ppo`` in training mode.

    Raises:
        OSError: If the configuration file cannot be opened or read.
        ValueError, SyntaxError: If the configuration file does not contain a
            valid Python literal.
    """
    print('Args:\n', args)

    # Load configurations from file. The context manager guarantees that the
    # file handle is closed again, even if reading fails.
    with open(args.config_path, 'r') as config_file:
        config = ast.literal_eval(config_file.read())

    # Print config for feedback purposes
    print('Config:\n', config)

    if not args.demo_path:
        # Training mode

        # Make a copy of the config file for documentation purposes
        copy_config_to_results_folder(src_path=args.config_path, save_path=save_dir)

        # Set up PPO agent as specified in configurations file
        ppo = PPO(**config)

        try:
            # Train the PPO agent
            ppo.learn()
        except KeyboardInterrupt:
            # In case of keyboard interrupt, don't discard full test run so far,
            # but make final eval and save outcome so far
            ppo.eval_and_log(eval_type=FINAL)

        # Save as requested
        save_ppo(ppo=ppo, args=args, save_dir=save_dir, config=config)

    else:
        # Demo mode - Replay trained agent

        print('Demo mode. Model used for running a performance demonstration:', args.demo_path)

        # Set up PPO agent as specified in configurations file
        ppo = PPO(**config)
        # Load trained model
        ppo.load(args.demo_path)
        # Do the visual evaluation
        ppo.eval(time_steps=5000, render=True)

    print('Done.')

    # Clean up cuda session
    torch.cuda.empty_cache()


# Start a run only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main(args)