forked from younggyoseo/Ape-X
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: eval.py
More file actions
120 lines (96 loc) · 3.74 KB
/
eval.py
File metadata and controls
120 lines (96 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
Module for evaluator in Ape-X.
"""
import _pickle as pickle
import os
from multiprocessing import Process, Queue
import zmq
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import utils
from wrapper import make_atari, wrap_atari_dqn
from model import DuelingDQN
from arguments import argparser
from datetime import datetime
def get_environ():
    """Read the learner's address from the ``LEARNER_IP`` environment variable.

    Returns:
        str: the learner's IP address / hostname.

    Raises:
        RuntimeError: if ``LEARNER_IP`` is unset or empty. The original code
            used ``assert`` for this check, which is silently stripped when
            Python runs with ``-O``, letting the sentinel ``'-1'`` leak out.
    """
    learner_ip = os.environ.get('LEARNER_IP', '-1')
    if learner_ip == '-1' or not learner_ip:
        raise RuntimeError("LEARNER_IP environment variable must be set")
    return learner_ip
def connect_param_socket(ctx, param_socket, learner_ip, actor_id):
    """Run the two-step registration handshake with the learner.

    A temporary REQ socket on port 52002 announces this actor twice
    (stages 1 and 2); between the two acknowledgements ``param_socket``
    is connected to the learner's parameter-publishing port 52001.
    The REQ socket is closed once the handshake completes.
    """
    handshake = ctx.socket(zmq.REQ)
    handshake.connect(f"tcp://{learner_ip}:52002")
    # Stage 1: register this actor with the learner.
    handshake.send(pickle.dumps((actor_id, 1)))
    handshake.recv()
    # Subscribe to the parameter stream before confirming readiness.
    param_socket.connect(f'tcp://{learner_ip}:52001')
    # Stage 2: tell the learner we are subscribed.
    handshake.send(pickle.dumps((actor_id, 2)))
    handshake.recv()
    print("Successfully connected to learner!")
    handshake.close()
def recv_param(learner_ip, actor_id, param_queue):
    """Subscriber process: receive model parameters and hand them to the queue.

    Runs forever. CONFLATE keeps only the most recent published message so
    a slow consumer always sees the latest parameters rather than a backlog.
    """
    ctx = zmq.Context()
    sub_socket = ctx.socket(zmq.SUB)
    sub_socket.setsockopt(zmq.SUBSCRIBE, b'')
    sub_socket.setsockopt(zmq.CONFLATE, 1)  # drop all but the newest message
    connect_param_socket(ctx, sub_socket, learner_ip, actor_id)
    while True:
        message = sub_socket.recv(copy=False)
        # NOTE(security): pickle.loads on network data is only safe because
        # the learner is a trusted peer on a private link — do not expose.
        param_queue.put(pickle.loads(message))
def exploration(args, actor_id, param_queue):
    """Evaluator loop: play episodes greedily and log stats to TensorBoard.

    Runs forever. Waits for an initial parameter set on ``param_queue``,
    then repeatedly steps the environment with epsilon=0 actions, and after
    every finished episode blocks until a fresh parameter set arrives.

    Args:
        args: parsed CLI namespace (mutated here: reward clipping and
            episode-life wrappers are disabled for evaluation).
        actor_id: offset added to the global seed for this process.
        param_queue: multiprocessing queue fed by ``recv_param`` with
            state dicts loadable by ``DuelingDQN``.
    """
    writer = SummaryWriter(comment="-{}-eval".format(args.env))
    # Evaluation uses raw rewards and full lives, unlike training wrappers.
    args.clip_rewards = False
    args.episode_life = False
    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)
    seed = args.seed + actor_id
    utils.set_global_seeds(seed, use_torch=True)
    env.seed(seed)
    model = DuelingDQN(env, args)
    # Block until the learner publishes its first parameter set.
    param = param_queue.get(block=True)
    model.load_state_dict(param)
    param = None  # drop the reference so the (large) state dict can be freed
    print("Received First Parameter!")
    episode_reward, episode_length, episode_idx = 0, 0, 0
    state = env.reset()
    tb_dict = {k: [] for k in ['episode_reward', 'episode_length']}
    while True:
        # Greedy action: epsilon is hard-coded to 0.0 for evaluation.
        action, _ = model.act(torch.FloatTensor(np.array(state)), 0.)
        next_state, reward, done, _ = env.step(action)
        state = next_state
        episode_reward += reward
        episode_length += 1
        if done or episode_length == args.max_episode_length:
            state = env.reset()
            tb_dict["episode_reward"].append(episode_reward)
            tb_dict["episode_length"].append(episode_length)
            episode_reward = 0
            episode_length = 0
            episode_idx += 1
            # Blocks until recv_param delivers a newer parameter set; the
            # evaluator therefore runs at most one episode per update.
            param = param_queue.get()
            model.load_state_dict(param)
            print(f"{datetime.now()} Updated Parameter..")
            # Flush aggregated stats every tb_interval (scaled by
            # num_envs_per_worker — presumably to match actor-side logging
            # cadence; TODO confirm against the actor/learner code).
            if (episode_idx * args.num_envs_per_worker) % args.tb_interval == 0:
                writer.add_scalar('evaluator/episode_reward_mean', np.mean(tb_dict['episode_reward']), episode_idx)
                writer.add_scalar('evaluator/episode_reward_max', np.max(tb_dict['episode_reward']), episode_idx)
                writer.add_scalar('evaluator/episode_reward_min', np.min(tb_dict['episode_reward']), episode_idx)
                writer.add_scalar('evaluator/episode_reward_std', np.std(tb_dict['episode_reward']), episode_idx)
                writer.add_scalar('evaluator/episode_length_mean', np.mean(tb_dict['episode_length']), episode_idx)
                tb_dict['episode_reward'].clear()
                tb_dict['episode_length'].clear()
def main():
    """Launch the evaluator and the parameter-receiver as child processes.

    Blocks until both processes exit (in practice they loop forever).
    Returns True on (unreachable in normal operation) completion.
    """
    learner_ip = get_environ()
    args = argparser()
    # maxsize=3 bounds memory if the evaluator falls behind the publisher.
    param_queue = Queue(maxsize=3)
    workers = [
        Process(target=exploration, args=(args, -1, param_queue)),
        Process(target=recv_param, args=(learner_ip, -1, param_queue)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    return True
if __name__ == '__main__':
    # Pin OpenMP to a single thread so the child processes (which each run
    # their own torch model) do not oversubscribe CPU cores.
    os.environ["OMP_NUM_THREADS"] = "1"
    main()