-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathDQN_Hops_RewardsTesting.py
116 lines (102 loc) · 3.39 KB
/
DQN_Hops_RewardsTesting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from collections import deque
import time
import random
import os
import pickle
from os import path
# Presumably the file names used to persist replay memory and training
# variables; neither constant is referenced in the visible part of this script.
REPLAYMEM = "replay.dat"
VARIABLES = "variable.dat"
NACTIONS = 4  # Number of discrete actions the agent chooses between
GRIDSIZE = 8  # Routers per side of the square grid (ids are row-major)
NROUTERS = GRIDSIZE * GRIDSIZE  # Total routers in the grid: 8 * 8 = 64
NSTATES = 2 * NROUTERS  # Length of the one-hot state vector: 2 * 64 = 128
# Level '2' asks TensorFlow's native logging to hide INFO and WARNING output.
# NOTE(review): this is set after `import tensorflow` -- confirm it still
# takes effect, or move it above the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def oneHotEncode(my_id, dest_id, n_routers=None):
    """Encode a (current router, destination router) pair as one vector.

    The result is two one-hot vectors laid end to end: the first
    ``n_routers`` entries mark ``my_id`` and the last ``n_routers`` entries
    mark ``dest_id``.

    Args:
        my_id: Index of the current router, ``0 <= my_id < n_routers``.
        dest_id: Index of the destination router, same range.
        n_routers: Length of each one-hot half. When omitted, the
            module-level ``NROUTERS`` is used.

    Returns:
        A 1-D integer ``numpy`` array of length ``2 * n_routers`` containing
        exactly two ones.

    Raises:
        IndexError: If either id lies outside ``[0, n_routers)``. Negative
            ids are rejected explicitly; plain list indexing would silently
            wrap them around and mark the wrong router.
    """
    if n_routers is None:
        n_routers = NROUTERS

    for label, router_id in (("my_id", my_id), ("dest_id", dest_id)):
        if not 0 <= router_id < n_routers:
            raise IndexError(
                f"{label}={router_id} is outside the valid router range "
                f"[0, {n_routers})"
            )

    encoded = np.zeros(2 * n_routers, dtype=int)
    encoded[my_id] = 1
    # The destination half starts right after the current-router half.
    encoded[n_routers + dest_id] = 1
    return encoded
def vectorize(my_id, dest_id, free_vcs):
    """Flatten the two router ids and the ``free_vcs`` values into one array.

    Args:
        my_id: Value placed at index 0.
        dest_id: Value placed at index 1.
        free_vcs: Iterable whose items follow the two ids, in order.

    Returns:
        A ``numpy`` array ``[my_id, dest_id, *free_vcs]``.
    """
    return np.array([my_id, dest_id, *free_vcs])
def getFutureDistance(my_id, dest_id, grid_size=None):
    """Return the Manhattan distance between two routers on the grid.

    Router ids are interpreted row-major: ``x = id % grid_size`` and
    ``y = id // grid_size``.

    Args:
        my_id: Id of the starting router.
        dest_id: Id of the target router.
        grid_size: Number of routers per grid row. When omitted, the
            module-level ``GRIDSIZE`` is used.

    Returns:
        ``abs(dx) + abs(dy)`` between the two grid positions, as an int.
    """
    if grid_size is None:
        grid_size = GRIDSIZE

    # divmod yields (id // grid_size, id % grid_size), i.e. (row, column).
    my_y, my_x = divmod(my_id, grid_size)
    dest_y, dest_x = divmod(dest_id, grid_size)
    return abs(my_x - dest_x) + abs(my_y - dest_y)
def get_qs(model, my_id, dest_id, free_vcs, past_dist):
    """Query the network for Q-values and pick the highest-valued action.

    Args:
        model: Object with a Keras-style ``predict`` method that accepts a
            2-D array holding one encoded state per row.
        my_id: Id of the current router.
        dest_id: Id of the destination router.
        free_vcs: Not used by this function; kept so existing call sites
            keep working.
        past_dist: Not used by this function; kept for the same reason.

    Returns:
        A ``(model, optimal_action)`` tuple: the model that was passed in,
        and the index of the largest value in the prediction.
    """
    state = oneHotEncode(my_id, dest_id)
    # Shape the state as a single-row batch. NSTATES is the encoded state
    # length, so the width follows the grid size instead of a literal 128.
    q_values = model.predict(state.reshape(-1, NSTATES))
    optimal_action = np.argmax(q_values)
    return model, optimal_action
if __name__ == "__main__":
    # File-based request/response loop. Another process drops a 7-line
    # "input.txt" (my_id, dest_id, four free-VC values, past_dist); this
    # script answers with the chosen action in "action.txt".

    # Pause between polls so waiting for the next request does not pin a
    # CPU core.
    POLL_INTERVAL_S = 0.001

    print('Starting testing!')
    tm = tf.keras.models.load_model('./saved_target_model')
    legal_actions = 0
    total_actions = 0

    while True:
        # Wait until the request file exists and holds all 7 lines; the
        # writer may still be part-way through when the file first appears.
        while True:
            try:
                with open('input.txt', 'r') as f:
                    lines = f.readlines()
            except FileNotFoundError:
                lines = []
            if len(lines) == 7:
                break
            time.sleep(POLL_INTERVAL_S)

        my_id = int(lines[0])
        dest_id = int(lines[1])
        free_vcs = [int(line) for line in lines[2:6]]
        past_dist = int(lines[6])
        # Consume the request so the next loop iteration waits for a new one.
        os.remove("input.txt")

        tm, action = get_qs(tm, my_id, dest_id, free_vcs, past_dist)
        total_actions += 1
        print('Action taken:', action)

        my_x = my_id % GRIDSIZE
        my_y = my_id // GRIDSIZE
        # Indexed by action number: an action is allowed only when the
        # router is not already on the grid edge that the action needs
        # room on.
        move_allowed = (
            my_y < GRIDSIZE - 1,  # action 0
            my_x < GRIDSIZE - 1,  # action 1
            my_y > 0,             # action 2
            my_x > 0,             # action 3
        )

        # Replace an illegal action with random draws until a legal one
        # comes up. At the destination an illegal action is passed through.
        # NOTE(review): legal_actions is also incremented for a random
        # replacement, so it does not isolate how often the model's own
        # choice was legal -- confirm that this is the intended metric.
        while True:
            if 0 <= action < len(move_allowed) and move_allowed[action]:
                legal_actions += 1
                break
            if my_id == dest_id:
                break
            action = np.random.randint(0, NACTIONS)

        with open("action.txt", "w") as f:
            f.write(str(action))

        # Running totals, printed after every request because the loop
        # never terminates on its own.
        print('Total actions: ', total_actions)
        print('Legal actions: ', legal_actions)