-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: ntest.py
More file actions
54 lines (50 loc) · 1.79 KB
/
ntest.py
File metadata and controls
54 lines (50 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#
# _ _
# |\| _ _|_ _ / \ _ ___|_ | _
# | |(_) |__> \_/\_/(/_ | | | (_)\^/
#
# ntest.py created December 1st 2020
# by richard juan (contact@richardjuan-business.com)
#
# ----------------------------------------------
#
# using Deep Q Learning to solve MCDO
#
# ----------------------------------------------
from mcdo import MacDo, Agent, plotLearning
import numpy as np
if __name__ == '__main__':
    # Train a Deep-Q agent on the MacDo environment and plot the
    # learning curve (score + epsilon decay) when training finishes.
    env = MacDo()
    brain = Agent(gamma=0.75, epsilon=0.5, batch_size=16, n_actions=30,
                  input_dims=[16], alpha=0.03)

    scores = []       # total reward collected per episode
    eps_history = []  # epsilon value at the start of each episode
    num_games = 800
    score = 0         # holds the PREVIOUS episode's score when printed

    for episode in range(num_games):
        # Every 10th episode, also report a rolling average over the
        # last 10 completed episodes and the current exploration rate.
        if episode % 10 == 0 and episode > 0:
            avg_score = np.mean(scores[max(0, episode-10):(episode+1)])
            print('episode: ', episode,'score: ', score,
                 ' average score %.3f' % avg_score,
                'epsilon %.3f' % brain.EPSILON)
        else:
            print('episode: ', episode,'score: ', score)
        eps_history.append(brain.EPSILON)

        observation = env.reset()
        score = 0
        done = False
        while not done:
            action = brain.chooseAction(observation)
            next_observation, reward, done, info = env.step(action)
            score += reward
            brain.storeTransition(observation, action, reward,
                                  next_observation, done)
            observation = next_observation
            brain.learn()
            # uncomment to render while running
            #env.render()
        scores.append(score)

    # x-axis: episode numbers 1..num_games
    x = list(range(1, num_games + 1))
    filename = str(num_games) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
               'Alpha' + str(brain.ALPHA) + 'Memory' + '.png'
    plotLearning(x, scores, eps_history, filename)