-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: ntest.py
More file actions
54 lines (50 loc) · 1.79 KB
/
ntest.py
File metadata and controls
54 lines (50 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#
# _ _
# |\| _ _|_ _ / \ _ ___|_ | _
# | |(_) |__> \_/\_/(/_ | | | (_)\^/
#
# ntest.py created December 1st 2020
# by richard juan (contact@richardjuan-business.com)
#
# ----------------------------------------------
#
# using Deep Q Learning to solve MCDO
#
# ----------------------------------------------
from mcdo import MacDo, Agent, plotLearning
import numpy as np
if __name__ == '__main__':
    # Train a Deep-Q agent on the MacDo environment and plot the
    # learning curve (score + epsilon decay) when training finishes.
    env = MacDo()
    brain = Agent(gamma=0.75, epsilon=0.5, batch_size=16, n_actions=30,
                  input_dims=[16], alpha=0.03)

    scores = []       # total reward collected per episode
    eps_history = []  # epsilon value at the start of each episode
    num_games = 800
    score = 0         # holds the PREVIOUS episode's score when printed

    for episode in range(num_games):
        # Every 10th episode, also report a rolling average over the
        # last 10 completed episodes and the current exploration rate.
        if episode % 10 == 0 and episode > 0:
            avg_score = np.mean(scores[max(0, episode-10):(episode+1)])
            print('episode: ', episode,'score: ', score,
                 ' average score %.3f' % avg_score,
                'epsilon %.3f' % brain.EPSILON)
        else:
            print('episode: ', episode,'score: ', score)
        eps_history.append(brain.EPSILON)

        observation = env.reset()
        score = 0
        done = False
        while not done:
            action = brain.chooseAction(observation)
            next_observation, reward, done, info = env.step(action)
            score += reward
            brain.storeTransition(observation, action, reward,
                                  next_observation, done)
            observation = next_observation
            brain.learn()
            # uncomment to render while running
            #env.render()
        scores.append(score)

    # x-axis: episode numbers 1..num_games
    x = list(range(1, num_games + 1))
    filename = str(num_games) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
               'Alpha' + str(brain.ALPHA) + 'Memory' + '.png'
    plotLearning(x, scores, eps_history, filename)