-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQlearning.java
More file actions
125 lines (105 loc) · 3.74 KB
/
Qlearning.java
File metadata and controls
125 lines (105 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package com.masim.qlearning;
import java.util.Random;
import com.masim.emissionAgents.EmissionUnit;
public class Qlearning {
    // Reward-matrix entry values: p = penalized move, r = rewarded move,
    // na = transition not allowed (no edge between the states).
    static double p = -2;
    static double r = 3;
    static double na = -1;

    /** Number of states (and of actions — one action per target state). */
    public static final int Q_SIZE = 5;
    /** Discount factor applied to the best future Q-value. */
    public static final double GAMMA = 0.8;
    /** Number of full training sweeps over all initial states. */
    public static final int ITERATIONS = 30;
    /** Order in which starting states are visited during each sweep. */
    public static final int INITIAL_STATES[] = new int[] {1, 3, 2, 4, 0};

    /** Single shared RNG — avoids constructing (and reseeding) a new Random per draw. */
    private static final Random RANDOM = new Random();

    /**
     * Reward matrix R[state][action]. Only entries >= 0 (the r values) are
     * valid transitions; p and na mark penalized / disallowed moves.
     * State 4 is the goal: it is the only state whose self-transition is rewarded.
     */
    public static double[][] R = { { p, r, na, na, na },
                                   { p, p, r, na, na },
                                   { na, p, p, r, na },
                                   { na, na, p, p, r },
                                   { na, na, na, p, r } };

    /**
     * Trains the agent's Q matrix by running {@link #ITERATIONS} sweeps,
     * each sweep starting one episode from every state in INITIAL_STATES.
     *
     * @param qla agent whose {@code q} matrix and {@code currentState} are updated in place
     */
    public static void train(EmissionUnit qla)
    {
        System.out.println(qla.getLocalName() + " :Debut");
        // Perform training, starting at all initial states.
        for (int j = 0; j < ITERATIONS; j++) {
            for (int i = 0; i < Q_SIZE; i++) {
                episode(INITIAL_STATES[i], qla);
            }
        }
        System.out.println(qla.getLocalName() + " :Terminer");
    }

    /**
     * Runs one episode: walks from {@code initialState} to the goal state (4),
     * updating Q along the way, then performs Q_SIZE extra steps for convergence.
     *
     * @param initialState state the episode starts from
     * @param qla          agent holding the Q matrix and current state
     */
    private static void episode(final int initialState, EmissionUnit qla)
    {
        qla.currentState = initialState;
        // Travel from state to state until the goal state (4) is reached.
        // BUG FIX: the original condition was (currentState == 4), which made the
        // loop exit after a single step from any non-goal start instead of
        // walking all the way to the goal as the comment intends.
        do {
            chooseAnAction(qla);
        } while (qla.currentState != 4);
        // Once the goal is reached, run through the set once more for convergence.
        for (int i = 0; i < Q_SIZE; i++) {
            chooseAnAction(qla);
        }
    }

    /**
     * Picks a random valid action from the current state, updates the Q entry
     * for that (state, action) pair, and moves the agent to the chosen state.
     */
    private static void chooseAnAction(EmissionUnit qla)
    {
        // Randomly choose a possible action connected to the current state.
        int possibleAction = getRandomAction(Q_SIZE, qla);
        // getRandomAction only returns actions with R > -1; since R holds only
        // -2, -1 and 3, this check is always satisfied for such actions.
        if (R[qla.currentState][possibleAction] >= 0) {
            qla.q[qla.currentState][possibleAction] = reward(possibleAction, qla);
            qla.currentState = possibleAction;
        }
    }

    /**
     * Bellman update value: R(s, a) + GAMMA * max_a' Q(a, a'), truncated to int
     * because the agent's Q matrix stores ints.
     */
    private static int reward(final int Action, EmissionUnit qla)
    {
        return (int) (R[qla.currentState][Action] + (GAMMA * maximum(Action, false, qla)));
    }

    /**
     * Argmax / max over row {@code State} of the agent's Q matrix.
     * A single strict-greater pass is equivalent to the original
     * restart-until-stable scan, including its first-index tie-breaking.
     *
     * @param State           Q-matrix row to search
     * @param ReturnIndexOnly true → return the winning index; false → the winning value
     */
    private static int maximum(final int State, final boolean ReturnIndexOnly, EmissionUnit qla)
    {
        int winner = 0;
        for (int i = 1; i < Q_SIZE; i++) {
            if (qla.q[State][i] > qla.q[State][winner]) {
                winner = i;
            }
        }
        return ReturnIndexOnly ? winner : qla.q[State][winner];
    }

    /**
     * Draws random actions in [0, upperBound) until one is a valid transition
     * from the agent's current state (R > -1, i.e. a rewarded edge).
     * Terminates because every row of R contains at least one r entry.
     */
    private static int getRandomAction(final int upperBound, EmissionUnit qla)
    {
        while (true) {
            // Get a random value between 0 (inclusive) and upperBound (exclusive).
            int action = RANDOM.nextInt(upperBound);
            if (R[qla.currentState][action] > -1) {
                return action;
            }
        }
    }
}