-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQlearning.java
More file actions
125 lines (105 loc) · 3.74 KB
/
Qlearning.java
File metadata and controls
125 lines (105 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package com.masim.qlearning;
import java.util.Random;
import com.masim.emissionAgents.EmissionUnit;
public class Qlearning {
    // Reward-matrix entry values: p = penalized move, r = rewarded move,
    // na = transition not allowed (no edge between the states).
    static double p = -2;
    static double r = 3;
    static double na = -1;

    /** Number of states (and of actions — one action per target state). */
    public static final int Q_SIZE = 5;
    /** Discount factor applied to the best future Q-value. */
    public static final double GAMMA = 0.8;
    /** Number of full training sweeps over all initial states. */
    public static final int ITERATIONS = 30;
    /** Order in which starting states are visited during each sweep. */
    public static final int INITIAL_STATES[] = new int[] {1, 3, 2, 4, 0};

    /** Single shared RNG — avoids constructing (and reseeding) a new Random per draw. */
    private static final Random RANDOM = new Random();

    /**
     * Reward matrix R[state][action]. Only entries >= 0 (the r values) are
     * valid transitions; p and na mark penalized / disallowed moves.
     * State 4 is the goal: it is the only state whose self-transition is rewarded.
     */
    public static double[][] R = { { p, r, na, na, na },
                                   { p, p, r, na, na },
                                   { na, p, p, r, na },
                                   { na, na, p, p, r },
                                   { na, na, na, p, r } };

    /**
     * Trains the agent's Q matrix by running {@link #ITERATIONS} sweeps,
     * each sweep starting one episode from every state in INITIAL_STATES.
     *
     * @param qla agent whose {@code q} matrix and {@code currentState} are updated in place
     */
    public static void train(EmissionUnit qla)
    {
        System.out.println(qla.getLocalName() + " :Debut");
        // Perform training, starting at all initial states.
        for (int j = 0; j < ITERATIONS; j++) {
            for (int i = 0; i < Q_SIZE; i++) {
                episode(INITIAL_STATES[i], qla);
            }
        }
        System.out.println(qla.getLocalName() + " :Terminer");
    }

    /**
     * Runs one episode: walks from {@code initialState} to the goal state (4),
     * updating Q along the way, then performs Q_SIZE extra steps for convergence.
     *
     * @param initialState state the episode starts from
     * @param qla          agent holding the Q matrix and current state
     */
    private static void episode(final int initialState, EmissionUnit qla)
    {
        qla.currentState = initialState;
        // Travel from state to state until the goal state (4) is reached.
        // BUG FIX: the original condition was (currentState == 4), which made the
        // loop exit after a single step from any non-goal start instead of
        // walking all the way to the goal as the comment intends.
        do {
            chooseAnAction(qla);
        } while (qla.currentState != 4);
        // Once the goal is reached, run through the set once more for convergence.
        for (int i = 0; i < Q_SIZE; i++) {
            chooseAnAction(qla);
        }
    }

    /**
     * Picks a random valid action from the current state, updates the Q entry
     * for that (state, action) pair, and moves the agent to the chosen state.
     */
    private static void chooseAnAction(EmissionUnit qla)
    {
        // Randomly choose a possible action connected to the current state.
        int possibleAction = getRandomAction(Q_SIZE, qla);
        // getRandomAction only returns actions with R > -1; since R holds only
        // -2, -1 and 3, this check is always satisfied for such actions.
        if (R[qla.currentState][possibleAction] >= 0) {
            qla.q[qla.currentState][possibleAction] = reward(possibleAction, qla);
            qla.currentState = possibleAction;
        }
    }

    /**
     * Bellman update value: R(s, a) + GAMMA * max_a' Q(a, a'), truncated to int
     * because the agent's Q matrix stores ints.
     */
    private static int reward(final int Action, EmissionUnit qla)
    {
        return (int) (R[qla.currentState][Action] + (GAMMA * maximum(Action, false, qla)));
    }

    /**
     * Argmax / max over row {@code State} of the agent's Q matrix.
     * A single strict-greater pass is equivalent to the original
     * restart-until-stable scan, including its first-index tie-breaking.
     *
     * @param State           Q-matrix row to search
     * @param ReturnIndexOnly true → return the winning index; false → the winning value
     */
    private static int maximum(final int State, final boolean ReturnIndexOnly, EmissionUnit qla)
    {
        int winner = 0;
        for (int i = 1; i < Q_SIZE; i++) {
            if (qla.q[State][i] > qla.q[State][winner]) {
                winner = i;
            }
        }
        return ReturnIndexOnly ? winner : qla.q[State][winner];
    }

    /**
     * Draws random actions in [0, upperBound) until one is a valid transition
     * from the agent's current state (R > -1, i.e. a rewarded edge).
     * Terminates because every row of R contains at least one r entry.
     */
    private static int getRandomAction(final int upperBound, EmissionUnit qla)
    {
        while (true) {
            // Get a random value between 0 (inclusive) and upperBound (exclusive).
            int action = RANDOM.nextInt(upperBound);
            if (R[qla.currentState][action] > -1) {
                return action;
            }
        }
    }
}