We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ef90832 commit ef3312d — Copy full SHA for ef3312d
tf2.0/rl_trader.py
@@ -288,11 +288,7 @@ def replay(self, batch_size=32):
288
done = minibatch['d']
289
290
# Calculate the tentative target: Q(s',a)
291
- target = rewards + self.gamma * np.amax(self.model.predict(next_states), axis=1)
292
-
293
- # The value of terminal states is zero
294
- # so set the target to be the reward only
295
- target[done] = rewards[done]
+ target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states), axis=1)
296
297
# With the Keras API, the target (usually) must have the same
298
# shape as the predictions.
0 commit comments