We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ef90832 commit ef3312d — Copy full SHA for ef3312d
tf2.0/rl_trader.py
@@ -288,11 +288,7 @@ def replay(self, batch_size=32):
288
done = minibatch['d']
289
290
# Calculate the tentative target: Q(s',a)
291
- target = rewards + self.gamma * np.amax(self.model.predict(next_states), axis=1)
292
-
293
- # The value of terminal states is zero
294
- # so set the target to be the reward only
295
- target[done] = rewards[done]
+ target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states), axis=1)
296
297
# With the Keras API, the target (usually) must have the same
298
# shape as the predictions.
0 commit comments