-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathaux.py
More file actions
43 lines (38 loc) · 1.14 KB
/
aux.py
File metadata and controls
43 lines (38 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Rewards function and such
import numpy
# Info given:
# {'coins': 0, 'flag_get': False, 'life': 2, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40}
def reward(ninfo, pinfo, *, flag_bonus=15, death_penalty=-15, x_weight=0.5):
    """Compute the reward for one step from two consecutive info dicts.

    Both arguments are dicts shaped like the example above this function
    (keys ``flag_get``, ``life``, ``time``, ``x_pos`` are read; ``coins``,
    ``score``, ``stage``, ``status`` and ``world`` are ignored).

    How each field contributes:
      flag_get : truthy in ``ninfo`` adds ``flag_bonus`` (big plus).
      life     : any change between ``pinfo`` and ``ninfo`` returns
                 ``death_penalty`` immediately (big minus).
      time     : ``ninfo['time'] - pinfo['time']`` is added unscaled, so a
                 decreasing clock costs points.
      x_pos    : ``ninfo['x_pos'] - pinfo['x_pos']`` is added scaled by
                 ``x_weight``, so a larger x_pos than before is rewarded.

    Args:
        ninfo: info dict for the new (current) step.
        pinfo: info dict for the previous step.
        flag_bonus: bonus added when ``ninfo['flag_get']`` is truthy.
        death_penalty: value returned when the life counter changed.
        x_weight: multiplier applied to the change in ``x_pos``.

    Returns:
        ``death_penalty`` if the life counter changed, otherwise the sum of
        the time delta, the weighted x_pos delta and the flag bonus.
    """
    flag = flag_bonus if ninfo['flag_get'] else 0

    # The life counter is compared with != rather than <, so any change
    # short-circuits the reward -- presumably a change means a life was
    # lost; the flag bonus is intentionally not applied on this path.
    if ninfo['life'] != pinfo['life']:
        return death_penalty

    time_delta = ninfo['time'] - pinfo['time']
    x_delta = ninfo['x_pos'] - pinfo['x_pos']
    return time_delta + x_weight * x_delta + flag
# 0.95 discount rate
def discountrewards(rewards, discrate=0.95):
    """Return the discounted cumulative reward for every step of an episode.

    Element ``i`` of the result is
    ``rewards[i] + discrate * rewards[i+1] + discrate**2 * rewards[i+2] + ...``
    computed in a single backward pass.

    Args:
        rewards: indexable sequence of per-step rewards with a ``len()``.
        discrate: discount factor applied per step (defaults to 0.95).

    Returns:
        A float ``numpy`` array with the same length as ``rewards``; an
        empty array when ``rewards`` is empty.
    """
    nsteps = len(rewards)
    discrewards = numpy.empty(nsteps)
    cumreward = 0.0
    # Walk backwards so each step reuses the already-discounted future sum.
    for i in range(nsteps - 1, -1, -1):
        cumreward = rewards[i] + cumreward * discrate
        discrewards[i] = cumreward
    return discrewards
def discnormrewards(allrewards):
    """Discount every episode's rewards, then normalise them jointly.

    Each element of ``allrewards`` is passed to ``discountrewards``. The
    mean and standard deviation are taken over the concatenation of all
    discounted episodes, and every episode is shifted and scaled with those
    shared statistics.

    Args:
        allrewards: iterable of per-episode reward sequences.

    Returns:
        A list with one array per episode, in the same order as the input.
        Returns ``[]`` when ``allrewards`` is empty.
    """
    alldiscrewards = [discountrewards(rewards) for rewards in allrewards]
    if not alldiscrewards:
        # numpy.concatenate raises ValueError on an empty sequence.
        return []

    fullrewards = numpy.concatenate(alldiscrewards)
    if fullrewards.size == 0:
        # Every episode was empty: there is nothing to normalise, and
        # mean()/std() of an empty array would only produce NaN + warnings.
        return alldiscrewards

    rmean = fullrewards.mean()
    rstd = fullrewards.std()
    if rstd == 0:
        # All discounted rewards are identical; dividing by zero would turn
        # every value into NaN, so only centre the data in that case.
        rstd = 1.0
    return [(discrewards - rmean) / rstd
            for discrewards in alldiscrewards]