Skip to content

Commit ab45d0e

Browse files
committed
Smooth target env
1 parent 7d7ea7b commit ab45d0e

4 files changed

Lines changed: 22 additions & 15 deletions

File tree

pufferlib/config/ocean/target.ini

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@ num_agents = 8
1010
num_goals = 4
1111

1212
[train]
13-
total_timesteps = 500_000_000
13+
total_timesteps = 100_000_000
1414
gamma = 0.99
15-
learning_rate = 0.0015
15+
learning_rate = 0.015
1616
minibatch_size = 32768
1717
ent_coef = 0.02
18+

pufferlib/ocean/target/target.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@ int main() {
2222
.num_goals = 8
2323
};
2424
init(&env);
25-
env.observations = calloc(env.num_agents*(2*(env.num_agents + env.num_goals)+1), sizeof(float));
26-
env.actions = calloc(2*env.num_agents, sizeof(float));
25+
env.observations = calloc(env.num_agents*(2*(env.num_agents + env.num_goals)+4), sizeof(float));
26+
env.actions = calloc(2*env.num_agents, sizeof(int));
2727
env.rewards = calloc(env.num_agents, sizeof(float));
2828
env.terminals = calloc(env.num_agents, sizeof(unsigned char));
2929

@@ -34,7 +34,8 @@ int main() {
3434
env.actions[0] = 0;
3535
} else {
3636
for (int i=0; i<env.num_agents; i++) {
37-
env.actions[i] = (float)rand()/(float)RAND_MAX - 0.5f ;
37+
env.actions[2*i] = rand() % 9;
38+
env.actions[2*i + 1] = rand() % 5;
3839
}
3940
//forward_linearlstm(net, env.observations, env.actions);
4041
}

pufferlib/ocean/target/target.h

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
#include <stdio.h>
55
#include "raylib.h"
66

7-
const float VELOCITY = 20.0f;
7+
const float MAX_SPEED = 20.0f;
88

99
typedef struct {
1010
float perf;
@@ -23,6 +23,7 @@ typedef struct {
2323
float x;
2424
float y;
2525
float heading;
26+
float speed;
2627
int ticks_since_reward;
2728
} Agent;
2829

@@ -37,7 +38,7 @@ typedef struct {
3738
Agent* agents;
3839
Goal* goals;
3940
float* observations;
40-
float* actions;
41+
int* actions;
4142
float* rewards;
4243
unsigned char* terminals;
4344
int width;
@@ -113,21 +114,29 @@ void c_step(Target* env) {
113114
env->rewards[i] = 0;
114115
Agent* agent = &env->agents[i];
115116
agent->ticks_since_reward += 1;
116-
agent->heading += env->actions[i];
117+
118+
agent->heading += ((float)env->actions[2*i] - 4.0f)/12.0f;
117119
if (agent->heading < 0) {
118120
agent->heading += 2*PI;
119121
} else if (agent->heading > 2*PI) {
120122
agent->heading -= 2*PI;
121123
}
122124

123-
agent->x += VELOCITY*cosf(agent->heading);
125+
agent->speed += 1.0f*((float)env->actions[2*i + 1] - 2.0f);
126+
if (agent->speed > MAX_SPEED) {
127+
agent->speed = MAX_SPEED;
128+
} else if (agent->speed < -MAX_SPEED) {
129+
agent->speed = -MAX_SPEED;
130+
}
131+
132+
agent->x += agent->speed*cosf(agent->heading);
124133
if (agent->x < 0) {
125134
agent->x = 0;
126135
} else if (agent->x > env->width) {
127136
agent->x = env->width;
128137
}
129138

130-
agent->y += VELOCITY*sinf(agent->heading);
139+
agent->y += agent->speed*sinf(agent->heading);
131140
if (agent->y < 0) {
132141
agent->y = 0;
133142
} else if (agent->y > env->height) {

pufferlib/ocean/target/target.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,16 +24,13 @@ def __init__(self, num_envs=1, width=1080, height=720, num_agents=8,
2424
num_goals=8, render_mode=None, log_interval=128, size=11, buf=None, seed=0):
2525
self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
2626
shape=(2*(num_agents+num_goals) + 4,), dtype=np.float32)
27-
self.single_action_space = gymnasium.spaces.Box(
28-
low=-0.5, high=0.5, shape=(1,), dtype=np.float32)
29-
#self.single_action_space = gymnasium.spaces.Discrete(9)
27+
self.single_action_space = gymnasium.spaces.MultiDiscrete([9, 5])
3028

3129
self.render_mode = render_mode
3230
self.num_agents = num_envs*num_agents
3331
self.log_interval = log_interval
3432

3533
super().__init__(buf)
36-
#self.actions = self.actions.astype(np.float32)
3734
c_envs = []
3835
for i in range(num_envs):
3936
c_env = binding.env_init(
@@ -55,7 +52,6 @@ def reset(self, seed=None):
5552

5653
def step(self, actions):
5754
self.tick += 1
58-
#actions = (actions.astype(np.float32) - 4)/8
5955
self.actions[:] = actions
6056
binding.vec_step(self.c_envs)
6157

0 commit comments

Comments
 (0)