Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 55 additions & 8 deletions config/ocean/cpr.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,63 @@ vec = multiprocessing
rnn_name = Recurrent

[env]
num_envs = 256
num_envs = 512
vision = 3
widths = [32]
heights = [32]
num_agents = [8]
num_agents = [12]
report_interval=1
reward_food = 1.0
interactive_food_reward = 5.0
reward_move = -0.01
reward_food = 0.1
interactive_food_reward = 0.2
reward_move = +0.00
food_base_spawn_rate = 2e-3

[train]
total_timesteps = 100_000_000
num_envs = 1
num_workers = 1
total_timesteps = 60_000_000
device = cpu
batch_size = 32768
minibatch_size = 8192
bptt_horizon = 16
checkpoint_interval = 200
learning_rate = 0.0008524
gamma = 0.9989
gae_lambda = 0.99
vf_coef = 1
ent_coef = 0.01
adam_beta1 = 0.9
adam_beta2 = 0.999
adam_eps = 1e-12
max_grad_norm = 0.5
vf_clip_coef = 0.1
update_epochs = 1

[workspace]
name = boxingbytes
project = pufferai

[sweep.metric]
goal = maximize
name = score
min = -10
max = 10

[sweep.env.reward_food]
distribution = log_normal
min = 0.0001
max = 0.01
mean = 0.001
scale = auto

[sweep.env.interactive_food_reward]
distribution = log_normal
min = 0.0001
max = 0.02
mean = 0.002
scale = auto

[sweep.train.total_timesteps]
distribution = log_normal
min = 50e6
max = 75e6
mean = 60e6
scale = time
14 changes: 6 additions & 8 deletions pufferlib/ocean/cpr/cpr.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
#include <unistd.h>

int main() {
int width = 24;
int height = 24;
int width = 32;
int height = 32;

int render_cell_size = 32;

CCpr env = {
.num_agents = 8,
.num_agents = 1,
.width = width,
.height = height,
.vision = 2,
.vision = 3,
.reward_food = 1.0f,
.interactive_food_reward = 5.0f,
.food_base_spawn_rate = 2e-3,
Expand All @@ -21,9 +21,9 @@ int main() {
c_reset(&env);

Renderer *renderer = init_renderer(render_cell_size, width, height);

while (!WindowShouldClose()) {

c_render(renderer, &env);
int st = 0;
// User can take control of the first puffer
if (IsKeyDown(KEY_LEFT_SHIFT)) {
Expand All @@ -41,12 +41,10 @@ int main() {
sleep(2);
}
for (int i = st; i < env.num_agents; i++) {
env.actions[i] = rand() % 4;
env.actions[i] = rand() % 5;
// printf("Agent %d gets actions %d\n", i, env->actions[i]);
}
c_step(&env);

c_render(renderer, &env);
}
close_renderer(renderer);
free_CCpr(&env);
Expand Down
Loading
Loading