Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 46 additions & 48 deletions config/boids.ini
Original file line number Diff line number Diff line change
@@ -1,68 +1,66 @@
[base]
package = ocean
env_name = boids
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 64
num_boids = 64
num_agents = 64
; num_envs = 1
; num_boids = 1
margin_turn_factor = 0.0
centering_factor = 0.00
avoid_factor = 1.00
matching_factor = 1.00
; num_agents = 5
report_interval = 1
; margin_turn_factor = 1.0
; cohesion_factor = 0.0048
; separation_factor = 0.0128
; alignment_factor = 0.02
margin_turn_factor = 1.0
cohesion_factor = 0.001
separation_factor = 0.0
alignment_factor = 0.0

[vec]
num_workers = 2
num_envs = 2
batch_size = auto
total_agents = 4096
num_buffers = 8
num_threads = 8

[train]
total_timesteps = 100_000_000
gamma = 0.95
learning_rate = 0.025
minibatch_size = 16384
; minibatch_size = 1

; [sweep]
; method = protein
; metric = episode_length
[sweep]
method = Protein
metric = perf
sweep_only = margin_turn_factor, cohesion_factor, separation_factor, alignment_factor

; [sweep.train.total_timesteps]
; distribution = log_normal
; min = 1e6
; max = 1e7
; mean = 5e6
; scale = 0.5
[sweep.train.total_timesteps]
distribution = log_normal
min = 1e3
max = 1e7
scale = time

; [sweep.train.gamma]
; distribution = log_normal
; min = 0.9
; max = 0.999
; mean = 0.97
[sweep.env.margin_turn_factor]
distribution = log_normal
min = 0.01
max = 5.0
scale = auto

; [sweep.train.gae_lambda]
; distribution = log_normal
; min = 0.7
; max = 0.999
; mean = 0.95

; [sweep.train.learning_rate]
; distribution = log_normal
; min = 0.0001
; max = 0.001
; mean = 0.00025
; scale = 0.5

; [sweep.train.batch_size]
; min = 32768
; max = 131072
; mean = 65536
; scale = 0.5

; [sweep.train.minibatch_size]
; min = 512
; max = 2048
; mean = 1024
; scale = 0.5
[sweep.env.cohesion_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto

[sweep.env.separation_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto

[sweep.env.alignment_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto
35 changes: 19 additions & 16 deletions ocean/boids/binding.c
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
#include "boids.h"
#define OBS_SIZE 512 // 64 boids * 8 obs per boid
#define NUM_ATNS 2 // Two discrete actions per boid
#define ACT_SIZES {3, 3}
#define OBS_TENSOR_T FloatTensor

#define Env Boids
#include "../env_binding.h"
#include "vecenv.h"

static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->num_boids = unpack(kwargs, "num_boids");
env->report_interval = unpack(kwargs, "report_interval");
env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
env->centering_factor = unpack(kwargs, "centering_factor");
env->avoid_factor = unpack(kwargs, "avoid_factor");
env->matching_factor = unpack(kwargs, "matching_factor");
void my_init(Env* env, Dict* kwargs) {
env->num_agents = (unsigned int)dict_get(kwargs, "num_agents")->value;
env->report_interval = (unsigned)dict_get(kwargs, "report_interval")->value;
env->margin_turn_factor = (float)dict_get(kwargs, "margin_turn_factor")->value;
env->cohesion_factor = (float)dict_get(kwargs, "cohesion_factor")->value;
env->separation_factor = (float)dict_get(kwargs, "separation_factor")->value;
env->alignment_factor = (float)dict_get(kwargs, "alignment_factor")->value;
init(env);
return 0;
}

static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
assign_to_dict(dict, "n", log->n);
return 0;
void my_log(Log* log, Dict* out) {
dict_set(out, "score", log->score);
dict_set(out, "margin_turn_reward", log->t_margin_turn_reward);
dict_set(out, "cohesion_reward", log->t_cohesion_reward);
dict_set(out, "separation_reward", log->t_separation_reward);
dict_set(out, "alignment_reward", log->t_alignment_reward);
dict_set(out, "n", log->n);
}
49 changes: 28 additions & 21 deletions ocean/boids/boids.c
Original file line number Diff line number Diff line change
@@ -1,40 +1,43 @@
// Standalone C demo for Boids environment
// Compile using: ./scripts/build_ocean.sh boids [local|fast]
// Compile using: ./scripts/build.sh boids [local|fast]
// Run with: ./boids

#include <time.h>
#include "boids.h"
#include <stdlib.h>

// --- Demo Configuration ---
#define NUM_BOIDS_DEMO 20 // Number of boids for the standalone demo
#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
#define num_agents_DEMO 32 // Number of boids for the standalone demo
#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
#define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0]

// Dummy action generation: random velocity changes for each boid
void generate_dummy_actions(Boids* env) {
for (unsigned int i = 0; i < env->num_boids; ++i) {
// Generate random floats in [-1, 1] range
float rand_vx = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
float rand_vy = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;

// Scale to the action space [-ACTION_SCALE, ACTION_SCALE]
env->actions[i * 2 + 0] = rand_vx * ACTION_SCALE;
env->actions[i * 2 + 1] = rand_vy * ACTION_SCALE;
for (unsigned int i = 0; i < env->num_agents; ++i) {
env->actions[i * 2] = (float)(rand() % 3);
env->actions[i * 2 + 1] = (float)(rand() % 3);
}
}

// Manual steering for the demo: fill every agent's two action slots with the
// offset from that boid's position to the current mouse cursor.
//
// env: environment whose `actions` buffer is overwritten in place; reads
//      `num_agents` and each `boids[k].x` / `boids[k].y`.
//
// The cursor is sampled once, before the loop, so all agents steer toward the
// same point within a step.
//
// NOTE(review): the values written here are raw position deltas, whereas
// generate_dummy_actions() writes values produced by `rand() % 3`. Confirm
// that c_step() handles both encodings as intended.
void apply_manual_actions(Boids* env) {
    const float cursor_x = (float)GetMouseX();
    const float cursor_y = (float)GetMouseY();

    for (unsigned int agent = 0; agent < env->num_agents; ++agent) {
        // Two consecutive action slots per agent: [x component, y component].
        const unsigned int slot = agent * 2;
        env->actions[slot] = cursor_x - env->boids[agent].x;
        env->actions[slot + 1] = cursor_y - env->boids[agent].y;
    }
}

void demo() {
// Initialize Boids environment struct
Boids env = {0};
env.num_boids = NUM_BOIDS_DEMO;
env.num_agents = num_agents_DEMO;
env.report_interval = REPORT_INTERVAL_DEMO;

// In the Python binding, these pointers are assigned from NumPy arrays.
// Here, we need to allocate them explicitly.
size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy)
size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy)
size_t obs_size = env.num_agents * env.num_agents * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
size_t act_size = env.num_agents * 2; // the 2 = (dvx, dvy)
env.observations = (float*)calloc(obs_size, sizeof(float));
env.actions = (float*)calloc(act_size, sizeof(float));
env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward
env.rewards = (float*)calloc(env.num_agents, sizeof(float)); // Env-level reward

if (!env.observations || !env.actions || !env.rewards) {
fprintf(stderr, "ERROR: Failed to allocate memory for demo buffers.\n");
Expand All @@ -57,10 +60,14 @@ void demo() {
c_reset(&env);
int total_steps = 0;

printf("Starting Boids demo with %d boids. Press ESC to exit.\n", env.num_boids);
printf("Starting Boids demo with %u boids. Press ESC to exit. Hold SHIFT + arrows/WASD to steer.\n", env.num_agents);

while (!WindowShouldClose() && total_steps < MAX_STEPS_DEMO) { // Raylib function to check if ESC is pressed or window closed
generate_dummy_actions(&env);
if (IsKeyDown(KEY_LEFT_SHIFT)) {
apply_manual_actions(&env);
} else {
generate_dummy_actions(&env);
}
c_step(&env);
c_render(&env);
total_steps++;
Expand Down
Loading
Loading