PufferAI · jsuarez5341 · May 14, 2025 · May 8, 2025 · May 9, 2025 · May 11, 2025
diff --git a/pufferlib/config/ocean/gpudrive.ini b/pufferlib/config/ocean/gpudrive.ini
@@ -3,11 +3,10 @@ package = ocean
 env_name = puffer_gpudrive
 policy_name = GPUDrive
 rnn_name = Recurrent
-
 [vec]
-num_workers = 16
-num_envs = 16
-batch_size = 8
+num_workers = 8
+num_envs = 8
+batch_size = 2
 #backend = Serial
 
 [policy]
@@ -19,66 +18,62 @@ input_size = 512
 hidden_size = 512
 
 [env]
-num_envs = 72
-reward_vehicle_collision = 0.0
-reward_offroad_collision = 0.0
-
+num_agents = 1024
+reward_vehicle_collision = -0.3988470042550285
+reward_offroad_collision = -0.2096585204075736
+spawn_immunity_timer = 28.852396095973667
+num_maps = 100
 [train]
-total_timesteps = 150_000_000
-#learning_rate = 0.005
+total_timesteps = 100_000_000
+#learning_rate = 0.02
+#gamma = 0.985
 anneal_lr = True
-batch_size = 738192
-minibatch_size = 23296
-max_minibatch_size = 23296
+batch_size = 745472
+minibatch_size = 11648
+max_minibatch_size = 11648
 bptt_horizon = 91
 
-#adam_beta1 = 0.9225899639773112
-#adam_beta2 = 0.9
-#adam_eps = 0.0004030478187254784
-#ent_coef = 0.0020159472963835016
-#gae_lambda = 0.8829440612065992
-#gamma = 0.9872971455373439
-#learning_rate = 0.0003947934701844728
-#max_grad_norm = 0.5296288081133984
-#prio_alpha = 0.99
-#prio_beta0 = 0.48469847315324566
-#update_epochs = 2
-#vf_coef = 3.6777541336880786
-#checkpoint_interval = 1000
+adam_beta1 = 0.9552561751280151
+adam_beta2 = 0.9998539818458761
+adam_eps = 1.9142243622636095e-10
+clip_coef = 0.3092857081843024
+ent_coef = 0.00253448350160094
+gae_lambda = 0.9422289315382624
+gamma = 0.9775539006742
+learning_rate = 0.0055822711860840125
+max_grad_norm = 1.5891941467078512
+prio_alpha = 0.52915904496396
+prio_beta0 = 0.4455420904215256
+update_epochs = 1
+vf_coef = 1.1854206994433176
+checkpoint_interval = 1000
+
 
-adam_beta1 = 0.9852000972032763
-adam_beta2 = 0.9948751690861872
-adam_eps = 0.000002967099767264975
-clip_coef = 0.3153578071651496
-ent_coef = 0.000369784972524992
-gae_lambda = 0.9385892578563558
-gamma = 0.9864999317644947
-learning_rate = 0.0022659903674495338
-max_grad_norm = 1.942292174080673
-prio_alpha = 0.9414003089586056
-prio_beta = 0.9429842108374631
-vf_clip_coef = 1.9533056765171148
-vf_coef = 3.2028923035616774
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 5e7
-max = 2e8
-mean = 1e8
+min = 2e6
+max = 5e6
+mean = 3.5e6
 scale = time
 
 [sweep.env.reward_vehicle_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
 max = 0.0
-mean = -0.5
+mean = -0.2
 scale = auto 
 
 [sweep.env.reward_offroad_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
 max = 0.0
-mean = -0.5
+mean = -0.2
+scale = auto
+
+[sweep.env.spawn_immunity_timer]
+distribution = uniform
+min = 1
+max = 91
+mean = 30
 scale = auto
diff --git a/pufferlib/ocean/gpudrive/binding.c b/pufferlib/ocean/gpudrive/binding.c
@@ -4,23 +4,61 @@
 #include "../env_binding.h"
 
 static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
-    int num_envs = unpack(kwargs, "num_envs");
-    GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
-    PyObject* agent_offsets = PyList_New(num_envs+1);
-    int total_count = 0;
-    // getting  agent counts and offsets
-    for(int i = 0;i< num_envs;i++) {
+    int num_agents = unpack(kwargs, "num_agents"); // target total of active agents across all envs
+    int num_maps = unpack(kwargs, "num_maps"); // map ids are drawn from [0, num_maps)
+    // Plans a batch of environments: draws random maps until their active
+    // agents add up to at least num_agents (or the env cap is hit), then
+    // returns the tuple (agent_offsets, map_ids, env_count).
+    srand(time(NULL)); // reseeds rand() from the wall clock on every call
+    int total_agent_count = 0;
+    int env_count = 0;
+    int max_envs = num_agents; // cap on the number of envs; also sizes the two lists below
+    PyObject* agent_offsets = PyList_New(max_envs+1); // one start offset per env plus a closing total
+    PyObject* map_ids = PyList_New(max_envs);
+    // keep adding randomly chosen maps until the agent target or the env cap is reached
+    while(total_agent_count < num_agents && env_count < max_envs){
         char map_file[100];
-        sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", i);
-        temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]);
-        set_active_agents(&temp_envs[i]);
-        PyObject* num = PyLong_FromLong(total_count);
-        PyList_SetItem(agent_offsets, i, num);
-        //Py_DECREF(num);
-        total_count += temp_envs[i].active_agent_count;
+        int map_id = rand() % num_maps; // NOTE(review): undefined behavior if num_maps == 0 - confirm callers pass > 0
+        GPUDrive* env = calloc(1, sizeof(GPUDrive)); // scratch env, freed at the end of this iteration; NOTE(review): result is not NULL-checked
+        sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id);
+        env->entities = load_map_binary(map_file, env); // presumably also fills env->num_entities - confirm in load_map_binary
+        set_active_agents(env); // presumably fills active_agent_count and the index arrays freed below - confirm
+        // remember which map this env uses (PyList_SetItem steals the reference)
+        PyObject* map_id_obj = PyLong_FromLong(map_id);
+        PyList_SetItem(map_ids, env_count, map_id_obj);
+        // the running total before this env is added is this env's starting offset
+        PyObject* offset = PyLong_FromLong(total_agent_count);
+        PyList_SetItem(agent_offsets, env_count, offset);
+        total_agent_count += env->active_agent_count;
+        env_count++;
+        for(int j=0;j<env->num_entities;j++) { // from here on: release everything the scratch env owns
+            free_entity(&env->entities[j]);
+        }
+        free(env->entities);
+        free(env->active_agent_indices);
+        free(env->static_car_indices);
+        free(env->expert_static_car_indices);
+        free(env);
+    }
+    if(total_agent_count >= num_agents){
+        total_agent_count = num_agents; // clamp any overshoot, so the last env's span can be shorter than its active agent count
     }
-    PyObject* num = PyLong_FromLong(total_count);
-    PyList_SetItem(agent_offsets, num_envs, num);
+    PyObject* final_total_agent_count = PyLong_FromLong(total_agent_count);
+    PyList_SetItem(agent_offsets, env_count, final_total_agent_count); // closing entry stored at index env_count
+    PyObject* final_env_count = PyLong_FromLong(env_count);
+    // copy only the filled prefix of each preallocated list
+    PyObject* resized_agent_offsets = PyList_GetSlice(agent_offsets, 0, env_count + 1);
+    PyObject* resized_map_ids = PyList_GetSlice(map_ids, 0, env_count);
+    // the slices are new lists, so the oversized originals can be released
+    Py_DECREF(agent_offsets);
+    Py_DECREF(map_ids);
+    // result tuple: (agent_offsets, map_ids, env_count); PyTuple_SetItem steals each reference
+    PyObject* tuple = PyTuple_New(3);
+    PyTuple_SetItem(tuple, 0, resized_agent_offsets);
+    PyTuple_SetItem(tuple, 1, resized_map_ids);
+    PyTuple_SetItem(tuple, 2, final_env_count);
+    return tuple;
+
     //Py_DECREF(num);
     /*
     for(int i = 0;i<num_envs; i++) {
@@ -33,18 +71,21 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
     }
     free(temp_envs);
     */
-    return agent_offsets;
+    // return agent_offsets;
 }
 
 static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
     env->human_agent_idx = unpack(kwargs, "human_agent_idx");
     env->reward_vehicle_collision = unpack(kwargs, "reward_vehicle_collision");
     env->reward_offroad_collision = unpack(kwargs, "reward_offroad_collision");
-    int env_id = unpack(kwargs, "env_id");
+    env->spawn_immunity_timer = unpack(kwargs, "spawn_immunity_timer");
+    int map_id = unpack(kwargs, "map_id"); // selects the map binary whose path is built below
+    int max_agents = unpack(kwargs, "max_agents"); // copied into env->num_agents before init() runs
 
     char map_file[100];
-    sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", env_id);
+    sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id); // NOTE(review): map_file is a stack buffer and env->map_name is pointed at it; it dangles once my_init returns - confirm init() copies or is done with it
     env->map_name = map_file;
+    env->num_agents = max_agents;
     init(env);
     return 0;
 }
@@ -58,5 +99,7 @@ static int my_log(PyObject* dict, Log* log) {
     assign_to_dict(dict, "collision_rate", log->collision_rate);
     assign_to_dict(dict, "dnf_rate", log->dnf_rate);
     assign_to_dict(dict, "n", log->n);
+    assign_to_dict(dict, "completion_rate", log->completion_rate);
+    assign_to_dict(dict, "clean_collision_rate", log->clean_collision_rate);
     return 0;
 }
diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c
@@ -102,7 +102,8 @@ void demo() {
         .human_agent_idx = 0,
         .reward_vehicle_collision = -0.1f,
         .reward_offroad_collision = -0.1f,
-	    .map_name = "resources/gpudrive/binaries/map_000.bin"
+	    .map_name = "resources/gpudrive/binaries/map_086.bin",
+        .spawn_immunity_timer = 30
     };
     allocate(&env);
     c_reset(&env);
@@ -173,7 +174,7 @@ void performance_test() {
     GPUDrive env = {
         .dynamics_model = CLASSIC,
         .human_agent_idx = 0,
-	    .map_name = "resources/gpudrive/binaries/map_005.bin"
+	    .map_name = "resources/gpudrive/binaries/map_055.bin"
     };
     clock_t start_time, end_time;
     double cpu_time_used;
@@ -206,7 +207,7 @@ void performance_test() {
 }
 
 int main() {
-    // demo();
-    performance_test();
+    demo(); // entry point now runs demo(); the performance_test() call below is left commented out
+    // performance_test();
     return 0;
 }