PufferAI · jsuarez5341 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/config/ocean/gpudrive.ini b/config/ocean/gpudrive.ini
@@ -8,6 +8,7 @@ rnn_name = Recurrent
 num_workers = 16
 num_envs = 16
 batch_size = 8
+#backend = Serial
 
 [policy]
 input_size = 64
@@ -18,19 +19,49 @@ input_size = 512
 hidden_size = 512
 
 [env]
-num_envs = 64
+num_envs = 72
 reward_vehicle_collision = -0.75
 reward_offroad_collision = -0.75
 
 [train]
-total_timesteps = 250_000_000
-learning_rate = 0.005
+total_timesteps = 100_000_000
+#learning_rate = 0.005
 anneal_lr = True
-batch_size = 752752
+batch_size = 738192
 minibatch_size = 23296
 max_minibatch_size = 23296
 bptt_horizon = 91
 
+#adam_beta1 = 0.9225899639773112
+#adam_beta2 = 0.9
+#adam_eps = 0.0004030478187254784
+#ent_coef = 0.0020159472963835016
+#gae_lambda = 0.8829440612065992
+#gamma = 0.9872971455373439
+#learning_rate = 0.0003947934701844728
+#max_grad_norm = 0.5296288081133984
+#prio_alpha = 0.99
+#prio_beta0 = 0.48469847315324566
+#update_epochs = 2
+#vf_coef = 3.6777541336880786
+#checkpoint_interval = 1000
+
+adam_beta1 = 0.9852000972032763
+adam_beta2 = 0.9948751690861872
+adam_eps = 0.000002967099767264975
+clip_coef = 0.3153578071651496
+ent_coef = 0.000369784972524992
+gae_lambda = 0.9385892578563558
+gamma = 0.9864999317644947
+learning_rate = 0.0022659903674495338
+max_grad_norm = 1.942292174080673
+prio_alpha = 0.9414003089586056
+prio_beta = 0.9429842108374631
+vf_clip_coef = 1.9533056765171148
+vf_coef = 3.2028923035616774
+
+
+
 [sweep.env.reward_vehicle_collision]
 distribution = uniform
 min = -1.0

diff --git a/pufferlib/ocean/gpudrive/cy_gpudrive.pyx b/pufferlib/ocean/gpudrive/cy_gpudrive.pyx
diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c
@@ -160,10 +160,10 @@ void demo() {
         // Handle human input for the controlled agent
         // handle_human_input(&env);
         c_step(&env);
-        c_render(client, &env);
+        c_render(&env);
     }
 
-    close_client(client);
+    close_client(env.client);
     free_allocated(&env);
 }
 
@@ -205,7 +205,7 @@ void performance_test() {
 }
 
 int main() {
-    demo();
-    //performance_test();
+    //demo();
+    performance_test();
     return 0;
 }