diff --git a/pufferlib/config/ocean/gpudrive.ini b/pufferlib/config/ocean/gpudrive.ini
index 4391e1c7f1..17ac2ae893 100644
--- a/pufferlib/config/ocean/gpudrive.ini
+++ b/pufferlib/config/ocean/gpudrive.ini
@@ -3,11 +3,10 @@ package = ocean
 env_name = puffer_gpudrive
 policy_name = GPUDrive
 rnn_name = Recurrent
-
 [vec]
-num_workers = 16
-num_envs = 16
-batch_size = 8
+num_workers = 8
+num_envs = 8
+batch_size = 2
 #backend = Serial
 
 [policy]
@@ -19,66 +18,62 @@ input_size = 512
 hidden_size = 512
 
 [env]
-num_envs = 72
-reward_vehicle_collision = 0.0
-reward_offroad_collision = 0.0
-
+num_agents = 1024
+reward_vehicle_collision = -0.3988470042550285
+reward_offroad_collision = -0.2096585204075736
+spawn_immunity_timer = 28.852396095973667
+num_maps = 100
 [train]
-total_timesteps = 150_000_000
-#learning_rate = 0.005
+total_timesteps = 100_000_000
+#learning_rate = 0.02
+#gamma = 0.985
 anneal_lr = True
-batch_size = 738192
-minibatch_size = 23296
-max_minibatch_size = 23296
+batch_size = 745472
+minibatch_size = 11648
+max_minibatch_size = 11648
 bptt_horizon = 91
 
-#adam_beta1 = 0.9225899639773112
-#adam_beta2 = 0.9
-#adam_eps = 0.0004030478187254784
-#ent_coef = 0.0020159472963835016
-#gae_lambda = 0.8829440612065992
-#gamma = 0.9872971455373439
-#learning_rate = 0.0003947934701844728
-#max_grad_norm = 0.5296288081133984
-#prio_alpha = 0.99
-#prio_beta0 = 0.48469847315324566
-#update_epochs = 2
-#vf_coef = 3.6777541336880786
-#checkpoint_interval = 1000
+adam_beta1 = 0.9552561751280151
+adam_beta2 = 0.9998539818458761
+adam_eps = 1.9142243622636095e-10
+clip_coef = 0.3092857081843024
+ent_coef = 0.00253448350160094
+gae_lambda = 0.9422289315382624
+gamma = 0.9775539006742
+learning_rate = 0.0055822711860840125
+max_grad_norm = 1.5891941467078512
+prio_alpha = 0.52915904496396
+prio_beta0 = 0.4455420904215256
+update_epochs = 1
+vf_coef = 1.1854206994433176
+checkpoint_interval = 1000
+
 
-adam_beta1 = 0.9852000972032763
-adam_beta2 = 0.9948751690861872
-adam_eps = 0.000002967099767264975
-clip_coef = 0.3153578071651496
-ent_coef = 0.000369784972524992
-gae_lambda = 0.9385892578563558
-gamma = 0.9864999317644947
-learning_rate = 0.0022659903674495338
-max_grad_norm = 1.942292174080673
-prio_alpha = 0.9414003089586056
-prio_beta = 0.9429842108374631
-vf_clip_coef = 1.9533056765171148
-vf_coef = 3.2028923035616774
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 5e7
-max = 2e8
-mean = 1e8
+min = 2e6
+max = 5e6
+mean = 3.5e6
 scale = time
  
 [sweep.env.reward_vehicle_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
 max = 0.0
-mean = -0.5
+mean = -0.2
 scale = auto 
  
 [sweep.env.reward_offroad_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
 max = 0.0
-mean = -0.5
+mean = -0.2
+scale = auto
+
+[sweep.env.spawn_immunity_timer]
+distribution = uniform
+min = 1
+max = 91
+mean = 30
 scale = auto
diff --git a/pufferlib/ocean/gpudrive/binding.c b/pufferlib/ocean/gpudrive/binding.c
index a8999d90f3..f584a8b85f 100644
--- a/pufferlib/ocean/gpudrive/binding.c
+++ b/pufferlib/ocean/gpudrive/binding.c
@@ -4,23 +4,61 @@
 #include "../env_binding.h"
 
 static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
-    int num_envs = unpack(kwargs, "num_envs");
-    GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
-    PyObject* agent_offsets = PyList_New(num_envs+1);
-    int total_count = 0;
-    // getting  agent counts and offsets
-    for(int i = 0;i< num_envs;i++) {
+    int num_agents = unpack(kwargs, "num_agents");
+    int num_maps = unpack(kwargs, "num_maps");
+    // GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
+    // PyObject* agent_offsets = PyList_New(num_envs+1);
+    // PyObject* map_ids = PyList_New(num_envs);
+    srand(time(NULL));
+    int total_agent_count = 0;
+    int env_count = 0;
+    int max_envs = num_agents;
+    PyObject* agent_offsets = PyList_New(max_envs+1);
+    PyObject* map_ids = PyList_New(max_envs);
+    // getting env count
+    while(total_agent_count < num_agents && env_count < max_envs){
         char map_file[100];
-        sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", i);
-        temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]);
-        set_active_agents(&temp_envs[i]);
-        PyObject* num = PyLong_FromLong(total_count);
-        PyList_SetItem(agent_offsets, i, num);
-        //Py_DECREF(num);
-        total_count += temp_envs[i].active_agent_count;
+        int map_id = rand() % num_maps;
+        GPUDrive* env = calloc(1, sizeof(GPUDrive));
+        sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id);
+        env->entities = load_map_binary(map_file, env);
+        set_active_agents(env);
+        // Store map_id
+        PyObject* map_id_obj = PyLong_FromLong(map_id);
+        PyList_SetItem(map_ids, env_count, map_id_obj);
+        // Store agent offset
+        PyObject* offset = PyLong_FromLong(total_agent_count);
+        PyList_SetItem(agent_offsets, env_count, offset);
+        total_agent_count += env->active_agent_count;
+        env_count++;
+        for(int j=0;j<env->num_entities;j++) {
+            free_entity(&env->entities[j]);
+        }
+        free(env->entities);
+        free(env->active_agent_indices);
+        free(env->static_car_indices);
+        free(env->expert_static_car_indices);
+        free(env);
+    }
+    if(total_agent_count >= num_agents){
+        total_agent_count = num_agents;
     }
-    PyObject* num = PyLong_FromLong(total_count);
-    PyList_SetItem(agent_offsets, num_envs, num);
+    PyObject* final_total_agent_count = PyLong_FromLong(total_agent_count);
+    PyList_SetItem(agent_offsets, env_count, final_total_agent_count);
+    PyObject* final_env_count = PyLong_FromLong(env_count);
+    // resize lists
+    PyObject* resized_agent_offsets = PyList_GetSlice(agent_offsets, 0, env_count + 1);
+    PyObject* resized_map_ids = PyList_GetSlice(map_ids, 0, env_count);
+    //
+    Py_DECREF(agent_offsets);
+    Py_DECREF(map_ids);
+    // create a tuple
+    PyObject* tuple = PyTuple_New(3);
+    PyTuple_SetItem(tuple, 0, resized_agent_offsets);
+    PyTuple_SetItem(tuple, 1, resized_map_ids);
+    PyTuple_SetItem(tuple, 2, final_env_count);
+    return tuple;
+
     //Py_DECREF(num);
     /*
     for(int i = 0;i<num_envs; i++) {
@@ -33,18 +71,21 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
     }
     free(temp_envs);
     */
-    return agent_offsets;
+    // return agent_offsets;
 }
 
 static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
     env->human_agent_idx = unpack(kwargs, "human_agent_idx");
     env->reward_vehicle_collision = unpack(kwargs, "reward_vehicle_collision");
     env->reward_offroad_collision = unpack(kwargs, "reward_offroad_collision");
-    int env_id = unpack(kwargs, "env_id");
+    env->spawn_immunity_timer = unpack(kwargs, "spawn_immunity_timer");
+    int map_id = unpack(kwargs, "map_id");
+    int max_agents = unpack(kwargs, "max_agents");
 
     char map_file[100];
-    sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", env_id);
+    sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id);
     env->map_name = map_file;
+    env->num_agents = max_agents;
     init(env);
     return 0;
 }
@@ -58,5 +99,7 @@ static int my_log(PyObject* dict, Log* log) {
     assign_to_dict(dict, "collision_rate", log->collision_rate);
     assign_to_dict(dict, "dnf_rate", log->dnf_rate);
     assign_to_dict(dict, "n", log->n);
+    assign_to_dict(dict, "completion_rate", log->completion_rate);
+    assign_to_dict(dict, "clean_collision_rate", log->clean_collision_rate);
     return 0;
 }
diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c
index 123074e677..2bffbf2eda 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.c
+++ b/pufferlib/ocean/gpudrive/gpudrive.c
@@ -102,7 +102,8 @@ void demo() {
         .human_agent_idx = 0,
         .reward_vehicle_collision = -0.1f,
         .reward_offroad_collision = -0.1f,
-	    .map_name = "resources/gpudrive/binaries/map_000.bin"
+	    .map_name = "resources/gpudrive/binaries/map_086.bin",
+        .spawn_immunity_timer = 30
     };
     allocate(&env);
     c_reset(&env);
@@ -173,7 +174,7 @@ void performance_test() {
     GPUDrive env = {
         .dynamics_model = CLASSIC,
         .human_agent_idx = 0,
-	    .map_name = "resources/gpudrive/binaries/map_005.bin"
+	    .map_name = "resources/gpudrive/binaries/map_055.bin"
     };
     clock_t start_time, end_time;
     double cpu_time_used;
@@ -206,7 +207,7 @@ void performance_test() {
 }
 
 int main() {
-    // demo();
-    performance_test();
+    demo();
+    // performance_test();
     return 0;
 }
diff --git a/pufferlib/ocean/gpudrive/gpudrive.h b/pufferlib/ocean/gpudrive/gpudrive.h
index effcd4ccf6..d8be0457e5 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.h
+++ b/pufferlib/ocean/gpudrive/gpudrive.h
@@ -93,6 +93,8 @@ struct Log {
     float score;
     float offroad_rate;
     float collision_rate;
+    float clean_collision_rate;
+    float completion_rate;
     float dnf_rate;
     float n;
 };
@@ -127,6 +129,11 @@ struct Entity {
     float vz;
     float heading;
     int valid;
+    int reached_goal;
+    int respawn_timestep;
+    int collided_before_goal;
+    int reached_goal_this_episode;
+    int active_agent;
 };
 
 void free_entity(Entity* entity){
@@ -176,8 +183,6 @@ struct GPUDrive {
     int* expert_static_car_indices;
     int timestep;
     int dynamics_model;
-    float* fake_data;
-    char* goal_reached;
     float* map_corners;
     int* grid_cells;  // holds entity ids and geometry index per cell
     int grid_cols;
@@ -189,22 +194,28 @@ struct GPUDrive {
     float reward_vehicle_collision;
     float reward_offroad_collision;
     char* map_name;
-    char* reached_goal_this_episode;
     float world_mean_x;
     float world_mean_y;
+    int spawn_immunity_timer;
 };
 
 void add_log(GPUDrive* env) {
     for(int i = 0; i < env->active_agent_count; i++){
-        if(env->reached_goal_this_episode[i]) {
-            env->log.score += 1.0f;
-            env->log.perf += 1.0f;
+        Entity* e = &env->entities[env->active_agent_indices[i]];
+        if(e->reached_goal_this_episode){
+            env->log.completion_rate += 1.0f;
         }
         int offroad = env->logs[i].offroad_rate;
         env->log.offroad_rate += offroad;
         int collided = env->logs[i].collision_rate;
         env->log.collision_rate += collided;
-        if(!offroad && !collided && !env->reached_goal_this_episode[i]){
+        int clean_collided = env->logs[i].clean_collision_rate;
+        env->log.clean_collision_rate += clean_collided;
+        if(e->reached_goal_this_episode && !e->collided_before_goal){
+            env->log.score += 1.0f;
+            env->log.perf += 1.0f;
+        }
+        if(!offroad && !collided && !e->reached_goal_this_episode){
             env->log.dnf_rate += 1.0f;
         }
         env->log.episode_length += env->logs[i].episode_length;
@@ -304,6 +315,9 @@ void set_start_position(GPUDrive* env){
             e->vx = 0;
             e->vy = 0;
             e->vz = 0;
+            e->reached_goal = 0;
+            e->collided_before_goal = 0;
+            e->respawn_timestep = -1;
         } else{
             e->vx = e->traj_vx[0];
             e->vy = e->traj_vy[0];
@@ -318,6 +332,23 @@ void set_start_position(GPUDrive* env){
 
 }
 
+int valid_active_agent(GPUDrive* env, int agent_idx){
+    float cos_heading = cosf(env->entities[agent_idx].traj_heading[0]);
+    float sin_heading = sinf(env->entities[agent_idx].traj_heading[0]);
+    float goal_x = env->entities[agent_idx].goal_position_x - env->entities[agent_idx].traj_x[0];
+    float goal_y = env->entities[agent_idx].goal_position_y - env->entities[agent_idx].traj_y[0];
+    // Rotate to ego vehicle's frame
+    float rel_goal_x = goal_x*cos_heading + goal_y*sin_heading;
+    float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading;
+    float distance_to_goal = relative_distance_2d(0, 0, rel_goal_x, rel_goal_y);
+    env->entities[agent_idx].width *= 0.7f;
+    env->entities[agent_idx].length *= 0.7f;
+    if(distance_to_goal >= 2.0f && env->entities[agent_idx].mark_as_expert == 0 && env->active_agent_count < env->num_agents){
+        return distance_to_goal;
+    }
+    return 0;
+}
+
 void set_active_agents(GPUDrive* env){
     env->static_car_count = 0;
     env->num_cars = 1;
@@ -325,30 +356,35 @@ void set_active_agents(GPUDrive* env){
     int active_agent_indices[MAX_CARS];
     int static_car_indices[MAX_CARS];
     int expert_static_car_indices[MAX_CARS];
-    env->active_agent_count = 1;
-    active_agent_indices[0] = env->num_objects-1;
+    
+    if(env->num_agents ==0){
+        env->num_agents = MAX_CARS;
+    }
+    int first_agent_id = env->num_objects-1;
+    float distance_to_goal = valid_active_agent(env, first_agent_id);
+    if(distance_to_goal){
+        env->active_agent_count = 1;
+        active_agent_indices[0] = first_agent_id;
+        env->entities[first_agent_id].active_agent = 1;
+        env->num_cars = 1;
+    } else {
+        env->active_agent_count = 0;
+        env->num_cars = 0;
+    }
     for(int i = 0; i < env->num_objects-1 && env->num_cars < MAX_CARS; i++){
         if(env->entities[i].type != 1) continue;
         if(env->entities[i].traj_valid[0] != 1) continue;
         env->num_cars++;
-        float cos_heading = cosf(env->entities[i].traj_heading[0]);
-        float sin_heading = sinf(env->entities[i].traj_heading[0]);
-        float goal_x = env->entities[i].goal_position_x - env->entities[i].traj_x[0];
-        float goal_y = env->entities[i].goal_position_y - env->entities[i].traj_y[0];
-        // Rotate to ego vehicle's frame
-        float rel_goal_x = goal_x*cos_heading + goal_y*sin_heading;
-        float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading;
-        float distance_to_goal = relative_distance_2d(0, 0, rel_goal_x, rel_goal_y);
-        env->entities[i].width *= 0.7f;
-        env->entities[i].length *= 0.7f;
-        
-        if(distance_to_goal >= 2.0f && env->entities[i].mark_as_expert == 0){
+        float distance_to_goal = valid_active_agent(env, i);
+        if(distance_to_goal > 0){
             active_agent_indices[env->active_agent_count] = i;
             env->active_agent_count++;
+            env->entities[i].active_agent = 1;
         } else {
             static_car_indices[env->static_car_count] = i;
             env->static_car_count++;
-            if(env->entities[i].mark_as_expert == 1){
+            env->entities[i].active_agent = 0;
+            if(env->entities[i].mark_as_expert == 1 || (distance_to_goal >=2.0f && env->active_agent_count == env->num_agents)){
                 expert_static_car_indices[env->expert_static_car_count] = i;
                 env->expert_static_car_count++;
             }
@@ -611,8 +647,6 @@ void init(GPUDrive* env){
     set_start_position(env);
     // printf("Active agents: %d\n", env->active_agent_count);
     env->logs = (Log*)calloc(env->active_agent_count, sizeof(Log));
-    env->goal_reached = (char*)calloc(env->active_agent_count, sizeof(char));
-    env->reached_goal_this_episode = (char*)calloc(env->active_agent_count, sizeof(char));
     init_grid_map(env);
     env->vision_range = 21;
     init_neighbor_offsets(env);
@@ -627,9 +661,6 @@ void free_initialized(GPUDrive* env){
     free(env->entities);
     free(env->active_agent_indices);
     free(env->logs);
-    free(env->fake_data);
-    free(env->goal_reached);
-    free(env->reached_goal_this_episode);
     free(env->map_corners);
     free(env->grid_cells);
     free(env->neighbor_offsets);
@@ -637,6 +668,7 @@ void free_initialized(GPUDrive* env){
     free(env->neighbor_cache_indices);
     free(env->static_car_indices);
     free(env->expert_static_car_indices);
+    // free(env->map_name);
 }
 
 void allocate(GPUDrive* env){
@@ -662,6 +694,10 @@ void free_allocated(GPUDrive* env){
     free_initialized(env);
 }
 
+void c_close(GPUDrive* env){
+    free_initialized(env);
+}
+
 float clipSpeed(float speed) {
     const float maxSpeed = MAX_SPEED;
     if (speed > maxSpeed) return maxSpeed;
@@ -699,8 +735,8 @@ void move_dynamics(GPUDrive* env, int action_idx, int agent_idx){
         // Time step (adjust as needed)
         const float dt = 0.1f;        
         // Update speed with acceleration
-        speed = speed + acceleration*dt;
-        if (speed < 0) speed = 0;  // Prevent going backward
+        speed = speed + 0.5f*acceleration*dt;
+        // if (speed < 0) speed = 0;  // Prevent going backward
         speed = clipSpeed(speed);
         // compute yaw rate
         float beta = tanh(.5*tanf(steering));
@@ -882,9 +918,17 @@ void collision_check(GPUDrive* env, int agent_idx) {
         if (collided == VEHICLE_COLLISION) break;
     }
     agent->collision_state = collided;
-    if (car_collided_with_index != -1) {
-        env->entities[car_collided_with_index].collision_state = VEHICLE_COLLISION;
+    // spawn immunity for collisions with other agent cars as agent_idx respawns
+    if(collided == VEHICLE_COLLISION && env->entities[car_collided_with_index].active_agent == 1 && env->entities[agent_idx].respawn_timestep != -1 && env->timestep - env->entities[agent_idx].respawn_timestep < env->spawn_immunity_timer){
+        agent->collision_state = 0;
     }
+
+    // spawn immunity for collisions with other cars who just respawned
+    if (car_collided_with_index != -1 && (env->entities[car_collided_with_index].respawn_timestep != -1 && env->timestep - env->entities[car_collided_with_index].respawn_timestep >= env->spawn_immunity_timer )) {
+        env->entities[car_collided_with_index].collision_state = VEHICLE_COLLISION;
+    } else if (car_collided_with_index != -1 && (env->entities[car_collided_with_index].respawn_timestep != -1 && env->timestep - env->entities[car_collided_with_index].respawn_timestep < env->spawn_immunity_timer )) {
+        agent->collision_state = 0;
+    } 
 }
 
 float normalize_value(float value, float min, float max){
@@ -892,7 +936,7 @@ float normalize_value(float value, float min, float max){
 }
 
 float reverse_normalize_value(float value, float min, float max){
-    return value*(max - min) + min;
+    return value*100.0f;
 }
 
 void compute_observations(GPUDrive* env) {
@@ -915,10 +959,10 @@ void compute_observations(GPUDrive* env) {
         float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading;
         //obs[0] = normalize_value(rel_goal_x, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
         //obs[1] = normalize_value(rel_goal_y, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
-        obs[0] = rel_goal_x/20.0f;
-        obs[1] = rel_goal_y/20.0f;
+        obs[0] = rel_goal_x/100.0f;
+        obs[1] = rel_goal_y/100.0f;
         //obs[2] = ego_speed / MAX_SPEED;
-        obs[2] = ego_speed / 5.0f;
+        obs[2] = ego_speed / 100.0f;
         obs[3] = ego_entity->width / MAX_VEH_WIDTH;
         obs[4] = ego_entity->length / MAX_VEH_LEN;
         obs[5] = (ego_entity->collision_state > 0) ? 1 : 0;
@@ -946,8 +990,8 @@ void compute_observations(GPUDrive* env) {
             float rel_x = dx*cos_heading + dy*sin_heading;
             float rel_y = -dx*sin_heading + dy*cos_heading;
             // Store observations with correct indexing
-            obs[obs_idx] = rel_x / 20.0f;
-            obs[obs_idx + 1] = rel_y / 20.0f;
+            obs[obs_idx] = rel_x / 100.0f;
+            obs[obs_idx + 1] = rel_y / 100.0f;
             obs[obs_idx + 2] = other_entity->width / MAX_VEH_WIDTH;
             obs[obs_idx + 3] = other_entity->length / MAX_VEH_LEN;
             // relative heading
@@ -996,8 +1040,8 @@ void compute_observations(GPUDrive* env) {
             // Compute sin and cos of relative angle directly without atan2f
             float cos_angle = dx_norm*cos_heading + dy_norm*sin_heading;
             float sin_angle = -dx_norm*sin_heading + dy_norm*cos_heading;
-            obs[obs_idx] = x_obs / 20.0f;
-            obs[obs_idx + 1] = y_obs / 20.0f;
+            obs[obs_idx] = x_obs / 100.0f;
+            obs[obs_idx + 1] = y_obs / 100.0f;
             obs[obs_idx + 2] = length / MAX_ROAD_SEGMENT_LENGTH;
             obs[obs_idx + 3] = width / MAX_ROAD_SCALE;
             obs[obs_idx + 4] = cos_angle / MAX_ORIENTATION_RAD;
@@ -1017,10 +1061,12 @@ void c_reset(GPUDrive* env){
     for(int x = 0;x<env->active_agent_count; x++){
         env->logs[x] = (Log){0};
         int agent_idx = env->active_agent_indices[x];
+        env->entities[agent_idx].respawn_timestep = -1;
+        env->entities[agent_idx].reached_goal = 0;
+        env->entities[agent_idx].collided_before_goal = 0;
+        env->entities[agent_idx].reached_goal_this_episode = 0;
         collision_check(env, agent_idx);
     }
-    memset(env->goal_reached, 0, env->active_agent_count*sizeof(char));
-    memset(env->reached_goal_this_episode, 0, env->active_agent_count*sizeof(char));
     compute_observations(env);
 }
 
@@ -1030,6 +1076,8 @@ void respawn_agent(GPUDrive* env, int agent_idx){
     env->entities[agent_idx].heading = env->entities[agent_idx].traj_heading[0];
     env->entities[agent_idx].vx = env->entities[agent_idx].traj_vx[0];
     env->entities[agent_idx].vy = env->entities[agent_idx].traj_vy[0];
+    env->entities[agent_idx].reached_goal = 0;
+    env->entities[agent_idx].respawn_timestep = env->timestep;
 }
 
 void c_step(GPUDrive* env){
@@ -1050,25 +1098,34 @@ void c_step(GPUDrive* env){
         env->logs[i].score = 0.0f;
 	    env->logs[i].episode_length += 1;
         int agent_idx = env->active_agent_indices[i];
-        if(env->goal_reached[i] || env->entities[agent_idx].collision_state > 0){
+        int reached_goal = env->entities[agent_idx].reached_goal;
+        int collision_state = env->entities[agent_idx].collision_state;
+        if(reached_goal || collision_state > 0){
             respawn_agent(env, agent_idx);
-            env->goal_reached[i] = 0;
         }
         env->entities[agent_idx].collision_state = 0;
         move_dynamics(env, i, agent_idx);
         // move_expert(env, env->actions, agent_idx);
         collision_check(env, agent_idx);
-        if(env->entities[agent_idx].collision_state > 0 && env->goal_reached[i] == 0){
-            if(env->entities[agent_idx].collision_state == VEHICLE_COLLISION){
+        collision_state = env->entities[agent_idx].collision_state;
+        
+        if(collision_state > 0){
+            if(collision_state == VEHICLE_COLLISION){
                 env->rewards[i] = env->reward_vehicle_collision;
                 env->logs[i].collision_rate = 1.0f;
                 env->logs[i].episode_return += env->reward_vehicle_collision;
+                if(env->entities[agent_idx].respawn_timestep == -1){
+                    env->logs[i].clean_collision_rate = 1.0f;
+                }
             }
-            else if(env->entities[agent_idx].collision_state == OFFROAD){
+            else if(collision_state == OFFROAD){
                 env->rewards[i] = env->reward_offroad_collision;
                 env->logs[i].offroad_rate = 1.0f;
                 env->logs[i].episode_return += env->reward_offroad_collision;
             }
+            if(!env->entities[agent_idx].reached_goal_this_episode){
+                env->entities[agent_idx].collided_before_goal = 1;
+            }
         }
 
         float distance_to_goal = relative_distance_2d(
@@ -1076,12 +1133,11 @@ void c_step(GPUDrive* env){
                 env->entities[agent_idx].y,
                 env->entities[agent_idx].goal_position_x,
                 env->entities[agent_idx].goal_position_y);
-        int reached_goal = distance_to_goal < 2.0f;
-        if(reached_goal && env->goal_reached[i] == 0){            
+        if(distance_to_goal < 2.0f){  
             env->rewards[i] += 1.0f;
-	        env->goal_reached[i] = 1;
-	        env->logs[i].episode_return += 1.0f;
-            env->reached_goal_this_episode[i] = 1;
+            env->logs[i].episode_return += 1.0f;
+	        env->entities[agent_idx].reached_goal = 1;
+            env->entities[agent_idx].reached_goal_this_episode = 1;
 	    }
     }
     compute_observations(env);
@@ -1421,7 +1477,7 @@ void c_render(GPUDrive* env) {
                 car_model = client->cars[0];  // Collided agent
             }
             // Draw obs for human selected agent
-            if(agent_index == env->human_agent_idx && env->goal_reached[agent_index] == 0) {
+            if(agent_index == env->human_agent_idx && !env->entities[agent_index].reached_goal) {
                 draw_agent_obs(env, agent_index);
             }
             // Draw cube for cars static and active
@@ -1442,7 +1498,7 @@ void c_render(GPUDrive* env) {
             rlPopMatrix();
             // FPV Camera Control
             if(IsKeyDown(KEY_LEFT_CONTROL) && env->human_agent_idx== agent_index){
-                if(env->goal_reached[agent_index] == 1){
+                if(env->entities[agent_index].reached_goal){
                     env->human_agent_idx = rand() % env->active_agent_count;
                 }
                 Vector3 camera_position = (Vector3){
@@ -1549,9 +1605,10 @@ void c_render(GPUDrive* env) {
 }
 
 void close_client(Client* client){
-    for (int i = 0; i < 5; i++) {
+    for (int i = 0; i < 6; i++) {
         UnloadModel(client->cars[i]);
     }
+    UnloadTexture(client->puffers);
     CloseWindow();
     free(client);
 }
diff --git a/pufferlib/ocean/gpudrive/gpudrive.py b/pufferlib/ocean/gpudrive/gpudrive.py
index 528496663e..a852979751 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.py
+++ b/pufferlib/ocean/gpudrive/gpudrive.py
@@ -7,16 +7,18 @@
 from pufferlib.ocean.gpudrive import binding
 
 class GPUDrive(pufferlib.PufferEnv):
-    def __init__(self, num_envs=1, render_mode=None, report_interval=1,
+    def __init__(self, render_mode=None, report_interval=1,
             width=1280, height=1024,
             human_agent_idx=0,
             reward_vehicle_collision=-0.1,
             reward_offroad_collision=-0.1,
+            spawn_immunity_timer=30,
+            num_maps=100,
+            num_agents=512,
             buf = None,
             seed=1):
 
         # env
-        self.num_agents = num_envs
         self.render_mode = render_mode
         self.report_interval = report_interval
         
@@ -24,9 +26,8 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1,
         self.single_observation_space = gymnasium.spaces.Box(low=-1, high=1,
             shape=(self.num_obs,), dtype=np.float32)
         self.single_action_space = gymnasium.spaces.MultiDiscrete([7, 13])
-        agent_offsets = binding.shared(num_envs=num_envs)
-        total_agents = agent_offsets[-1]
-        self.num_agents = total_agents
+        agent_offsets, map_ids, num_envs = binding.shared(num_agents=num_agents, num_maps=num_maps)
+        self.num_agents = num_agents
         super().__init__(buf=buf)
         env_ids = []
         for i in range(num_envs):
@@ -42,7 +43,9 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1,
                 human_agent_idx=human_agent_idx,
                 reward_vehicle_collision=reward_vehicle_collision,
                 reward_offroad_collision=reward_offroad_collision,
-                env_id=i
+                spawn_immunity_timer=spawn_immunity_timer,
+                map_id=map_ids[i],
+                max_agents = nxt-cur
             )
             env_ids.append(env_id)
 
@@ -67,7 +70,7 @@ def step(self, actions):
             self.terminals, self.truncations, info)
 
     def render(self):
-        binding.vec_render(self.c_envs, 63)
+        binding.vec_render(self.c_envs, 0)
         
     def close(self):
         binding.vec_close(self.c_envs)
@@ -239,7 +242,7 @@ def process_all_maps():
     data_dir = Path("data/processed/training")
     
     # Get all JSON files in the training directory
-    json_files = sorted(data_dir.glob("*.json"))[0:512]
+    json_files = sorted(data_dir.glob("*.json"))
     
     print(f"Found {len(json_files)} JSON files")
     
@@ -254,13 +257,13 @@ def process_all_maps():
         except Exception as e:
             print(f"Error processing {map_path.name}: {e}")
 
-def test_performance(timeout=10, atn_cache=1024, num_envs=75):
+def test_performance(timeout=10, atn_cache=1024, num_agents=1024):
     import time
 
-    env = GPUDrive(num_envs=num_envs)
+    env = GPUDrive(num_agents=num_agents)
     env.reset()
     tick = 0
-    num_agents = 3968
+    num_agents = 1024
     actions = np.stack([
         np.random.randint(0, space.n + 1, (atn_cache, num_agents))
         for space in env.single_action_space
@@ -273,8 +276,7 @@ def test_performance(timeout=10, atn_cache=1024, num_envs=75):
         tick += 1
 
     print(f'SPS: {num_agents * tick / (time.time() - start)}')
-
-
+    env.close()
 if __name__ == '__main__':
-    # test_performance()
-    process_all_maps()
+    test_performance()
+    # process_all_maps()
diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index ef05533b69..83f70c44db 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -759,18 +759,9 @@ def encode_observations(self, observations, state=None):
         ego_features = self.ego_encoder(ego_obs)
         partner_features, _ = self.partner_encoder(partner_objects).max(dim=1)
         road_features, _ = self.road_encoder(road_objects).max(dim=1)
-        #partner_features_post_mask = self.post_mask_partner_encoder(partner_features)
-        #road_features_post_mask = self.post_mask_road_encoder(road_features)
         
-        #concat_features = torch.cat([ego_features, road_features_post_mask, partner_features_post_mask], dim=1)
         concat_features = torch.cat([ego_features, road_features, partner_features], dim=1)
         
-        # Apply max pooling across concatenated features
-        # Reshape to [batch, 3, hidden_size] to pool across the 3 modalities
-        # pooled_features = torch.max(
-        #     concat_features.view(-1, 3, self.hidden_size), dim=1
-        # )[0]
-        
         # Pass through shared embedding
         embedding = F.relu(self.shared_embedding(concat_features))
         # embedding = self.shared_embedding(concat_features)