diff --git a/pufferlib/config/ocean/gpudrive.ini b/pufferlib/config/ocean/gpudrive.ini index 4391e1c7f1..17ac2ae893 100644 --- a/pufferlib/config/ocean/gpudrive.ini +++ b/pufferlib/config/ocean/gpudrive.ini @@ -3,11 +3,10 @@ package = ocean env_name = puffer_gpudrive policy_name = GPUDrive rnn_name = Recurrent - [vec] -num_workers = 16 -num_envs = 16 -batch_size = 8 +num_workers = 8 +num_envs = 8 +batch_size = 2 #backend = Serial [policy] @@ -19,66 +18,62 @@ input_size = 512 hidden_size = 512 [env] -num_envs = 72 -reward_vehicle_collision = 0.0 -reward_offroad_collision = 0.0 - +num_agents = 1024 +reward_vehicle_collision = -0.3988470042550285 +reward_offroad_collision = -0.2096585204075736 +spawn_immunity_timer = 28.852396095973667 +num_maps = 100 [train] -total_timesteps = 150_000_000 -#learning_rate = 0.005 +total_timesteps = 100_000_000 +#learning_rate = 0.02 +#gamma = 0.985 anneal_lr = True -batch_size = 738192 -minibatch_size = 23296 -max_minibatch_size = 23296 +batch_size = 745472 +minibatch_size = 11648 +max_minibatch_size = 11648 bptt_horizon = 91 -#adam_beta1 = 0.9225899639773112 -#adam_beta2 = 0.9 -#adam_eps = 0.0004030478187254784 -#ent_coef = 0.0020159472963835016 -#gae_lambda = 0.8829440612065992 -#gamma = 0.9872971455373439 -#learning_rate = 0.0003947934701844728 -#max_grad_norm = 0.5296288081133984 -#prio_alpha = 0.99 -#prio_beta0 = 0.48469847315324566 -#update_epochs = 2 -#vf_coef = 3.6777541336880786 -#checkpoint_interval = 1000 +adam_beta1 = 0.9552561751280151 +adam_beta2 = 0.9998539818458761 +adam_eps = 1.9142243622636095e-10 +clip_coef = 0.3092857081843024 +ent_coef = 0.00253448350160094 +gae_lambda = 0.9422289315382624 +gamma = 0.9775539006742 +learning_rate = 0.0055822711860840125 +max_grad_norm = 1.5891941467078512 +prio_alpha = 0.52915904496396 +prio_beta0 = 0.4455420904215256 +update_epochs = 1 +vf_coef = 1.1854206994433176 +checkpoint_interval = 1000 + -adam_beta1 = 0.9852000972032763 -adam_beta2 = 0.9948751690861872 -adam_eps = 0.000002967099767264975 -clip_coef = 0.3153578071651496 -ent_coef = 0.000369784972524992 -gae_lambda = 0.9385892578563558 -gamma = 0.9864999317644947 -learning_rate = 0.0022659903674495338 -max_grad_norm = 1.942292174080673 -prio_alpha = 0.9414003089586056 -prio_beta = 0.9429842108374631 -vf_clip_coef = 1.9533056765171148 -vf_coef = 3.2028923035616774 [sweep.train.total_timesteps] distribution = log_normal -min = 5e7 -max = 2e8 -mean = 1e8 +min = 2e6 +max = 5e6 +mean = 3.5e6 scale = time [sweep.env.reward_vehicle_collision] distribution = uniform min = -1.0 -max = -0.25 max = 0.0 -mean = -0.5 +mean = -0.2 scale = auto [sweep.env.reward_offroad_collision] distribution = uniform min = -1.0 -max = -0.25 max = 0.0 -mean = -0.5 +mean = -0.2 +scale = auto + +[sweep.env.spawn_immunity_timer] +distribution = uniform +min = 1 +max = 91 +mean = 30 scale = auto diff --git a/pufferlib/ocean/gpudrive/binding.c b/pufferlib/ocean/gpudrive/binding.c index a8999d90f3..f584a8b85f 100644 --- a/pufferlib/ocean/gpudrive/binding.c +++ b/pufferlib/ocean/gpudrive/binding.c @@ -4,23 +4,61 @@ #include "../env_binding.h" static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) { - int num_envs = unpack(kwargs, "num_envs"); - GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive)); - PyObject* agent_offsets = PyList_New(num_envs+1); - int total_count = 0; - // getting agent counts and offsets - for(int i = 0;i< num_envs;i++) { + int num_agents = unpack(kwargs, "num_agents"); + int num_maps = unpack(kwargs, "num_maps"); + // GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive)); + // PyObject* agent_offsets = PyList_New(num_envs+1); + // PyObject* map_ids = PyList_New(num_envs); + srand(time(NULL)); + int total_agent_count = 0; + int env_count = 0; + int max_envs = num_agents; + PyObject* agent_offsets = PyList_New(max_envs+1); + PyObject* map_ids = PyList_New(max_envs); + // getting env count + while(total_agent_count < num_agents && env_count < max_envs){ char map_file[100]; - sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", i); - temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]); - set_active_agents(&temp_envs[i]); - PyObject* num = PyLong_FromLong(total_count); - PyList_SetItem(agent_offsets, i, num); - //Py_DECREF(num); - total_count += temp_envs[i].active_agent_count; + int map_id = rand() % num_maps; + GPUDrive* env = calloc(1, sizeof(GPUDrive)); + sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id); + env->entities = load_map_binary(map_file, env); + set_active_agents(env); + // Store map_id + PyObject* map_id_obj = PyLong_FromLong(map_id); + PyList_SetItem(map_ids, env_count, map_id_obj); + // Store agent offset + PyObject* offset = PyLong_FromLong(total_agent_count); + PyList_SetItem(agent_offsets, env_count, offset); + total_agent_count += env->active_agent_count; + env_count++; + for(int j=0;jnum_entities;j++) { + free_entity(&env->entities[j]); + } + free(env->entities); + free(env->active_agent_indices); + free(env->static_car_indices); + free(env->expert_static_car_indices); + free(env); + } + if(total_agent_count >= num_agents){ + total_agent_count = num_agents; } - PyObject* num = PyLong_FromLong(total_count); - PyList_SetItem(agent_offsets, num_envs, num); + PyObject* final_total_agent_count = PyLong_FromLong(total_agent_count); + PyList_SetItem(agent_offsets, env_count, final_total_agent_count); + PyObject* final_env_count = PyLong_FromLong(env_count); + // resize lists + PyObject* resized_agent_offsets = PyList_GetSlice(agent_offsets, 0, env_count + 1); + PyObject* resized_map_ids = PyList_GetSlice(map_ids, 0, env_count); + // + Py_DECREF(agent_offsets); + Py_DECREF(map_ids); + // create a tuple + PyObject* tuple = PyTuple_New(3); + PyTuple_SetItem(tuple, 0, resized_agent_offsets); + PyTuple_SetItem(tuple, 1, resized_map_ids); + PyTuple_SetItem(tuple, 2, final_env_count); + return tuple; + //Py_DECREF(num); /* for(int i = 0;ihuman_agent_idx = unpack(kwargs, "human_agent_idx"); env->reward_vehicle_collision = unpack(kwargs, "reward_vehicle_collision"); env->reward_offroad_collision = unpack(kwargs, "reward_offroad_collision"); - int env_id = unpack(kwargs, "env_id"); + env->spawn_immunity_timer = unpack(kwargs, "spawn_immunity_timer"); + int map_id = unpack(kwargs, "map_id"); + int max_agents = unpack(kwargs, "max_agents"); char map_file[100]; - sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", env_id); + sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id); env->map_name = map_file; + env->num_agents = max_agents; init(env); return 0; } @@ -58,5 +99,7 @@ static int my_log(PyObject* dict, Log* log) { assign_to_dict(dict, "collision_rate", log->collision_rate); assign_to_dict(dict, "dnf_rate", log->dnf_rate); assign_to_dict(dict, "n", log->n); + assign_to_dict(dict, "completion_rate", log->completion_rate); + assign_to_dict(dict, "clean_collision_rate", log->clean_collision_rate); return 0; } diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c index 123074e677..2bffbf2eda 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.c +++ b/pufferlib/ocean/gpudrive/gpudrive.c @@ -102,7 +102,8 @@ void demo() { .human_agent_idx = 0, .reward_vehicle_collision = -0.1f, .reward_offroad_collision = -0.1f, - .map_name = "resources/gpudrive/binaries/map_000.bin" + .map_name = "resources/gpudrive/binaries/map_086.bin", + .spawn_immunity_timer = 30 }; allocate(&env); c_reset(&env); @@ -173,7 +174,7 @@ void performance_test() { GPUDrive env = { .dynamics_model = CLASSIC, .human_agent_idx = 0, - .map_name = "resources/gpudrive/binaries/map_005.bin" + .map_name = "resources/gpudrive/binaries/map_055.bin" }; clock_t start_time, end_time; double cpu_time_used; @@ -206,7 +207,7 @@ void performance_test() { } int main() { - // demo(); - performance_test(); + demo(); + // performance_test(); return 0; } diff --git a/pufferlib/ocean/gpudrive/gpudrive.h b/pufferlib/ocean/gpudrive/gpudrive.h index effcd4ccf6..d8be0457e5 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.h +++ b/pufferlib/ocean/gpudrive/gpudrive.h @@ -93,6 +93,8 @@ struct Log { float score; float offroad_rate; float collision_rate; + float clean_collision_rate; + float completion_rate; float dnf_rate; float n; }; @@ -127,6 +129,11 @@ struct Entity { float vz; float heading; int valid; + int reached_goal; + int respawn_timestep; + int collided_before_goal; + int reached_goal_this_episode; + int active_agent; }; void free_entity(Entity* entity){ @@ -176,8 +183,6 @@ struct GPUDrive { int* expert_static_car_indices; int timestep; int dynamics_model; - float* fake_data; - char* goal_reached; float* map_corners; int* grid_cells; // holds entity ids and geometry index per cell int grid_cols; @@ -189,22 +194,28 @@ struct GPUDrive { float reward_vehicle_collision; float reward_offroad_collision; char* map_name; - char* reached_goal_this_episode; float world_mean_x; float world_mean_y; + int spawn_immunity_timer; }; void add_log(GPUDrive* env) { for(int i = 0; i < env->active_agent_count; i++){ - if(env->reached_goal_this_episode[i]) { - env->log.score += 1.0f; - env->log.perf += 1.0f; + Entity* e = &env->entities[env->active_agent_indices[i]]; + if(e->reached_goal_this_episode){ + env->log.completion_rate += 1.0f; } int offroad = env->logs[i].offroad_rate; env->log.offroad_rate += offroad; int collided = env->logs[i].collision_rate; env->log.collision_rate += collided; - if(!offroad && !collided && !env->reached_goal_this_episode[i]){ + int clean_collided = env->logs[i].clean_collision_rate; + env->log.clean_collision_rate += clean_collided; + if(e->reached_goal_this_episode && !e->collided_before_goal){ + env->log.score += 1.0f; + env->log.perf += 1.0f; + } + if(!offroad && !collided && !e->reached_goal_this_episode){ env->log.dnf_rate += 1.0f; } env->log.episode_length += env->logs[i].episode_length; @@ -304,6 +315,9 @@ void set_start_position(GPUDrive* env){ e->vx = 0; e->vy = 0; e->vz = 0; + e->reached_goal = 0; + e->collided_before_goal = 0; + e->respawn_timestep = -1; } else{ e->vx = e->traj_vx[0]; e->vy = e->traj_vy[0]; @@ -318,6 +332,23 @@ void set_start_position(GPUDrive* env){ } +int valid_active_agent(GPUDrive* env, int agent_idx){ + float cos_heading = cosf(env->entities[agent_idx].traj_heading[0]); + float sin_heading = sinf(env->entities[agent_idx].traj_heading[0]); + float goal_x = env->entities[agent_idx].goal_position_x - env->entities[agent_idx].traj_x[0]; + float goal_y = env->entities[agent_idx].goal_position_y - env->entities[agent_idx].traj_y[0]; + // Rotate to ego vehicle's frame + float rel_goal_x = goal_x*cos_heading + goal_y*sin_heading; + float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading; + float distance_to_goal = relative_distance_2d(0, 0, rel_goal_x, rel_goal_y); + env->entities[agent_idx].width *= 0.7f; + env->entities[agent_idx].length *= 0.7f; + if(distance_to_goal >= 2.0f && env->entities[agent_idx].mark_as_expert == 0 && env->active_agent_count < env->num_agents){ + return distance_to_goal; + } + return 0; +} + void set_active_agents(GPUDrive* env){ env->static_car_count = 0; env->num_cars = 1; @@ -325,30 +356,35 @@ void set_active_agents(GPUDrive* env){ int active_agent_indices[MAX_CARS]; int static_car_indices[MAX_CARS]; int expert_static_car_indices[MAX_CARS]; - env->active_agent_count = 1; - active_agent_indices[0] = env->num_objects-1; + + if(env->num_agents ==0){ + env->num_agents = MAX_CARS; + } + int first_agent_id = env->num_objects-1; + float distance_to_goal = valid_active_agent(env, first_agent_id); + if(distance_to_goal){ + env->active_agent_count = 1; + active_agent_indices[0] = first_agent_id; + env->entities[first_agent_id].active_agent = 1; + env->num_cars = 1; + } else { + env->active_agent_count = 0; + env->num_cars = 0; + } for(int i = 0; i < env->num_objects-1 && env->num_cars < MAX_CARS; i++){ if(env->entities[i].type != 1) continue; if(env->entities[i].traj_valid[0] != 1) continue; env->num_cars++; - float cos_heading = cosf(env->entities[i].traj_heading[0]); - float sin_heading = sinf(env->entities[i].traj_heading[0]); - float goal_x = env->entities[i].goal_position_x - env->entities[i].traj_x[0]; - float goal_y = env->entities[i].goal_position_y - env->entities[i].traj_y[0]; - // Rotate to ego vehicle's frame - float rel_goal_x = goal_x*cos_heading + goal_y*sin_heading; - float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading; - float distance_to_goal = relative_distance_2d(0, 0, rel_goal_x, rel_goal_y); - env->entities[i].width *= 0.7f; - env->entities[i].length *= 0.7f; - - if(distance_to_goal >= 2.0f && env->entities[i].mark_as_expert == 0){ + float distance_to_goal = valid_active_agent(env, i); + if(distance_to_goal > 0){ active_agent_indices[env->active_agent_count] = i; env->active_agent_count++; + env->entities[i].active_agent = 1; } else { static_car_indices[env->static_car_count] = i; env->static_car_count++; - if(env->entities[i].mark_as_expert == 1){ + env->entities[i].active_agent = 0; + if(env->entities[i].mark_as_expert == 1 || (distance_to_goal >=2.0f && env->active_agent_count == env->num_agents)){ expert_static_car_indices[env->expert_static_car_count] = i; env->expert_static_car_count++; } @@ -611,8 +647,6 @@ void init(GPUDrive* env){ set_start_position(env); // printf("Active agents: %d\n", env->active_agent_count); env->logs = (Log*)calloc(env->active_agent_count, sizeof(Log)); - env->goal_reached = (char*)calloc(env->active_agent_count, sizeof(char)); - env->reached_goal_this_episode = (char*)calloc(env->active_agent_count, sizeof(char)); init_grid_map(env); env->vision_range = 21; init_neighbor_offsets(env); @@ -627,9 +661,6 @@ void free_initialized(GPUDrive* env){ free(env->entities); free(env->active_agent_indices); free(env->logs); - free(env->fake_data); - free(env->goal_reached); - free(env->reached_goal_this_episode); free(env->map_corners); free(env->grid_cells); free(env->neighbor_offsets); @@ -637,6 +668,7 @@ void free_initialized(GPUDrive* env){ free(env->neighbor_cache_indices); free(env->static_car_indices); free(env->expert_static_car_indices); + // free(env->map_name); } void allocate(GPUDrive* env){ @@ -662,6 +694,10 @@ void free_allocated(GPUDrive* env){ free_initialized(env); } +void c_close(GPUDrive* env){ + free_initialized(env); +} + float clipSpeed(float speed) { const float maxSpeed = MAX_SPEED; if (speed > maxSpeed) return maxSpeed; @@ -699,8 +735,8 @@ void move_dynamics(GPUDrive* env, int action_idx, int agent_idx){ // Time step (adjust as needed) const float dt = 0.1f; // Update speed with acceleration - speed = speed + acceleration*dt; - if (speed < 0) speed = 0; // Prevent going backward + speed = speed + 0.5f*acceleration*dt; + // if (speed < 0) speed = 0; // Prevent going backward speed = clipSpeed(speed); // compute yaw rate float beta = tanh(.5*tanf(steering)); @@ -882,9 +918,17 @@ void collision_check(GPUDrive* env, int agent_idx) { if (collided == VEHICLE_COLLISION) break; } agent->collision_state = collided; - if (car_collided_with_index != -1) { - env->entities[car_collided_with_index].collision_state = VEHICLE_COLLISION; + // spawn immunity for collisions with other agent cars as agent_idx respawns + if(collided == VEHICLE_COLLISION && env->entities[car_collided_with_index].active_agent == 1 && env->entities[agent_idx].respawn_timestep != -1 && env->timestep - env->entities[agent_idx].respawn_timestep < env->spawn_immunity_timer){ + agent->collision_state = 0; } + + // spawn immunity for collisions with other cars who just respawned + if (car_collided_with_index != -1 && (env->entities[car_collided_with_index].respawn_timestep != -1 && env->timestep - env->entities[car_collided_with_index].respawn_timestep >= env->spawn_immunity_timer )) { + env->entities[car_collided_with_index].collision_state = VEHICLE_COLLISION; + } else if (car_collided_with_index != -1 && (env->entities[car_collided_with_index].respawn_timestep != -1 && env->timestep - env->entities[car_collided_with_index].respawn_timestep < env->spawn_immunity_timer )) { + agent->collision_state = 0; + } } float normalize_value(float value, float min, float max){ @@ -892,7 +936,7 @@ float normalize_value(float value, float min, float max){ } float reverse_normalize_value(float value, float min, float max){ - return value*(max - min) + min; + return value*100.0f; } void compute_observations(GPUDrive* env) { @@ -915,10 +959,10 @@ void compute_observations(GPUDrive* env) { float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading; //obs[0] = normalize_value(rel_goal_x, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD); //obs[1] = normalize_value(rel_goal_y, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD); - obs[0] = rel_goal_x/20.0f; - obs[1] = rel_goal_y/20.0f; + obs[0] = rel_goal_x/100.0f; + obs[1] = rel_goal_y/100.0f; //obs[2] = ego_speed / MAX_SPEED; - obs[2] = ego_speed / 5.0f; + obs[2] = ego_speed / 100.0f; obs[3] = ego_entity->width / MAX_VEH_WIDTH; obs[4] = ego_entity->length / MAX_VEH_LEN; obs[5] = (ego_entity->collision_state > 0) ? 1 : 0; @@ -946,8 +990,8 @@ void compute_observations(GPUDrive* env) { float rel_x = dx*cos_heading + dy*sin_heading; float rel_y = -dx*sin_heading + dy*cos_heading; // Store observations with correct indexing - obs[obs_idx] = rel_x / 20.0f; - obs[obs_idx + 1] = rel_y / 20.0f; + obs[obs_idx] = rel_x / 100.0f; + obs[obs_idx + 1] = rel_y / 100.0f; obs[obs_idx + 2] = other_entity->width / MAX_VEH_WIDTH; obs[obs_idx + 3] = other_entity->length / MAX_VEH_LEN; // relative heading @@ -996,8 +1040,8 @@ void compute_observations(GPUDrive* env) { // Compute sin and cos of relative angle directly without atan2f float cos_angle = dx_norm*cos_heading + dy_norm*sin_heading; float sin_angle = -dx_norm*sin_heading + dy_norm*cos_heading; - obs[obs_idx] = x_obs / 20.0f; - obs[obs_idx + 1] = y_obs / 20.0f; + obs[obs_idx] = x_obs / 100.0f; + obs[obs_idx + 1] = y_obs / 100.0f; obs[obs_idx + 2] = length / MAX_ROAD_SEGMENT_LENGTH; obs[obs_idx + 3] = width / MAX_ROAD_SCALE; obs[obs_idx + 4] = cos_angle / MAX_ORIENTATION_RAD; @@ -1017,10 +1061,12 @@ void c_reset(GPUDrive* env){ for(int x = 0;xactive_agent_count; x++){ env->logs[x] = (Log){0}; int agent_idx = env->active_agent_indices[x]; + env->entities[agent_idx].respawn_timestep = -1; + env->entities[agent_idx].reached_goal = 0; + env->entities[agent_idx].collided_before_goal = 0; + env->entities[agent_idx].reached_goal_this_episode = 0; collision_check(env, agent_idx); } - memset(env->goal_reached, 0, env->active_agent_count*sizeof(char)); - memset(env->reached_goal_this_episode, 0, env->active_agent_count*sizeof(char)); compute_observations(env); } @@ -1030,6 +1076,8 @@ void respawn_agent(GPUDrive* env, int agent_idx){ env->entities[agent_idx].heading = env->entities[agent_idx].traj_heading[0]; env->entities[agent_idx].vx = env->entities[agent_idx].traj_vx[0]; env->entities[agent_idx].vy = env->entities[agent_idx].traj_vy[0]; + env->entities[agent_idx].reached_goal = 0; + env->entities[agent_idx].respawn_timestep = env->timestep; } void c_step(GPUDrive* env){ @@ -1050,25 +1098,34 @@ void c_step(GPUDrive* env){ env->logs[i].score = 0.0f; env->logs[i].episode_length += 1; int agent_idx = env->active_agent_indices[i]; - if(env->goal_reached[i] || env->entities[agent_idx].collision_state > 0){ + int reached_goal = env->entities[agent_idx].reached_goal; + int collision_state = env->entities[agent_idx].collision_state; + if(reached_goal || collision_state > 0){ respawn_agent(env, agent_idx); - env->goal_reached[i] = 0; } env->entities[agent_idx].collision_state = 0; move_dynamics(env, i, agent_idx); // move_expert(env, env->actions, agent_idx); collision_check(env, agent_idx); - if(env->entities[agent_idx].collision_state > 0 && env->goal_reached[i] == 0){ - if(env->entities[agent_idx].collision_state == VEHICLE_COLLISION){ + collision_state = env->entities[agent_idx].collision_state; + + if(collision_state > 0){ + if(collision_state == VEHICLE_COLLISION){ env->rewards[i] = env->reward_vehicle_collision; env->logs[i].collision_rate = 1.0f; env->logs[i].episode_return += env->reward_vehicle_collision; + if(env->entities[agent_idx].respawn_timestep == -1){ + env->logs[i].clean_collision_rate = 1.0f; + } } - else if(env->entities[agent_idx].collision_state == OFFROAD){ + else if(collision_state == OFFROAD){ env->rewards[i] = env->reward_offroad_collision; env->logs[i].offroad_rate = 1.0f; env->logs[i].episode_return += env->reward_offroad_collision; } + if(!env->entities[agent_idx].reached_goal_this_episode){ + env->entities[agent_idx].collided_before_goal = 1; + } } float distance_to_goal = relative_distance_2d( @@ -1076,12 +1133,11 @@ void c_step(GPUDrive* env){ env->entities[agent_idx].y, env->entities[agent_idx].goal_position_x, env->entities[agent_idx].goal_position_y); - int reached_goal = distance_to_goal < 2.0f; - if(reached_goal && env->goal_reached[i] == 0){ + if(distance_to_goal < 2.0f){ env->rewards[i] += 1.0f; - env->goal_reached[i] = 1; - env->logs[i].episode_return += 1.0f; - env->reached_goal_this_episode[i] = 1; + env->logs[i].episode_return += 1.0f; + env->entities[agent_idx].reached_goal = 1; + env->entities[agent_idx].reached_goal_this_episode = 1; } } compute_observations(env); @@ -1421,7 +1477,7 @@ void c_render(GPUDrive* env) { car_model = client->cars[0]; // Collided agent } // Draw obs for human selected agent - if(agent_index == env->human_agent_idx && env->goal_reached[agent_index] == 0) { + if(agent_index == env->human_agent_idx && !env->entities[agent_index].reached_goal) { draw_agent_obs(env, agent_index); } // Draw cube for cars static and active @@ -1442,7 +1498,7 @@ void c_render(GPUDrive* env) { rlPopMatrix(); // FPV Camera Control if(IsKeyDown(KEY_LEFT_CONTROL) && env->human_agent_idx== agent_index){ - if(env->goal_reached[agent_index] == 1){ + if(env->entities[agent_index].reached_goal){ env->human_agent_idx = rand() % env->active_agent_count; } Vector3 camera_position = (Vector3){ @@ -1549,9 +1605,10 @@ void c_render(GPUDrive* env) { } void close_client(Client* client){ - for (int i = 0; i < 5; i++) { + for (int i = 0; i < 6; i++) { UnloadModel(client->cars[i]); } + UnloadTexture(client->puffers); CloseWindow(); free(client); } diff --git a/pufferlib/ocean/gpudrive/gpudrive.py b/pufferlib/ocean/gpudrive/gpudrive.py index 528496663e..a852979751 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.py +++ b/pufferlib/ocean/gpudrive/gpudrive.py @@ -7,16 +7,18 @@ from pufferlib.ocean.gpudrive import binding class GPUDrive(pufferlib.PufferEnv): - def __init__(self, num_envs=1, render_mode=None, report_interval=1, + def __init__(self, render_mode=None, report_interval=1, width=1280, height=1024, human_agent_idx=0, reward_vehicle_collision=-0.1, reward_offroad_collision=-0.1, + spawn_immunity_timer=30, + num_maps=100, + num_agents=512, buf = None, seed=1): # env - self.num_agents = num_envs self.render_mode = render_mode self.report_interval = report_interval @@ -24,9 +26,8 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, self.single_observation_space = gymnasium.spaces.Box(low=-1, high=1, shape=(self.num_obs,), dtype=np.float32) self.single_action_space = gymnasium.spaces.MultiDiscrete([7, 13]) - agent_offsets = binding.shared(num_envs=num_envs) - total_agents = agent_offsets[-1] - self.num_agents = total_agents + agent_offsets, map_ids, num_envs = binding.shared(num_agents=num_agents, num_maps=num_maps) + self.num_agents = num_agents super().__init__(buf=buf) env_ids = [] for i in range(num_envs): @@ -42,7 +43,9 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, human_agent_idx=human_agent_idx, reward_vehicle_collision=reward_vehicle_collision, reward_offroad_collision=reward_offroad_collision, - env_id=i + spawn_immunity_timer=spawn_immunity_timer, + map_id=map_ids[i], + max_agents = nxt-cur ) env_ids.append(env_id) @@ -67,7 +70,7 @@ def step(self, actions): self.terminals, self.truncations, info) def render(self): - binding.vec_render(self.c_envs, 63) + binding.vec_render(self.c_envs, 0) def close(self): binding.vec_close(self.c_envs) @@ -239,7 +242,7 @@ def process_all_maps(): data_dir = Path("data/processed/training") # Get all JSON files in the training directory - json_files = sorted(data_dir.glob("*.json"))[0:512] + json_files = sorted(data_dir.glob("*.json")) print(f"Found {len(json_files)} JSON files") @@ -254,13 +257,13 @@ def process_all_maps(): except Exception as e: print(f"Error processing {map_path.name}: {e}") -def test_performance(timeout=10, atn_cache=1024, num_envs=75): +def test_performance(timeout=10, atn_cache=1024, num_agents=1024): import time - env = GPUDrive(num_envs=num_envs) + env = GPUDrive(num_agents=num_agents) env.reset() tick = 0 - num_agents = 3968 + num_agents = 1024 actions = np.stack([ np.random.randint(0, space.n + 1, (atn_cache, num_agents)) for space in env.single_action_space @@ -273,8 +276,7 @@ def test_performance(timeout=10, atn_cache=1024, num_envs=75): tick += 1 print(f'SPS: {num_agents * tick / (time.time() - start)}') - - + env.close() if __name__ == '__main__': - # test_performance() - process_all_maps() + test_performance() + # process_all_maps() diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index ef05533b69..83f70c44db 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -759,18 +759,9 @@ def encode_observations(self, observations, state=None): ego_features = self.ego_encoder(ego_obs) partner_features, _ = self.partner_encoder(partner_objects).max(dim=1) road_features, _ = self.road_encoder(road_objects).max(dim=1) - #partner_features_post_mask = self.post_mask_partner_encoder(partner_features) - #road_features_post_mask = self.post_mask_road_encoder(road_features) - #concat_features = torch.cat([ego_features, road_features_post_mask, partner_features_post_mask], dim=1) concat_features = torch.cat([ego_features, road_features, partner_features], dim=1) - # Apply max pooling across concatenated features - # Reshape to [batch, 3, hidden_size] to pool across the 3 modalities - # pooled_features = torch.max( - # concat_features.view(-1, 3, self.hidden_size), dim=1 - # )[0] - # Pass through shared embedding embedding = F.relu(self.shared_embedding(concat_features)) # embedding = self.shared_embedding(concat_features)