Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions config/ocean/gpudrive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ hidden_size = 512

[env]
num_envs = 75
reward_vehicle_collision = 0
reward_offroad_collision = 0
reward_vehicle_collision = -0.75
reward_offroad_collision = -0.75

[train]
total_timesteps = 150_000_000
total_timesteps = 250_000_000
learning_rate = 0.005
num_workers = 8
num_envs = 8
Expand Down
5 changes: 2 additions & 3 deletions pufferlib/ocean/gpudrive/cy_gpudrive.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ cdef class CyGPUDrive:
free(agent_offsets)
return total_count, py_offsets
def __init__(self, float[:, :] observations, int[:,:] actions,
float[:] rewards, unsigned char[:] masks, unsigned char[:] terminals, int num_envs,
float[:] rewards, unsigned char[:] terminals, int num_envs,
int human_agent_idx, reward_vehicle_collision,
reward_offroad_collision, offsets):

Expand Down Expand Up @@ -214,7 +214,6 @@ cdef class CyGPUDrive:
observations=&observations[clone_agent_offset, 0],
actions=&actions[clone_agent_offset,0],
rewards=&rewards[clone_agent_offset],
masks=&masks[clone_agent_offset],
dones=&terminals[clone_agent_offset],
log_buffer=self.logs,
human_agent_idx=human_agent_idx,
Expand All @@ -237,7 +236,7 @@ cdef class CyGPUDrive:
c_step(&self.envs[i])

def render(self):
cdef GPUDrive* env = &self.envs[11]
cdef GPUDrive* env = &self.envs[2]
if self.client == NULL:
import os
cwd = os.getcwd()
Expand Down
53 changes: 25 additions & 28 deletions pufferlib/ocean/gpudrive/gpudrive.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ struct GPUDrive {
float* observations;
int* actions;
float* rewards;
unsigned char* masks;
unsigned char* dones;
LogBuffer* log_buffer;
Log* logs;
Expand Down Expand Up @@ -233,7 +232,7 @@ struct GPUDrive {
float reward_vehicle_collision;
float reward_offroad_collision;
char* map_name;
char* reached_goal_this_turn;
char* reached_goal_this_episode;
float world_mean_x;
float world_mean_y;
};
Expand Down Expand Up @@ -629,7 +628,7 @@ void init(GPUDrive* env){
// printf("Active agents: %d\n", env->active_agent_count);
env->logs = (Log*)calloc(env->active_agent_count, sizeof(Log));
env->goal_reached = (char*)calloc(env->active_agent_count, sizeof(char));
env->reached_goal_this_turn = (char*)calloc(env->active_agent_count, sizeof(char));
env->reached_goal_this_episode = (char*)calloc(env->active_agent_count, sizeof(char));
init_grid_map(env);
env->vision_range = 21;
init_neighbor_offsets(env);
Expand All @@ -646,7 +645,7 @@ void free_initialized(GPUDrive* env){
free(env->logs);
free(env->fake_data);
free(env->goal_reached);
free(env->reached_goal_this_turn);
free(env->reached_goal_this_episode);
free(env->map_corners);
free(env->grid_cells);
free(env->neighbor_offsets);
Expand All @@ -667,7 +666,6 @@ void allocate(GPUDrive* env){
env->observations = (float*)calloc(env->active_agent_count*max_obs, sizeof(float));
env->actions = (int*)calloc(env->active_agent_count*2, sizeof(int));
env->rewards = (float*)calloc(env->active_agent_count, sizeof(float));
env->masks = (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char));
env->dones = (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char));
env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE);
// printf("allocated\n");
Expand All @@ -677,7 +675,6 @@ void free_allocated(GPUDrive* env){
free(env->observations);
free(env->actions);
free(env->rewards);
free(env->masks);
free(env->dones);
free_logbuffer(env->log_buffer);
free_initialized(env);
Expand Down Expand Up @@ -921,9 +918,6 @@ void compute_observations(GPUDrive* env) {
memset(env->observations, 0, max_obs*env->active_agent_count*sizeof(float));
float (*observations)[max_obs] = (float(*)[max_obs])env->observations;
for(int i = 0; i < env->active_agent_count; i++) {
if(env->goal_reached[i] && !env->reached_goal_this_turn[i]){
continue;
}
float* obs = &observations[i][0];
Entity* ego_entity = &env->entities[env->active_agent_indices[i]];
if(ego_entity->type > 3) break;
Expand Down Expand Up @@ -1041,31 +1035,34 @@ void c_reset(GPUDrive* env){
collision_check(env, agent_idx);
}
memset(env->goal_reached, 0, env->active_agent_count*sizeof(char));
memset(env->masks, 1, env->active_agent_count*sizeof(char));
memset(env->dones, 0, env->active_agent_count*sizeof(char));
memset(env->reached_goal_this_episode, 0, env->active_agent_count*sizeof(char));
compute_observations(env);
}

/*
 * Put one entity back at the first sample of its stored trajectory.
 *
 * env       - environment whose entities array is modified in place.
 * agent_idx - index into env->entities (an entity index, not an index into
 *             the per-active-agent arrays).
 *
 * Only x, y, heading, vx and vy are overwritten, each from element 0 of the
 * matching traj_* array; every other field of the entity is left untouched.
 */
void respawn_agent(GPUDrive* env, int agent_idx){
    // Velocity components from trajectory sample 0.
    env->entities[agent_idx].vx =
        env->entities[agent_idx].traj_vx[0];
    env->entities[agent_idx].vy =
        env->entities[agent_idx].traj_vy[0];

    // Orientation from trajectory sample 0.
    env->entities[agent_idx].heading =
        env->entities[agent_idx].traj_heading[0];

    // Position from trajectory sample 0.
    env->entities[agent_idx].x =
        env->entities[agent_idx].traj_x[0];
    env->entities[agent_idx].y =
        env->entities[agent_idx].traj_y[0];
}

void c_step(GPUDrive* env){
memset(env->rewards, 0, env->active_agent_count * sizeof(float));
memset(env->reached_goal_this_turn, 0, env->active_agent_count * sizeof(char));
env->timestep++;
if(env->timestep == 91){
for(int i = 0; i < env->active_agent_count; i++){
if(env->goal_reached[i] == 0){
if(!env->reached_goal_this_episode[i]) {
env->logs[i].score = 0.0f;
}
else {
env->logs[i].score = 1.0f;
env->logs[i].dnf_rate = 0.0f;
env->rewards[i] += -1.0f;
env->logs[i].episode_return += -1.0f;
}
int offroad = env->logs[i].offroad_rate;
int collided = env->logs[i].collision_rate;
int goal_reached = env->goal_reached[i];
if(!offroad && !collided && !goal_reached){
env->logs[i].dnf_rate = 1.0f;
}

add_log(env->log_buffer, &env->logs[i]);
}
c_reset(env);
Expand All @@ -1081,13 +1078,11 @@ void c_step(GPUDrive* env){
env->logs[i].score = 0.0f;
env->logs[i].episode_length += 1;
int agent_idx = env->active_agent_indices[i];
if(env->goal_reached[i] || env->entities[agent_idx].collision_state > 0){
respawn_agent(env, agent_idx);
env->goal_reached[i] = 0;
}
env->entities[agent_idx].collision_state = 0;
if(env->goal_reached[i]){
env->masks[i] = 0;
env->entities[agent_idx].x = -10000;
env->entities[agent_idx].y = -10000;
continue;
}
move_dynamics(env, i, agent_idx);
// move_expert(env, env->actions, agent_idx);
collision_check(env, agent_idx);
Expand All @@ -1102,6 +1097,8 @@ void c_step(GPUDrive* env){
env->logs[i].offroad_rate = 1.0f;
env->logs[i].episode_return += env->reward_offroad_collision;
}
env->logs[i].score = 0.0f;
add_log(env->log_buffer, &env->logs[i]);
}

float distance_to_goal = relative_distance_2d(
Expand All @@ -1110,13 +1107,13 @@ void c_step(GPUDrive* env){
env->entities[agent_idx].goal_position_x,
env->entities[agent_idx].goal_position_y);
int reached_goal = distance_to_goal < 2.0f;
if(reached_goal && env->goal_reached[i] == 0){
if(reached_goal){
env->rewards[i] += 1.0f;
env->goal_reached[i] = 1;
env->reached_goal_this_turn[i] = 1;
env->logs[i].episode_return += 1.0f;
env->dones[i] = 1;
continue;
env->reached_goal_this_episode[i] = 1;
env->logs[i].score = 1.0f;
add_log(env->log_buffer, &env->logs[i]);
}
}
compute_observations(env);
Expand Down
3 changes: 2 additions & 1 deletion pufferlib/ocean/gpudrive/gpudrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1,
self.num_agents = total_agents
print("Num agents: ", self.num_agents)
super().__init__(buf=buf)
self.c_envs = CyGPUDrive(self.observations, self.actions, self.rewards, self.masks,
print("offsets: ", agent_offsets)
self.c_envs = CyGPUDrive(self.observations, self.actions, self.rewards,
self.terminals, num_envs, human_agent_idx, reward_vehicle_collision, reward_offroad_collision, offsets = agent_offsets)


Expand Down
Loading