diff --git a/config/ocean/gpudrive.ini b/config/ocean/gpudrive.ini index ed7a4d3d43..bc82352f7c 100644 --- a/config/ocean/gpudrive.ini +++ b/config/ocean/gpudrive.ini @@ -8,6 +8,7 @@ rnn_name = Recurrent num_workers = 16 num_envs = 16 batch_size = 8 +#backend = Serial [policy] input_size = 64 @@ -18,19 +19,49 @@ input_size = 512 hidden_size = 512 [env] -num_envs = 64 +num_envs = 72 reward_vehicle_collision = -0.75 reward_offroad_collision = -0.75 [train] -total_timesteps = 250_000_000 -learning_rate = 0.005 +total_timesteps = 100_000_000 +#learning_rate = 0.005 anneal_lr = True -batch_size = 752752 +batch_size = 738192 minibatch_size = 23296 max_minibatch_size = 23296 bptt_horizon = 91 +#adam_beta1 = 0.9225899639773112 +#adam_beta2 = 0.9 +#adam_eps = 0.0004030478187254784 +#ent_coef = 0.0020159472963835016 +#gae_lambda = 0.8829440612065992 +#gamma = 0.9872971455373439 +#learning_rate = 0.0003947934701844728 +#max_grad_norm = 0.5296288081133984 +#prio_alpha = 0.99 +#prio_beta0 = 0.48469847315324566 +#update_epochs = 2 +#vf_coef = 3.6777541336880786 +#checkpoint_interval = 1000 + +adam_beta1 = 0.9852000972032763 +adam_beta2 = 0.9948751690861872 +adam_eps = 0.000002967099767264975 +clip_coef = 0.3153578071651496 +ent_coef = 0.000369784972524992 +gae_lambda = 0.9385892578563558 +gamma = 0.9864999317644947 +learning_rate = 0.0022659903674495338 +max_grad_norm = 1.942292174080673 +prio_alpha = 0.9414003089586056 +prio_beta = 0.9429842108374631 +vf_clip_coef = 1.9533056765171148 +vf_coef = 3.2028923035616774 + + + [sweep.env.reward_vehicle_collision] distribution = uniform min = -1.0 diff --git a/pufferlib/ocean/gpudrive/cy_gpudrive.pyx b/pufferlib/ocean/gpudrive/cy_gpudrive.pyx deleted file mode 100644 index 045e953eb9..0000000000 --- a/pufferlib/ocean/gpudrive/cy_gpudrive.pyx +++ /dev/null @@ -1,254 +0,0 @@ -from libc.stdlib cimport calloc, malloc, free -from libc.string cimport strcpy -import numpy as np -cdef extern from "gpudrive.h": - int LOG_BUFFER_SIZE - - ctypedef struct Log: - float episode_return; - float episode_length; - float score; - float offroad_rate; - float collision_rate; - float dnf_rate; - ctypedef struct LogBuffer - LogBuffer* allocate_logbuffer(int) - void free_logbuffer(LogBuffer*) - Log aggregate_and_clear(LogBuffer*) - - ctypedef struct Entity: - int type; - int road_object_id; - int road_point_id; - int array_size; - float* traj_x; - float* traj_y; - float* traj_z; - float* traj_vx; - float* traj_vy; - float* traj_vz; - float* traj_heading; - int* traj_valid; - float width; - float length; - float height; - float goal_position_x; - float goal_position_y; - float goal_position_z; - int mark_as_expert; - int collision_state; - float x; - float y; - float z; - float vx; - float vy; - float vz; - float heading; - int valid; - - ctypedef struct GPUDrive: - float* observations; - int* actions; - float* rewards; - unsigned char* masks; - unsigned char* dones; - LogBuffer* log_buffer; - Log* logs; - int num_agents; - int active_agent_count; - int* active_agent_indices; - int human_agent_idx; - Entity* entities; - int num_entities; - int num_cars; - int num_objects; - int num_roads; - int static_car_count; - int* static_car_indices; - int expert_static_car_count; - int* expert_static_car_indices; - int timestep; - int dynamics_model; - float* fake_data; - char* goal_reached; - float* map_corners; - int* grid_cells; - int grid_cols; - int grid_rows; - int vision_range; - int* neighbor_offsets; - int* neighbor_cache_entities; - int* neighbor_cache_indices; - float reward_vehicle_collision; - float reward_offroad_collision; - char* map_name; - char* reached_goal_this_turn; - float world_mean_x; - float world_mean_y; - - ctypedef struct Client - - void init(GPUDrive* env) - void free_allocated(GPUDrive* env) - void free_entity(Entity* entity) - - Client* make_client(GPUDrive* env) - void close_client(Client* client) - void c_render(Client* client, GPUDrive* env) - void c_reset(GPUDrive* env) - void c_step(GPUDrive* env) - Entity* load_map_binary(char* name, GPUDrive* env) - void set_active_agents(GPUDrive *env) - -cpdef entity_dtype(): - '''Make a dummy entity to get the dtype''' - # Create a numpy structured dtype that matches the Entity struct - return np.dtype([ - ('type', np.int32), - ('road_object_id', np.int32), - ('road_point_id', np.int32), - ('array_size', np.int32), - # For pointer fields, we use intp (integer large enough to hold a pointer) - ('traj_x', np.intp), - ('traj_y', np.intp), - ('traj_z', np.intp), - ('traj_vx', np.intp), - ('traj_vy', np.intp), - ('traj_vz', np.intp), - ('traj_heading', np.intp), - ('traj_valid', np.intp), - ('width', np.float32), - ('length', np.float32), - ('height', np.float32), - ('goal_position_x', np.float32), - ('goal_position_y', np.float32), - ('goal_position_z', np.float32), - ('collision_state', np.int32), - ('x', np.float32), - ('y', np.float32), - ('z', np.float32), - ('vx', np.float32), - ('vy', np.float32), - ('vz', np.float32), - ('heading', np.float32), - ('valid', np.int32) - ]) - -cdef class CyGPUDrive: - cdef: - GPUDrive* envs - Client* client - LogBuffer* logs - int num_envs - int* agent_offsets - int agent_count - - @staticmethod - def get_total_agent_count(int num_envs, int human_agent_idx, float reward_vehicle_collision, float reward_offroad_collision): - """Static method to count total agents across all environments""" - cdef int* agent_offsets = calloc(num_envs + 1, sizeof(int)) - cdef int total_count = 0 - cdef GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive)) - cdef int i - for i in range(num_envs): - temp_envs[i].human_agent_idx = human_agent_idx - temp_envs[i].reward_vehicle_collision = reward_vehicle_collision - temp_envs[i].reward_offroad_collision = reward_offroad_collision - - map_file = f"resources/gpudrive/binaries/map_{i:03d}.bin".encode('utf-8') - temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]) - set_active_agents(&temp_envs[i]) - - agent_offsets[i] = total_count - total_count += temp_envs[i].active_agent_count - if (temp_envs[i].active_agent_count ==0 ): - print("No active agents: ", map_file) - - agent_offsets[num_envs] = total_count - py_offsets = [agent_offsets[i] for i in range(num_envs + 1)] - for i in range(num_envs): - for x in range(temp_envs[i].num_entities): - free_entity(&temp_envs[i].entities[x]) - free(temp_envs[i].entities) - free(temp_envs[i].active_agent_indices) - free(temp_envs[i].static_car_indices) - free(temp_envs) - free(agent_offsets) - return total_count, py_offsets - def __init__(self, float[:, :] observations, int[:,:] actions, - float[:] rewards, unsigned char[:] terminals, int num_envs, - int human_agent_idx, reward_vehicle_collision, - reward_offroad_collision, offsets): - - self.client = NULL - self.num_envs = num_envs - cdef int num_clones - num_clones = 1 - self.envs = calloc(num_envs*num_clones, sizeof(GPUDrive)) - self.agent_offsets = calloc(num_envs + 1, sizeof(int)) - self.logs = allocate_logbuffer(LOG_BUFFER_SIZE) - cdef int i - for i in range(num_envs + 1): - self.agent_offsets[i] = offsets[i] - cdef int inc - cdef int index - cdef int total_envs - total_envs = num_envs * num_clones - cdef int total_agents - total_agents = self.agent_offsets[num_envs] - cdef char* c_map_file - for i in range(total_envs): - env_index = i % num_envs - clone_index = i // num_envs - inc = self.agent_offsets[env_index] - count = self.agent_offsets[env_index+1] - self.agent_offsets[env_index] - clone_agent_offset = clone_index * total_agents + inc - map_file = f"resources/gpudrive/binaries/map_{env_index:03d}.bin".encode('utf-8') - c_map_file = malloc(len(map_file) + 1) - strcpy(c_map_file, map_file) - self.envs[i] = GPUDrive( - observations=&observations[clone_agent_offset, 0], - actions=&actions[clone_agent_offset,0], - rewards=&rewards[clone_agent_offset], - dones=&terminals[clone_agent_offset], - log_buffer=self.logs, - human_agent_idx=human_agent_idx, - reward_vehicle_collision=reward_vehicle_collision, - reward_offroad_collision=reward_offroad_collision, - map_name = c_map_file - ) - init(&self.envs[i]) - self.client = NULL - - - def reset(self): - cdef int i - for i in range(self.num_envs): - c_reset(&self.envs[i]) - - def step(self): - cdef int i - for i in range(self.num_envs): - c_step(&self.envs[i]) - - def render(self): - cdef GPUDrive* env = &self.envs[24] - if self.client == NULL: - import os - cwd = os.getcwd() - os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) - self.client = make_client(env) - os.chdir(cwd) - - c_render(self.client, env) - - def close(self): - if self.client != NULL: - close_client(self.client) - self.client = NULL - - free(self.envs) - - def log(self): - cdef Log log = aggregate_and_clear(self.logs) - return log diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c index 1b9a251bc2..bc8a3519ff 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.c +++ b/pufferlib/ocean/gpudrive/gpudrive.c @@ -160,10 +160,10 @@ void demo() { // Handle human input for the controlled agent // handle_human_input(&env); c_step(&env); - c_render(client, &env); + c_render(&env); } - close_client(client); + close_client(env.client); free_allocated(&env); } @@ -205,7 +205,7 @@ void performance_test() { } int main() { - demo(); - //performance_test(); + //demo(); + performance_test(); return 0; } diff --git a/pufferlib/ocean/gpudrive/gpudrive.h b/pufferlib/ocean/gpudrive/gpudrive.h index fd4f4c59a4..03c3929a2f 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.h +++ b/pufferlib/ocean/gpudrive/gpudrive.h @@ -81,65 +81,22 @@ static const int collision_offsets[25][2] = { {-2, 1}, {-1, 1}, {0, 1}, {1, 1}, {2, 1}, // Fourth row {-2, 2}, {-1, 2}, {0, 2}, {1, 2}, {2, 2} // Bottom row }; -#define LOG_BUFFER_SIZE 1024 +typedef struct GPUDrive GPUDrive; +typedef struct Client Client; typedef struct Log Log; + struct Log { float episode_return; float episode_length; + float perf; float score; float offroad_rate; float collision_rate; float dnf_rate; + float n; }; - -typedef struct LogBuffer LogBuffer; -struct LogBuffer { - Log* logs; - int length; - int idx; -}; - -LogBuffer* allocate_logbuffer(int size) { - LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); - logs->logs = (Log*)calloc(size, sizeof(Log)); - logs->length = size; - logs->idx = 0; - return logs; -} - -void free_logbuffer(LogBuffer* buffer) { - free(buffer->logs); - free(buffer); -} - -void add_log(LogBuffer* logs, Log* log) { - if (logs->idx == logs->length) { - return; - } - logs->logs[logs->idx] = *log; - logs->idx += 1; - //printf("Log: %f, %f,\n", log->episode_return, log->episode_length); -} - -Log aggregate_and_clear(LogBuffer* logs) { - Log log = {0}; - if (logs->idx == 0) { - return log; - } - for (int i = 0; i < logs->idx; i++) { - log.episode_return += logs->logs[i].episode_return / logs->idx; - log.episode_length += logs->logs[i].episode_length / logs->idx; - log.score += logs->logs[i].score / logs->idx; - log.offroad_rate += logs->logs[i].offroad_rate / logs->idx; - log.collision_rate += logs->logs[i].collision_rate / logs->idx; - log.dnf_rate += logs->logs[i].dnf_rate / logs->idx; - } - logs->idx = 0; - return log; -} - typedef struct Entity Entity; struct Entity { int type; @@ -196,13 +153,13 @@ float relative_distance_2d(float x1, float y1, float x2, float y2){ return distance; } -typedef struct GPUDrive GPUDrive; struct GPUDrive { + Client* client; float* observations; int* actions; float* rewards; - unsigned char* dones; - LogBuffer* log_buffer; + unsigned char* terminals; + Log log; Log* logs; int num_agents; int active_agent_count; @@ -237,6 +194,25 @@ struct GPUDrive { float world_mean_y; }; +void add_log(GPUDrive* env) { + for(int i = 0; i < env->active_agent_count; i++){ + if(env->reached_goal_this_episode[i]) { + env->log.score += 1.0f; + env->log.perf += 1.0f; + } + int offroad = env->logs[i].offroad_rate; + env->log.offroad_rate += offroad; + int collided = env->logs[i].collision_rate; + env->log.collision_rate += collided; + if(!offroad && !collided && !env->reached_goal_this_episode[i]){ + env->log.dnf_rate += 1.0f; + } + env->log.episode_length += env->logs[i].episode_length; + env->log.episode_return += env->logs[i].episode_return; + env->log.n += 1; + } +} + Entity* load_map_binary(const char* filename, GPUDrive* env) { FILE* file = fopen(filename, "rb"); //printf("fileanme: %s\n", filename); @@ -341,7 +317,7 @@ void set_active_agents(GPUDrive* env){ int expert_static_car_indices[MAX_CARS]; env->active_agent_count = 1; active_agent_indices[0] = env->num_objects-1; - for(int i = 0; i < env->num_objects && env->num_cars < MAX_CARS; i++){ + for(int i = 0; i < env->num_objects-1 && env->num_cars < MAX_CARS; i++){ if(env->entities[i].type != 1) continue; if(env->entities[i].traj_valid[0] != 1) continue; env->num_cars++; @@ -663,8 +639,7 @@ void allocate(GPUDrive* env){ env->observations = (float*)calloc(env->active_agent_count*max_obs, sizeof(float)); env->actions = (int*)calloc(env->active_agent_count*2, sizeof(int)); env->rewards = (float*)calloc(env->active_agent_count, sizeof(float)); - env->dones = (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char)); - env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE); + env->terminals= (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char)); // printf("allocated\n"); } @@ -672,8 +647,7 @@ void free_allocated(GPUDrive* env){ free(env->observations); free(env->actions); free(env->rewards); - free(env->dones); - free_logbuffer(env->log_buffer); + free(env->terminals); free_initialized(env); } @@ -1048,19 +1022,7 @@ void c_step(GPUDrive* env){ memset(env->rewards, 0, env->active_agent_count * sizeof(float)); env->timestep++; if(env->timestep == 91){ - for(int i = 0; i < env->active_agent_count; i++){ - if(!env->reached_goal_this_episode[i]) { - env->logs[i].score = 0.0f; - } else { - env->logs[i].score = 1.0f; - } - int offroad = env->logs[i].offroad_rate; - int collided = env->logs[i].collision_rate; - if(!offroad && !collided && !env->reached_goal_this_episode[i]){ - env->logs[i].dnf_rate = 1.0f; - } - add_log(env->log_buffer, &env->logs[i]); - } + add_log(env); c_reset(env); } // Move statix experts @@ -1080,7 +1042,7 @@ void c_step(GPUDrive* env){ } env->entities[agent_idx].collision_state = 0; move_dynamics(env, i, agent_idx); - // move_expert(env, env->actions, agent_idx); + //move_expert(env, env->actions, agent_idx); collision_check(env, agent_idx); if(env->entities[agent_idx].collision_state > 0 && env->goal_reached[i] == 0){ if(env->entities[agent_idx].collision_state == VEHICLE_COLLISION){ @@ -1372,7 +1334,12 @@ void draw_road_edge(GPUDrive* env, float start_x, float start_y, float end_x, fl DrawTriangle3D(t4, t1, b1, CURB_SIDE); } -void c_render(Client* client, GPUDrive* env) { +void c_render(GPUDrive* env) { + if (env->client == NULL) { + env->client = make_client(env); + } + Client* client = env->client; + BeginDrawing(); Color road = (Color){35, 35, 37, 255}; ClearBackground(road); diff --git a/pufferlib/ocean/gpudrive/gpudrive.py b/pufferlib/ocean/gpudrive/gpudrive.py index 8877afbb1c..57a4c51a67 100644 --- a/pufferlib/ocean/gpudrive/gpudrive.py +++ b/pufferlib/ocean/gpudrive/gpudrive.py @@ -5,6 +5,7 @@ import pufferlib from pufferlib.ocean.gpudrive.cy_gpudrive import CyGPUDrive, entity_dtype +from pufferlib.ocean.gpudrive import binding class GPUDrive(pufferlib.PufferEnv): def __init__(self, num_envs=1, render_mode=None, report_interval=1, @@ -25,37 +26,55 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, shape=(self.num_obs,), dtype=np.float32) self.single_action_space = gymnasium.spaces.MultiDiscrete([7, 13]) - total_agents, agent_offsets =CyGPUDrive.get_total_agent_count( - num_envs, human_agent_idx, reward_vehicle_collision, reward_offroad_collision) + agent_offsets = binding.shared(num_envs=num_envs) + total_agents = agent_offsets[-1] self.num_agents = total_agents super().__init__(buf=buf) - self.c_envs = CyGPUDrive(self.observations, self.actions, self.rewards, - self.terminals, num_envs, human_agent_idx, reward_vehicle_collision, reward_offroad_collision, offsets = agent_offsets) + env_ids = [] + for i in range(num_envs): + cur = agent_offsets[i] + nxt = agent_offsets[i+1] + env_id = binding.env_init( + self.observations[cur:nxt], + self.actions[cur:nxt], + self.rewards[cur:nxt], + self.terminals[cur:nxt], + self.truncations[cur:nxt], + seed, + human_agent_idx=human_agent_idx, + reward_vehicle_collision=reward_vehicle_collision, + reward_offroad_collision=reward_offroad_collision, + env_id=i + ) + env_ids.append(env_id) + self.c_envs = binding.vectorize(*env_ids) + pass - def reset(self, seed=None): - self.c_envs.reset() + def reset(self, seed=0): + binding.vec_reset(self.c_envs, seed) self.tick = 0 return self.observations, [] def step(self, actions): self.actions[:] = actions - self.c_envs.step() + binding.vec_step(self.c_envs) self.tick+=1 info = [] if self.tick % self.report_interval == 0: - log = self.c_envs.log() - if log['episode_length'] > 0: + log = binding.vec_log(self.c_envs) + if log: info.append(log) - info.append({'total_agents': self.num_agents}) + return (self.observations, self.rewards, self.terminals, self.truncations, info) def render(self): - self.c_envs.render() + binding.vec_render(self.c_envs) def close(self): - self.c_envs.close() + binding.vec_close(self.c_envs) + def calculate_area(p1, p2, p3): # Calculate the area of the triangle using the determinant method return 0.5 * abs((p1['x'] - p3['x']) * (p2['y'] - p1['y']) - (p1['x'] - p2['x']) * (p3['y'] - p1['y'])) diff --git a/setup.py b/setup.py index 042def711f..4887b8ae56 100644 --- a/setup.py +++ b/setup.py @@ -319,7 +319,7 @@ def run(self): # 'pufferlib/ocean/tactical/c_tactical', #'pufferlib/ocean/squared/cy_squared', 'pufferlib/ocean/snake/cy_snake', - 'pufferlib/ocean/gpudrive/cy_gpudrive', + #'pufferlib/ocean/gpudrive/cy_gpudrive', #'pufferlib/ocean/pong/cy_pong', # 'pufferlib/ocean/breakout/cy_breakout', # 'pufferlib/ocean/cartpole/cy_cartpole', @@ -364,14 +364,14 @@ def run(self): #c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split() -pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole'] +pure_c_extensions = ['gpudrive', 'squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole'] c_extensions = [ Extension( f'pufferlib.ocean.{name}.binding', sources=[f'pufferlib/ocean/{name}/binding.c'], include_dirs=[numpy.get_include(), 'raylib/include'], - extra_compile_args=extra_compile_args,# + ['-fsanitize=address,undefined,bounds,pointer-overflow,leak'], + extra_compile_args=extra_compile_args, # + ['-fsanitize=address,undefined,bounds,pointer-overflow,leak', '-static-libasan'], extra_link_args=extra_link_args,# + ['-fsanitize=address,undefined,bounds,pointer-overflow,leak', '-g'], extra_objects=[f'{RAYLIB_NAME}/lib/libraylib.a'], )