Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 41 additions & 46 deletions pufferlib/config/ocean/gpudrive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ package = ocean
env_name = puffer_gpudrive
policy_name = GPUDrive
rnn_name = Recurrent

[vec]
num_workers = 16
num_envs = 16
batch_size = 8
num_workers = 8
num_envs = 8
batch_size = 2
#backend = Serial

[policy]
Expand All @@ -19,66 +18,62 @@ input_size = 512
hidden_size = 512

[env]
num_envs = 72
reward_vehicle_collision = 0.0
reward_offroad_collision = 0.0

num_agents = 1024
reward_vehicle_collision = -0.3988470042550285
reward_offroad_collision = -0.2096585204075736
spawn_immunity_timer = 28.852396095973667
num_maps = 100
[train]
total_timesteps = 150_000_000
#learning_rate = 0.005
total_timesteps = 100_000_000
#learning_rate = 0.02
#gamma = 0.985
anneal_lr = True
batch_size = 738192
minibatch_size = 23296
max_minibatch_size = 23296
batch_size = 745472
minibatch_size = 11648
max_minibatch_size = 11648
bptt_horizon = 91

#adam_beta1 = 0.9225899639773112
#adam_beta2 = 0.9
#adam_eps = 0.0004030478187254784
#ent_coef = 0.0020159472963835016
#gae_lambda = 0.8829440612065992
#gamma = 0.9872971455373439
#learning_rate = 0.0003947934701844728
#max_grad_norm = 0.5296288081133984
#prio_alpha = 0.99
#prio_beta0 = 0.48469847315324566
#update_epochs = 2
#vf_coef = 3.6777541336880786
#checkpoint_interval = 1000
adam_beta1 = 0.9552561751280151
adam_beta2 = 0.9998539818458761
adam_eps = 1.9142243622636095e-10
clip_coef = 0.3092857081843024
ent_coef = 0.00253448350160094
gae_lambda = 0.9422289315382624
gamma = 0.9775539006742
learning_rate = 0.0055822711860840125
max_grad_norm = 1.5891941467078512
prio_alpha = 0.52915904496396
prio_beta0 = 0.4455420904215256
update_epochs = 1
vf_coef = 1.1854206994433176
checkpoint_interval = 1000


adam_beta1 = 0.9852000972032763
adam_beta2 = 0.9948751690861872
adam_eps = 0.000002967099767264975
clip_coef = 0.3153578071651496
ent_coef = 0.000369784972524992
gae_lambda = 0.9385892578563558
gamma = 0.9864999317644947
learning_rate = 0.0022659903674495338
max_grad_norm = 1.942292174080673
prio_alpha = 0.9414003089586056
prio_beta = 0.9429842108374631
vf_clip_coef = 1.9533056765171148
vf_coef = 3.2028923035616774

[sweep.train.total_timesteps]
distribution = log_normal
min = 5e7
max = 2e8
mean = 1e8
min = 2e6
max = 5e6
mean = 3.5e6
scale = time

[sweep.env.reward_vehicle_collision]
distribution = uniform
min = -1.0
max = -0.25
max = 0.0
mean = -0.5
mean = -0.2
scale = auto

[sweep.env.reward_offroad_collision]
distribution = uniform
min = -1.0
max = -0.25
max = 0.0
mean = -0.5
mean = -0.2
scale = auto

[sweep.env.spawn_immunity_timer]
distribution = uniform
min = 1
max = 91
mean = 30
scale = auto
79 changes: 61 additions & 18 deletions pufferlib/ocean/gpudrive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,61 @@
#include "../env_binding.h"

// my_shared: computes shared per-environment bookkeeping from kwargs and hands it to Python.
//
// NOTE(review): this listing looks like a rendered diff in which removed and added
// lines are interleaved without +/- markers, so the text below is not a single
// compilable revision (e.g. agent_offsets is declared twice and the braces do not
// balance). Confirm against the repository before changing any code here.
//
// As far as the visible lines show, two variants are mixed together:
//   * one reads "num_envs", loads map_<i>.bin for each index i, and returns the
//     agent_offsets list;
//   * one reads "num_agents" and "num_maps", picks map ids with rand(), and returns
//     a 3-tuple of (agent_offsets slice, map_ids slice, env_count).
static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
// --- presumably the earlier variant: one temporary env per requested env index ---
int num_envs = unpack(kwargs, "num_envs");
GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
PyObject* agent_offsets = PyList_New(num_envs+1);
int total_count = 0;
// getting agent counts and offsets
for(int i = 0;i< num_envs;i++) {
// --- presumably the later variant: keep adding envs until num_agents is covered ---
int num_agents = unpack(kwargs, "num_agents");
int num_maps = unpack(kwargs, "num_maps");
// GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
// PyObject* agent_offsets = PyList_New(num_envs+1);
// PyObject* map_ids = PyList_New(num_envs);
// Re-seeds rand() from the wall clock on every call of this function.
srand(time(NULL));
int total_agent_count = 0;
int env_count = 0;
// The requested agent count doubles as the upper bound on the number of envs,
// so both lists are sized from it and trimmed after the loop.
int max_envs = num_agents;
// NOTE(review): the PyList_New results are used without a NULL check.
PyObject* agent_offsets = PyList_New(max_envs+1);
PyObject* map_ids = PyList_New(max_envs);
// getting env count
// Stops once the running agent total reaches num_agents or max_envs envs were added.
while(total_agent_count < num_agents && env_count < max_envs){
char map_file[100];
// (earlier variant) map file chosen by loop index i; offset stored before adding the env's agents
sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", i);
temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]);
set_active_agents(&temp_envs[i]);
PyObject* num = PyLong_FromLong(total_count);
PyList_SetItem(agent_offsets, i, num);
//Py_DECREF(num);
total_count += temp_envs[i].active_agent_count;
// (later variant) map id drawn with rand().
// NOTE(review): rand() % num_maps is a division by zero when num_maps is 0 —
// confirm callers always pass a positive value.
int map_id = rand() % num_maps;
// Temporary env used only to obtain active_agent_count; it is released at the
// end of this iteration. NOTE(review): the calloc result is not checked.
GPUDrive* env = calloc(1, sizeof(GPUDrive));
// NOTE(review): sprintf is unbounded; snprintf(map_file, sizeof map_file, ...) would
// make the 100-byte limit explicit.
sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id);
env->entities = load_map_binary(map_file, env);
// presumably fills active_agent_count and the index arrays freed below — verify in the env source
set_active_agents(env);
// Store map_id
// PyList_SetItem takes ownership of the new integer, so no Py_DECREF follows.
PyObject* map_id_obj = PyLong_FromLong(map_id);
PyList_SetItem(map_ids, env_count, map_id_obj);
// Store agent offset
// The offset recorded for this env is the agent total before its own agents are added.
PyObject* offset = PyLong_FromLong(total_agent_count);
PyList_SetItem(agent_offsets, env_count, offset);
total_agent_count += env->active_agent_count;
env_count++;
// Release everything the temporary env owns, then the env itself.
for(int j=0;j<env->num_entities;j++) {
free_entity(&env->entities[j]);
}
free(env->entities);
free(env->active_agent_indices);
free(env->static_car_indices);
free(env->expert_static_car_indices);
free(env);
}
// Clamp so the final offset never exceeds the requested number of agents.
if(total_agent_count >= num_agents){
total_agent_count = num_agents;
}
// (earlier variant) closing offset stored at index num_envs
PyObject* num = PyLong_FromLong(total_count);
PyList_SetItem(agent_offsets, num_envs, num);
// (later variant) closing offset stored at index env_count
PyObject* final_total_agent_count = PyLong_FromLong(total_agent_count);
PyList_SetItem(agent_offsets, env_count, final_total_agent_count);
PyObject* final_env_count = PyLong_FromLong(env_count);
// resize lists
// PyList_GetSlice returns new lists holding only the filled prefix.
PyObject* resized_agent_offsets = PyList_GetSlice(agent_offsets, 0, env_count + 1);
PyObject* resized_map_ids = PyList_GetSlice(map_ids, 0, env_count);
//
// Drop the oversized originals; only the slices are returned.
Py_DECREF(agent_offsets);
Py_DECREF(map_ids);
// create a tuple
// PyTuple_SetItem takes ownership of each element: (agent_offsets, map_ids, env_count).
PyObject* tuple = PyTuple_New(3);
PyTuple_SetItem(tuple, 0, resized_agent_offsets);
PyTuple_SetItem(tuple, 1, resized_map_ids);
PyTuple_SetItem(tuple, 2, final_env_count);
return tuple;

//Py_DECREF(num);
/*
for(int i = 0;i<num_envs; i++) {
Expand All @@ -33,18 +71,21 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
}
free(temp_envs);
*/
// (earlier variant) returned the offsets list directly
return agent_offsets;
// return agent_offsets;
}

// my_init: copies per-environment settings from kwargs into env, builds the map
// file path, and calls init(env). Always returns 0.
//
// NOTE(review): this listing looks like a rendered diff with removed and added lines
// interleaved; the "env_id" lines and the "map_id"/"max_agents" lines presumably
// belong to different revisions — confirm against the repository.
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->human_agent_idx = unpack(kwargs, "human_agent_idx");
env->reward_vehicle_collision = unpack(kwargs, "reward_vehicle_collision");
env->reward_offroad_collision = unpack(kwargs, "reward_offroad_collision");
// (presumably the earlier revision) map selected by environment index
int env_id = unpack(kwargs, "env_id");
env->spawn_immunity_timer = unpack(kwargs, "spawn_immunity_timer");
// (presumably the later revision) map selected by an explicit map id, with an agent cap
int map_id = unpack(kwargs, "map_id");
int max_agents = unpack(kwargs, "max_agents");

// NOTE(review): map_file lives on this function's stack, yet its address is stored
// in env->map_name below. The pointer dangles once my_init returns; this is only
// safe if map_name is never read after init(env) — verify, or copy the string.
char map_file[100];
sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", env_id);
sprintf(map_file, "resources/gpudrive/binaries/map_%03d.bin", map_id);
env->map_name = map_file;
env->num_agents = max_agents;
// init() is defined elsewhere; presumably it loads the map named by env->map_name — TODO confirm
init(env);
return 0;
}
Expand All @@ -58,5 +99,7 @@ static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "collision_rate", log->collision_rate);
assign_to_dict(dict, "dnf_rate", log->dnf_rate);
assign_to_dict(dict, "n", log->n);
assign_to_dict(dict, "completion_rate", log->completion_rate);
assign_to_dict(dict, "clean_collision_rate", log->clean_collision_rate);
return 0;
}
9 changes: 5 additions & 4 deletions pufferlib/ocean/gpudrive/gpudrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ void demo() {
.human_agent_idx = 0,
.reward_vehicle_collision = -0.1f,
.reward_offroad_collision = -0.1f,
.map_name = "resources/gpudrive/binaries/map_000.bin"
.map_name = "resources/gpudrive/binaries/map_086.bin",
.spawn_immunity_timer = 30
};
allocate(&env);
c_reset(&env);
Expand Down Expand Up @@ -173,7 +174,7 @@ void performance_test() {
GPUDrive env = {
.dynamics_model = CLASSIC,
.human_agent_idx = 0,
.map_name = "resources/gpudrive/binaries/map_005.bin"
.map_name = "resources/gpudrive/binaries/map_055.bin"
};
clock_t start_time, end_time;
double cpu_time_used;
Expand Down Expand Up @@ -206,7 +207,7 @@ void performance_test() {
}

// Program entry point: selects which driver routine runs, then exits with status 0.
// NOTE(review): this listing looks like a rendered diff with removed and added lines
// interleaved, so both the live and the commented-out form of each call appear.
// Presumably one revision ran performance_test() and the other runs demo() —
// confirm against the repository which call is meant to be active.
int main() {
// demo();
performance_test();
demo();
// performance_test();
return 0;
}
Loading
Loading