Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
122 commits
Select commit Hold shift + click to select a range
44a22b5
gpudrive memcpy testing
l1onh3art88 Feb 27, 2025
7bea544
slicing mem with memcpy is fast enough
l1onh3art88 Feb 27, 2025
9d93872
latest
l1onh3art88 Feb 28, 2025
0661c32
speed testing
l1onh3art88 Mar 5, 2025
46b0b35
rendering 90% there
l1onh3art88 Mar 8, 2025
dc81c2a
Merge branch 'gpudrive' of https://github.com/l1onh3art88/PufferLib i…
l1onh3art88 Mar 8, 2025
d8e8dad
visual change on camera and goal spheres
l1onh3art88 Mar 8, 2025
4830538
hunting for memory leaks
l1onh3art88 Mar 9, 2025
1c29a59
fixed memory shit
l1onh3art88 Mar 9, 2025
f039059
trying to get training loop setup
l1onh3art88 Mar 10, 2025
16c57ab
cleaning up mem leaks
l1onh3art88 Mar 10, 2025
46ad42b
train?
l1onh3art88 Mar 11, 2025
8516a9b
forgot this file
l1onh3art88 Mar 11, 2025
a49dc83
training loop stuck at epoch 1
l1onh3art88 Mar 11, 2025
6ddf5b4
work plz
l1onh3art88 Mar 11, 2025
ea72fb9
latest code
l1onh3art88 Mar 11, 2025
eec1457
fixed json bug
l1onh3art88 Mar 12, 2025
f6762ad
Merge branch 'gpudrive' of https://github.com/l1onh3art88/PufferLib i…
l1onh3art88 Mar 12, 2025
a888540
made a big mistake with binary file
l1onh3art88 Mar 12, 2025
b6d3b84
grr
l1onh3art88 Mar 12, 2025
4e6310c
Merge branch 'dev' of https://github.com/l1onh3art88/PufferLib into g…
l1onh3art88 Mar 12, 2025
d14e0d7
Merge branch 'dev' of https://github.com/l1onh3art88/PufferLib into g…
l1onh3art88 Mar 12, 2025
cfae24f
Training at 500k sps!!
l1onh3art88 Mar 12, 2025
9d863ab
1 obs llol
l1onh3art88 Mar 12, 2025
09ddf73
training with 1 obs like a madman
l1onh3art88 Mar 12, 2025
55f78d0
fixed rollout on cleanpufferl i think?
l1onh3art88 Mar 13, 2025
76788f6
removed z
l1onh3art88 Mar 13, 2025
d61ad5d
only reward 1 time
l1onh3art88 Mar 13, 2025
bd99fa6
fixing scoring
l1onh3art88 Mar 13, 2025
d579c78
made some adjusments
l1onh3art88 Mar 13, 2025
0db752a
random heading spawn
l1onh3art88 Mar 13, 2025
064b4fc
new map
l1onh3art88 Mar 14, 2025
830ab65
obs at 0 when done
l1onh3art88 Mar 14, 2025
9786063
change obs from 1 distance to x,y dist
l1onh3art88 Mar 14, 2025
8a337cd
trained successful model to reach goal no collisions on
l1onh3art88 Mar 15, 2025
012eb90
new visual control on human
l1onh3art88 Mar 15, 2025
445bd31
adjsuted physics dynamics
l1onh3art88 Mar 15, 2025
85525e3
added collision checks 27k sps rn
l1onh3art88 Mar 15, 2025
8c0c70b
cython edit
l1onh3art88 Mar 15, 2025
3dba889
rewards negative on collision
l1onh3art88 Mar 15, 2025
1e364c9
collision stats
l1onh3art88 Mar 16, 2025
1179707
fixed collisions i think
l1onh3art88 Mar 16, 2025
eb93897
collisions trained at 140k sps .999 score and .04 collision rate
l1onh3art88 Mar 16, 2025
9175bfe
cars have differnt lengths
l1onh3art88 Mar 16, 2025
8e7382b
relative angle obs adjustment
l1onh3art88 Mar 17, 2025
9955a4c
relative angle for streets as well
l1onh3art88 Mar 17, 2025
1194612
fixed init bug no obs
l1onh3art88 Mar 18, 2025
818ef40
grid style colliions - progress
l1onh3art88 Mar 19, 2025
ab6a66b
collisions with grid style checking - obs not changed yet
l1onh3art88 Mar 19, 2025
e612bb5
small optimizations
l1onh3art88 Mar 19, 2025
fa5aee1
300k sps env?
l1onh3art88 Mar 19, 2025
e834b5f
performance questions
l1onh3art88 Mar 19, 2025
306b77d
removed active agents from grid cells
l1onh3art88 Mar 20, 2025
9e0072a
check neighbor function generalized
l1onh3art88 Mar 20, 2025
7d5c785
bigger obs
l1onh3art88 Mar 20, 2025
7997928
obs in but not confident they are accurate
l1onh3art88 Mar 20, 2025
a6da367
self and other cars
l1onh3art88 Mar 20, 2025
307cd2c
wrong indexign of obs
l1onh3art88 Mar 20, 2025
e9a6fd9
obs big test
l1onh3art88 Mar 20, 2025
99cc80e
9x9 obs
l1onh3art88 Mar 21, 2025
7a4e7a6
200 max map obs
l1onh3art88 Mar 21, 2025
76e3783
big obs is back baby
l1onh3art88 Mar 21, 2025
2945cde
bigger obs
l1onh3art88 Mar 21, 2025
1e00f57
memory debugging
l1onh3art88 Mar 21, 2025
fd701d0
cleaning up obs and checkign performance
l1onh3art88 Mar 22, 2025
742ad0a
big refactor + cacheing - half working
l1onh3art88 Mar 23, 2025
17209f6
cleaning up obs and -1 for not visible segments
l1onh3art88 Mar 23, 2025
e9459e6
21 window + penalty for collisions added back
l1onh3art88 Mar 23, 2025
8191966
added other cars as other agent type of obs
l1onh3art88 Mar 23, 2025
620a1e9
vehicle collisions and wall collisions fixed
l1onh3art88 Mar 23, 2025
6ac73d9
added more self obs
l1onh3art88 Mar 23, 2025
630f92b
obs was bugged now its back
l1onh3art88 Mar 23, 2025
64caf17
forgot this file
l1onh3art88 Mar 23, 2025
314b927
reward collision rate as param
l1onh3art88 Mar 23, 2025
b05db09
modified car partner obs speed as absolute and not relative and lower…
l1onh3art88 Mar 23, 2025
fb03fd4
normalized obs
l1onh3art88 Mar 23, 2025
d9cdeb3
reset obs at 0 each step
l1onh3art88 Mar 23, 2025
3e690ac
revrese normalize obs for visual
l1onh3art88 Mar 23, 2025
d8c0529
Merge branch 'gpudrive' of https://github.com/l1onh3art88/PufferLib i…
l1onh3art88 Mar 23, 2025
2cc5399
first crack at network design here
l1onh3art88 Mar 24, 2025
9374a78
latest
l1onh3art88 Mar 25, 2025
2c46c69
reshape and max pool
l1onh3art88 Mar 25, 2025
b5d6b53
accepting incomign changes
l1onh3art88 Mar 26, 2025
ce1e3b8
Merge branch 'gpudrive' of https://github.com/l1onh3art88/PufferLib i…
l1onh3art88 Mar 26, 2025
8dca10a
trying to train
l1onh3art88 Mar 26, 2025
8d3407c
trying to get smart robot
l1onh3art88 Mar 26, 2025
3763eb2
changed dynamics
l1onh3art88 Mar 27, 2025
bdd2762
code cleanup
l1onh3art88 Mar 28, 2025
27fe514
shrink cars?
l1onh3art88 Mar 28, 2025
054c426
dont penalize after goal
l1onh3art88 Mar 28, 2025
99dea54
back to 50 vision
l1onh3art88 Mar 28, 2025
468b7ba
small obs edge case error
l1onh3art88 Mar 28, 2025
e952655
cleanup
l1onh3art88 Mar 28, 2025
4468b5f
multimap memory testing
l1onh3art88 Mar 30, 2025
fc9e281
multi map training test
l1onh3art88 Apr 2, 2025
8be8d9b
agent masking?
l1onh3art88 Apr 2, 2025
cebf562
skip movign if reached goal
l1onh3art88 Apr 2, 2025
5ed7883
zero out invisible obs
l1onh3art88 Apr 2, 2025
ff31fb4
adjusted active agent for always have at least 1 sdc
l1onh3art88 Apr 4, 2025
33e651c
sweeping for all sweeps
l1onh3art88 Apr 6, 2025
b969109
cache looping optimizations + removing goku render code
l1onh3art88 Apr 10, 2025
2a363a5
less evil render code
l1onh3art88 Apr 11, 2025
3ef7e76
formatting
l1onh3art88 Apr 11, 2025
6df5c9d
collision missed possible
l1onh3art88 Apr 11, 2025
98ef8c3
fixed slow cache bug
l1onh3art88 Apr 11, 2025
ae1bff2
screwed with obs
l1onh3art88 Apr 15, 2025
51f84b7
200 road obs
l1onh3art88 Apr 18, 2025
920d0ff
small tower climb c nn issue solve
l1onh3art88 Apr 19, 2025
b812692
1M sps env speed consistent now, but train is stuck at lower speed
l1onh3art88 Apr 21, 2025
90a7334
new network arch
l1onh3art88 Apr 22, 2025
20aa442
dealing with dataset bugs
l1onh3art88 Apr 23, 2025
44eac32
cleaned up dataset some more
l1onh3art88 Apr 23, 2025
6978f5b
masking off and reward every step you are at goal range
l1onh3art88 Apr 23, 2025
34d5b41
fix collision edge case
l1onh3art88 Apr 23, 2025
a602f59
render changes
l1onh3art88 Apr 24, 2025
322b1d3
in progress render improvements
l1onh3art88 Apr 25, 2025
f5f841f
rendering improvements
l1onh3art88 Apr 25, 2025
436cd3d
exact obs as gpudrive og
l1onh3art88 May 5, 2025
8b93bb8
setting up baselines
l1onh3art88 May 6, 2025
f605a11
latest with dev - has bugs
l1onh3art88 May 6, 2025
0c0b982
Merge branch 'PufferAI:dev' into dev
l1onh3art88 May 6, 2025
e68d9ef
latest with dev
l1onh3art88 May 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions config/ocean/gpudrive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,30 @@ package = ocean
env_name = puffer_gpudrive
policy_name = GPUDrive
rnn_name = Recurrent
vec = multiprocessing
vec = native

[policy]
input_size = 256
hidden_size = 256
input_size = 64
hidden_size = 128

[rnn]
input_size = 256
hidden_size = 256
input_size = 64
hidden_size = 128

[env]
num_envs = 256
reward_vehicle_collision = -0.25
reward_offroad_collision = -0.25
num_envs = 75
reward_vehicle_collision = 0
reward_offroad_collision = 0

[train]
total_timesteps = 250_000_000
learning_rate = 0.05
minibatch_size = 32768
num_workers = 2
num_envs = 2
total_timesteps = 150_000_000
learning_rate = 0.005
num_workers = 1
num_envs = 1
env_batch_size = 1
anneal_lr = True



[sweep.env.reward_vehicle_collision]
distribution = uniform
Expand Down
29 changes: 29 additions & 0 deletions gpudrive_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "gpudrive_test.h"
#include <time.h>

// Throughput benchmark: initializes a 128-agent / 15-active-agent environment,
// calls step() repeatedly for about `test_time` wall-clock seconds, then prints
// the number of active-agent steps per second ("SPS") and releases the buffers.
void demo(){
    const long test_time = 10;  // benchmark duration in seconds
    test_struct env = {
        .num_agents = 128,
        .active_agents =15,
    };
    init(&env);

    // Counted in long long: the product steps * active_agents can exceed the
    // range of int on a fast machine or with a longer test_time.
    long long steps = 0;
    const time_t start = time(NULL);
    while (time(NULL) - start < test_time) {
        step(&env);
        steps++;
    }
    const time_t end = time(NULL);

    // The loop only exits once at least test_time seconds have elapsed, but
    // clamp anyway so a misbehaving clock can never cause a division by zero.
    long long elapsed = (long long)(end - start);
    if (elapsed <= 0) {
        elapsed = 1;
    }
    printf("SPS: %lld\n", (steps * env.active_agents) / elapsed);
    free_initialized(&env);
}

// Program entry point: runs the benchmark once and exits with status 0.
int main(void)
{
    demo();
    return 0;
}
126 changes: 126 additions & 0 deletions gpudrive_test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <string.h>
// Number of leading entries in each per-agent observation slice.
#define SELF_OBS 7
// Number of entries contributed per other active agent; a slice holds
// SELF_OBS + (active_agents - 1) * OTHER_OBS ints in total.
#define OTHER_OBS 11
// Number of ints in the map block stored after all per-agent slices.
#define MAP_OBS 3000

// Source buffer that compute_observations() copies into the map block of the
// observation buffer. Nothing visible in this header writes to it (the only
// writers are commented out), so it keeps its zero initialization.
static int new_map_obs[MAP_OBS];

// Minimal environment used by the memcpy benchmark.
//   observations  - active_agents per-agent slices followed by MAP_OBS map ints
//                   (allocated by init()).
//   agent_states  - num_agents per-agent slices (allocated by init()).
//   num_agents    - total number of agents with stored state.
//   active_agents - number of agent slots present in `observations`.
typedef struct test_struct {
    int *observations;
    int *agent_states;
    int num_agents;
    int active_agents;
} test_struct;

// Prints the observation slice of slot `agent_idx` on one line, prefixed with
// "agent <idx> obs: ". The slice length is
// SELF_OBS + (active_agents - 1) * OTHER_OBS ints.
void print_obs(test_struct* env, int agent_idx) {
    const int slice_len = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);
    const int slice_start = agent_idx*slice_len;

    printf("agent %d obs: ", agent_idx);
    int k = 0;
    while (k < slice_len) {
        printf("%d ", env->observations[slice_start + k]);
        k++;
    }
    printf("\n");
}

// Prints the MAP_OBS ints of the map block, which starts right after the
// active_agents per-agent slices in env->observations.
void print_map_obs(test_struct* env) {
    const int slice_len = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);
    const int map_start = env->active_agents*slice_len;

    fputs("map obs: ", stdout);
    int k = 0;
    while (k < (MAP_OBS)) {
        printf("%d ", env->observations[map_start + k]);
        k++;
    }
    putchar('\n');
}

// Fills the observation buffer with recognizable test data: every entry of
// slot k's slice is set to k, and the map block gets rand() % 100 values.
void add_obs(test_struct* env) {
    const int slice_len = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);
    const int agent_total = env->active_agents * slice_len;

    // Walk the agent region once; bump the slot label each time a slice
    // boundary is crossed.
    int slot = 0;
    int pos = 0;
    while (pos < agent_total) {
        env->observations[pos] = slot;
        pos++;
        if (pos % slice_len == 0) {
            slot++;
        }
    }

    // Map block lives directly after the agent region.
    for (int m = 0; m < (MAP_OBS); m++) {
        env->observations[agent_total + m] = rand() % 100;
    }
}

// Overwrites every entry of env->agent_states (num_agents slices) with a
// pseudo-random value in [0, 99].
void set_agents(test_struct* env) {
    const int slice_len = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);
    const int total = env->num_agents * (slice_len);

    int pos = 0;
    while (pos < total) {
        env->agent_states[pos] = rand() % 100;
        pos++;
    }
}

// Allocates the benchmark buffers and seeds the observations.
//
// env->num_agents and env->active_agents must be set (and positive) before
// the call. On return:
//   env->agent_states holds num_agents zeroed per-agent slices,
//   env->observations holds active_agents slices plus MAP_OBS map ints,
//   filled in by add_obs().
// Both buffers are owned by `env` and released by free_initialized().
//
// Invalid counts or an allocation failure print a message to stderr and
// terminate the process, since the function has no way to report an error.
void init(test_struct* env) {
    // Non-positive counts would yield negative sizes that wrap to huge
    // allocation requests once converted to size_t.
    if (env->num_agents <= 0 || env->active_agents <= 0) {
        fprintf(stderr, "init: num_agents and active_agents must be positive\n");
        exit(EXIT_FAILURE);
    }

    const size_t obs_size_per_agent =
        (size_t)SELF_OBS + (size_t)(env->active_agents - 1) * (size_t)(OTHER_OBS);
    const size_t state_count = (size_t)env->num_agents * obs_size_per_agent;
    const size_t obs_count = (size_t)env->active_agents * obs_size_per_agent + (size_t)(MAP_OBS);

    env->agent_states = calloc(state_count, sizeof *env->agent_states);
    env->observations = calloc(obs_count, sizeof *env->observations);
    if (env->agent_states == NULL || env->observations == NULL) {
        fprintf(stderr, "init: failed to allocate benchmark buffers\n");
        free(env->agent_states);
        free(env->observations);
        exit(EXIT_FAILURE);
    }

    add_obs(env);
}

// Releases the buffers allocated by init(). The pointers are reset to NULL so
// that a repeated call is harmless (free(NULL) is a no-op) and stale use of
// the struct fails fast instead of touching freed memory.
void free_initialized(test_struct* env) {
    free(env->observations);
    env->observations = NULL;
    free(env->agent_states);
    env->agent_states = NULL;
}

// Rebuilds the observation buffer from the current world state.
//
// For each slot i in [0, active_agents), the stored state slice of agent
// rand_agents[i] is copied into observation slice i. Afterwards the map block
// (MAP_OBS ints located right after the agent slices) is refreshed from
// new_map_obs.
//
// rand_agents must hold active_agents valid agent indices.
void compute_observations(test_struct* env, int* rand_agents){
    int obs_size_per_agent = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);

    // With no active slots the buffer is left completely untouched.
    if (env->active_agents <= 0) {
        return;
    }

    for (int i = 0; i < env->active_agents; i++) {
        memcpy(env->observations + i*obs_size_per_agent,
               env->agent_states + rand_agents[i]*obs_size_per_agent,
               obs_size_per_agent*sizeof(int));
    }

    // The map copy does not depend on the slot, so it is done once per call
    // rather than once per agent; the resulting buffer contents are the same
    // because the map block never overlaps an agent slice.
    memcpy(env->observations + env->active_agents*obs_size_per_agent,
           new_map_obs, (MAP_OBS)*sizeof(int));
}
// Alternative world mutation: for every active slot, draws a random agent
// index (stored in rand_agents[slot]) and rewrites that agent's state slice
// element by element: the first SELF_OBS entries become 5, the remaining
// OTHER_OBS * (active_agents - 1) entries become 2.
// The shared map buffer new_map_obs is not modified here.
void change_world(test_struct* env, int* rand_agents){
    const int slice_len = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);

    for (int slot = 0; slot < env->active_agents; slot++) {
        const int picked = rand() % env->num_agents;
        rand_agents[slot] = picked;

        const int base = picked*slice_len;
        int k = 0;
        // Self part of the slice.
        for (; k < SELF_OBS; k++) {
            env->agent_states[base + k] = 5;
        }
        // Remainder of the slice: entries describing the other agents.
        for (; k < slice_len; k++) {
            env->agent_states[base + k] = 2;
        }
    }
}

// World mutation used by step(): for every active slot, draws a random agent
// index (stored in rand_agents[slot]) and overwrites that agent's whole state
// slice with zeros via a single memcpy from a zeroed scratch buffer.
// The shared map buffer new_map_obs is not modified here.
//
// Terminates the process with a message on stderr if the scratch buffer
// cannot be allocated, because memcpy from a NULL source is undefined.
void copy_world(test_struct* env, int* rand_agents){
    // No active slots: nothing to pick and nothing to overwrite.
    if (env->active_agents <= 0) {
        return;
    }

    int obs_size_per_agent = SELF_OBS + (env->active_agents-1) * (OTHER_OBS);
    int* zero_state = calloc(obs_size_per_agent, sizeof *zero_state);
    if (zero_state == NULL) {
        fprintf(stderr, "copy_world: failed to allocate scratch buffer\n");
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < env->active_agents; i++) {
        rand_agents[i] = rand() % env->num_agents;
        memcpy(env->agent_states + rand_agents[i]*obs_size_per_agent,
               zero_state, obs_size_per_agent*sizeof(int));
    }

    free(zero_state);
}
// One benchmark tick: mutate the world for a fresh random selection of
// agents, then rebuild the observation buffer from that selection.
// change_world() is an alternative mutation that is currently not called.
void step(test_struct* env) {
    // One selected agent index per active slot (variable-length array).
    int picked[env->active_agents];

    copy_world(env, picked);
    compute_observations(env, picked);
}
50 changes: 34 additions & 16 deletions pufferlib/ocean/gpudrive/cy_gpudrive.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from libc.stdlib cimport calloc, free
from libc.stdlib cimport calloc, malloc, free
from libc.string cimport strcpy
import numpy as np
cdef extern from "gpudrive.h":
int LOG_BUFFER_SIZE
Expand Down Expand Up @@ -82,6 +83,8 @@ cdef extern from "gpudrive.h":
float reward_offroad_collision;
char* map_name;
char* reached_goal_this_turn;
float world_mean_x;
float world_mean_y;

ctypedef struct Client

Expand Down Expand Up @@ -179,47 +182,62 @@ cdef class CyGPUDrive:

self.client = NULL
self.num_envs = num_envs
self.envs = <GPUDrive*> calloc(num_envs, sizeof(GPUDrive))
cdef int num_clones
num_clones = 8
self.envs = <GPUDrive*> calloc(num_envs*num_clones, sizeof(GPUDrive))
self.agent_offsets = <int*> calloc(num_envs + 1, sizeof(int))
self.logs = allocate_logbuffer(LOG_BUFFER_SIZE)
cdef int i
for i in range(num_envs + 1):
self.agent_offsets[i] = offsets[i]
cdef int inc
for i in range(num_envs):
inc = self.agent_offsets[i]
print(inc)
map_file = f"resources/gpudrive/binaries/map_{i:03d}.bin".encode('utf-8')
cdef int index
cdef int total_envs
total_envs = num_envs * num_clones
cdef int total_agents
total_agents = self.agent_offsets[num_envs]
cdef char* c_map_file
for i in range(total_envs):
env_index = i % num_envs
clone_index = i // num_envs
inc = self.agent_offsets[env_index]
count = self.agent_offsets[env_index+1] - self.agent_offsets[env_index]
clone_agent_offset = clone_index * total_agents + inc
print("Env Index: ", env_index)
print("Increment: ", inc)
print("clone_agent_offset: ", clone_agent_offset)
map_file = f"resources/gpudrive/binaries/map_{env_index:03d}.bin".encode('utf-8')
c_map_file = <char*>malloc(len(map_file) + 1)
strcpy(c_map_file, map_file)
print("cython map_name", map_file)
self.envs[i] = GPUDrive(
observations=&observations[inc, 0],
actions=&actions[inc,0],
rewards=&rewards[inc],
masks=&masks[inc],
dones=&terminals[inc],
observations=&observations[clone_agent_offset, 0],
actions=&actions[clone_agent_offset,0],
rewards=&rewards[clone_agent_offset],
masks=&masks[clone_agent_offset],
dones=&terminals[clone_agent_offset],
log_buffer=self.logs,
human_agent_idx=human_agent_idx,
reward_vehicle_collision=reward_vehicle_collision,
reward_offroad_collision=reward_offroad_collision,
map_name = map_file
map_name = c_map_file
)
print("init")
init(&self.envs[i])
self.client = NULL


def reset(self):
cdef int i
for i in range(self.num_envs):
for i in range(self.num_envs*8):
c_reset(&self.envs[i])

def step(self):
cdef int i
for i in range(self.num_envs):
for i in range(self.num_envs*8):
c_step(&self.envs[i])

def render(self):
cdef GPUDrive* env = &self.envs[211]
cdef GPUDrive* env = &self.envs[11]
if self.client == NULL:
import os
cwd = os.getcwd()
Expand Down
4 changes: 2 additions & 2 deletions pufferlib/ocean/gpudrive/gpudrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ void performance_test() {
}

int main() {
demo();
//performance_test();
// demo();
performance_test();
return 0;
}
Loading
Loading