diff --git a/config/maze.ini b/config/maze.ini index 1289bf08a..b5d2450a6 100644 --- a/config/maze.ini +++ b/config/maze.ini @@ -8,7 +8,6 @@ num_buffers = 5.96311 num_threads = 2 [env] -max_size = 47 num_maps = 8192 map_size = -1 diff --git a/ocean/maze/binding.c b/ocean/maze/binding.c index b91ae9899..0abe8615d 100644 --- a/ocean/maze/binding.c +++ b/ocean/maze/binding.c @@ -16,7 +16,7 @@ Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_coun int agents_per_buffer = total_agents / num_buffers; int num_envs = total_agents; - int max_size = (int)dict_get(env_kwargs, "max_size")->value; + int max_size = MAX_SIZE; int num_maps = (int)dict_get(env_kwargs, "num_maps")->value; int map_size = (int)dict_get(env_kwargs, "map_size")->value; @@ -28,11 +28,6 @@ Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_coun // Generate maze levels (shared across all envs) State* levels = calloc(num_maps, sizeof(State)); - // Temporary env used to generate maps - Grid temp_env; - temp_env.max_size = max_size; - init_maze(&temp_env); - unsigned int map_rng = 42; for (int i = 0; i < num_maps; i++) { int sz = map_size; @@ -44,17 +39,14 @@ Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_coun sz -= 1; } + State* level = &levels[i]; + level->width = sz; + level->height = sz; + float difficulty = (float)rand_r(&map_rng) / (float)(RAND_MAX); - create_maze_level(&temp_env, sz, sz, difficulty, i); - init_state(&levels[i], max_size, 1); - get_state(&temp_env, &levels[i]); + create_maze_level(level, difficulty, i); } - // Free temp env internal allocations - free(temp_env.maze); - free(temp_env.counts); - free(temp_env.agents); - // Allocate all environments Env* envs = (Env*)calloc(num_envs, sizeof(Env)); @@ -63,14 +55,13 @@ Env* my_vec_init(int* num_envs_out, int* buffer_env_starts, int* buffer_env_coun buffer_env_starts[0] = 0; buffer_env_counts[0] = 0; + unsigned int env_rng = 42; for (int i = 0; i < num_envs; i++) 
{ Env* env = &envs[i]; - env->rng = i; - env->max_size = max_size; - env->num_maps = num_maps; + env->num_levels = num_maps; env->num_agents = 1; env->levels = levels; - init_maze(env); + env->rng = rand_r(&env_rng); buf_agents += env->num_agents; buffer_env_counts[buf]++; @@ -91,10 +82,8 @@ void my_vec_close(Env* envs) { } void my_init(Env* env, Dict* kwargs) { - env->max_size = (int)dict_get(kwargs, "max_size")->value; - env->num_maps = (int)dict_get(kwargs, "num_maps")->value; + env->num_levels = (int)dict_get(kwargs, "num_maps")->value; env->num_agents = 1; - init_maze(env); } void my_log(Log* log, Dict* out) { diff --git a/ocean/maze/maze.c b/ocean/maze/maze.c index 6c49af92b..923818a7c 100644 --- a/ocean/maze/maze.c +++ b/ocean/maze/maze.c @@ -6,55 +6,54 @@ void demo() { int logit_sizes[1] = {5}; PufferNet* net = make_puffernet(weights, 1, 121, 512, 5, logit_sizes, 1); - int max_size = 47; int num_maps = 64; - int num_agents = 1; int horizon = 256; float speed = 1; int vision = 5; bool discretize = true; - Grid* env = allocate_maze(max_size, num_agents, horizon, - vision, speed, discretize); + Grid* env = (Grid*)calloc(1, sizeof(Grid)); + env->num_agents = 1; + env->rng = 73; + env->observations = calloc(WINDOW*WINDOW, sizeof(unsigned char)); + env->actions = calloc(1, sizeof(float)); + env->rewards = calloc(1, sizeof(float)); + env->terminals = calloc(1, sizeof(float)); // Generate maps matching binding.c: random odd sizes, random difficulty State* levels = calloc(num_maps, sizeof(State)); - Grid temp_env; - temp_env.max_size = max_size; - init_maze(&temp_env); unsigned int map_rng = 42; for (int i = 0; i < num_maps; i++) { - int sz = 5 + (rand_r(&map_rng) % (max_size - 5)); + int sz = 5 + (rand_r(&map_rng) % (MAX_SIZE - 5)); if (sz % 2 == 0) sz -= 1; float difficulty = (float)rand_r(&map_rng) / (float)(RAND_MAX); - create_maze_level(&temp_env, sz, sz, difficulty, i); - init_state(&levels[i], max_size, num_agents); - get_state(&temp_env, &levels[i]); + 
State* level = &levels[i]; + level->width = sz; + level->height = sz; + create_maze_level(level, difficulty, i); } - free(temp_env.maze); - env->num_maps = num_maps; + env->num_levels = num_maps; env->levels = levels; c_reset(env); c_render(env); while (!WindowShouldClose()) { env->actions[0] = ATN_PASS; - Agent* agent = &env->agents[0]; + env->actions[0] = ATN_SOUTH; + State* s = &env->state; if (IsKeyDown(KEY_LEFT_SHIFT)) { if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)){ - agent->direction = 3.0*PI/2.0; - env->actions[0] = ATN_FORWARD; + env->actions[0] = ATN_NORTH; } else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) { - agent->direction = PI/2.0; - env->actions[0] = ATN_FORWARD; + env->actions[0] = ATN_SOUTH; } else if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) { - agent->direction = PI; - env->actions[0] = ATN_FORWARD; + s->direction = PI; + env->actions[0] = ATN_WEST; } else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) { - agent->direction = 0; - env->actions[0] = ATN_FORWARD; + s->direction = 0; + env->actions[0] = ATN_EAST; } else { env->actions[0] = ATN_PASS; } @@ -70,8 +69,11 @@ void demo() { free_puffernet(net); free(weights); - free_allocated_maze(env); - for (int i = 0; i < num_maps; i++) free_state(&levels[i]); + free(env->observations); + free(env->actions); + free(env->rewards); + free(env->terminals); + c_close(env); free(levels); } diff --git a/ocean/maze/maze.h b/ocean/maze/maze.h index 55b4b200d..f61aaa004 100644 --- a/ocean/maze/maze.h +++ b/ocean/maze/maze.h @@ -7,31 +7,20 @@ #include "raylib.h" #define TWO_PI 2.0*PI -#define MAX_SIZE 40 #define ATN_PASS 0 -#define ATN_FORWARD 1 -#define ATN_LEFT 2 -#define ATN_RIGHT 3 -#define ATN_BACK 4 - -#define DIR_WEST 0.0; -#define DIR_NORTH PI/2.0; -#define DIR_EAST PI; -#define DIR_SOUTH 3.0*PI/2.0; - +#define ATN_EAST 1 +#define ATN_NORTH 2 +#define ATN_WEST 3 +#define ATN_SOUTH 4 #define EMPTY 0 #define WALL 1 -#define LAVA 2 -#define GOAL 3 -#define REWARD 4 -#define OBJECT 5 -#define AGENT 6 -#define 
KEY 14 -#define DOOR_LOCKED 20 -#define DOOR_OPEN 26 +#define AGENT 2 +#define GOAL 4 -#define LOG_BUFFER_SIZE 4096 +#define VISION 5 +#define WINDOW (2*VISION + 1) +#define MAX_SIZE 47 typedef struct Log Log; struct Log { @@ -42,121 +31,48 @@ struct Log { float n; }; -// 8 unique agents -bool is_agent(int idx) { - return idx >= AGENT && idx < AGENT + 8; -} -int rand_color(unsigned int* rng) { - return AGENT + rand_r(rng)%8; -} - -// 6 unique keys and doors -bool is_key(int idx) { - return idx >= KEY && idx < KEY + 6; -} -bool is_locked_door(int idx) { - return idx >= DOOR_LOCKED && idx < DOOR_LOCKED + 6; -} -bool is_open_door(int idx) { - return idx >= DOOR_OPEN && idx <= DOOR_OPEN + 6; -} -bool is_correct_key(int key, int door) { - return key == door - 6; -} - -typedef struct Agent Agent; -struct Agent { - float y; - float x; - float prev_y; - float prev_x; - float spawn_y; - float spawn_x; - int color; - float direction; - int held; -}; +typedef struct { + int cell_size; + int width; + int height; + Texture2D puffer; + float* overlay; +} Renderer; -typedef struct Renderer Renderer; -typedef struct State State; -typedef struct Grid Grid; -struct Grid{ - Renderer* renderer; - State* levels; - int num_maps; +typedef struct { int width; int height; + int spawn_x; + int spawn_y; + int x; + int y; + int direction; + unsigned char maze[MAX_SIZE*MAX_SIZE]; +} State; + +typedef struct { + Renderer* renderer; + State* levels; + State state; + Log log; + int num_levels; int num_agents; - int horizon; - int vision; int tick; - float speed; - int obs_size; - int max_size; - bool discretize; - Log log; - unsigned int rng; - Agent* agents; - unsigned char* maze; - int* counts; unsigned char* observations; float* actions; float* rewards; float* terminals; -}; - -void init_maze(Grid* env) { - env->num_agents = 1; - env->vision = 5; - env->speed = 1; - env->discretize = true; - env->obs_size = 2*env->vision + 1; - int env_mem= env->max_size * env->max_size; - env->maze = 
calloc(env_mem, sizeof(unsigned char)); - env->counts = calloc(env_mem, sizeof(int)); - env->agents = calloc(env->num_agents, sizeof(Agent)); -} - -Grid* allocate_maze(int max_size, int num_agents, int horizon, - int vision, float speed, bool discretize) { - Grid* env = (Grid*)calloc(1, sizeof(Grid)); - env->max_size = max_size; - env->num_agents = num_agents; - env->horizon = horizon; - env->vision = vision; - env->speed = speed; - env->discretize = discretize; - int obs_size = 2*vision + 1; - env->observations = calloc( - num_agents*obs_size*obs_size, sizeof(unsigned char)); - env->actions = calloc(num_agents, sizeof(double)); - env->rewards = calloc(num_agents, sizeof(float)); - env->terminals = calloc(num_agents, sizeof(float)); - init_maze(env); - return env; -} - -void c_close(Grid* env) { - free(env->maze); - free(env->counts); - free(env->agents); -} + unsigned int rng; +} Grid; -void free_allocated_maze(Grid* env) { - free(env->observations); - free(env->actions); - free(env->rewards); - free(env->terminals); - c_close(env); -} +void c_close(Grid* env) {} -bool in_bounds(Grid* env, int y, int c) { - return (y >= 0 && y <= env->height - && c >= 0 && c <= env->width); +bool in_bounds(State* s, int y, int c) { + return (y >= 0 && y <= s->height && c >= 0 && c <= s->width); } -int maze_offset(Grid* env, int y, int x) { - return y*env->max_size + x; +int maze_offset(int y, int x) { + return y*MAX_SIZE + x; } void add_log(Grid* env, int idx) { @@ -168,305 +84,120 @@ void add_log(Grid* env, int idx) { } void compute_observations(Grid* env) { - memset(env->observations, 0, env->obs_size*env->obs_size*env->num_agents); + memset(env->observations, 0, WINDOW*WINDOW*env->num_agents); + State* s = &env->state; for (int agent_idx = 0; agent_idx < env->num_agents; agent_idx++) { - Agent* agent = &env->agents[agent_idx]; - float y = agent->y; - float x = agent->x; - int start_r = y - env->vision; + int x = s->x; + int y = s->y; + int start_r = y - VISION; if (start_r < 0) 
{ start_r = 0; } - int start_c = x - env->vision; + int start_c = x - VISION; if (start_c < 0) { start_c = 0; } - int end_r = y + env->vision; - if (end_r >= env->max_size) { - end_r = env->max_size - 1; + int end_r = y + VISION; + if (end_r >= MAX_SIZE) { + end_r = MAX_SIZE - 1; } - int end_c = x + env->vision; - if (end_c >= env->max_size) { - end_c = env->max_size - 1; + int end_c = x + VISION; + if (end_c >= MAX_SIZE) { + end_c = MAX_SIZE - 1; } - int obs_offset = agent_idx*env->obs_size*env->obs_size; + int obs_offset = agent_idx*WINDOW*WINDOW; for (int r = start_r; r <= end_r; r++) { for (int c = start_c; c <= end_c; c++) { - int r_idx = r - y + env->vision; - int c_idx = c - x + env->vision; - int obs_adr = obs_offset + r_idx*env->obs_size + c_idx; - int adr = maze_offset(env, r, c); - env->observations[obs_adr] = env->maze[adr]; + int r_idx = r - y + VISION; + int c_idx = c - x + VISION; + int obs_adr = obs_offset + r_idx*WINDOW + c_idx; + int adr = maze_offset(r, c); + env->observations[obs_adr] = s->maze[adr]; } } - /* - int obs_adr = 0; - for (int r = 0; r < env->obs_size; r++) { - for (int c = 0; c < env->obs_size; c++) { - printf("%d ", env->observations[obs_adr]); - obs_adr++; - } - printf("\n"); - } - */ - } -} - -void make_border(Grid*env) { - for (int r = 0; r < env->height; r++) { - int adr = maze_offset(env, r, 0); - env->maze[adr] = WALL; - adr = maze_offset(env, r, env->width-1); - env->maze[adr] = WALL; } - for (int c = 0; c < env->width; c++) { - int adr = maze_offset(env, 0, c); - env->maze[adr] = WALL; - adr = maze_offset(env, env->height-1, c); - env->maze[adr] = WALL; - } -} - -void spawn_agent(Grid* env, int idx, int x, int y) { - Agent* agent = &env->agents[idx]; - int spawn_y = y; - int spawn_x = x; - assert(in_bounds(env, spawn_y, spawn_x)); - int adr = maze_offset(env, spawn_y, spawn_x); - assert(env->maze[adr] == EMPTY); - agent->spawn_y = spawn_y; - agent->spawn_x = spawn_x; - agent->y = agent->spawn_y; - agent->x = agent->spawn_x; 
- agent->prev_y = agent->y; - agent->prev_x = agent->x; - env->maze[adr] = agent->color; - agent->direction = 0; - agent->held = -1; - agent->color = AGENT; -} - -struct State { - int width; - int height; - int num_agents; - Agent* agents; - unsigned char* maze; -}; - -void init_state(State* state, int max_size, int num_agents) { - state->agents = calloc(num_agents, sizeof(Agent)); - state->maze = calloc(max_size*max_size, sizeof(unsigned char)); -} - -void free_state(State* state) { - free(state->agents); - free(state->maze); - free(state); -} - -void get_state(Grid* env, State* state) { - state->width = env->width; - state->height = env->height; - state->num_agents = env->num_agents; - memcpy(state->agents, env->agents, env->num_agents*sizeof(Agent)); - memcpy(state->maze, env->maze, env->max_size*env->max_size); -} - -void set_state(Grid* env, State* state) { - env->width = state->width; - env->height = state->height; - env->horizon = 2*env->width*env->height; - env->num_agents = state->num_agents; - memcpy(env->agents, state->agents, env->num_agents*sizeof(Agent)); - memcpy(env->maze, state->maze, env->max_size*env->max_size); } void c_reset(Grid* env) { - memset(env->maze, 0, env->max_size*env->max_size); - memset(env->counts, 0, env->max_size*env->max_size*sizeof(int)); env->tick = 0; - int idx = rand_r(&env->rng) % env->num_maps; - set_state(env, &env->levels[idx]); + int idx = rand_r(&env->rng) % env->num_levels; + env->state = env->levels[idx]; compute_observations(env); } int move_to(Grid* env, int agent_idx, float y, float x) { - Agent* agent = &env->agents[agent_idx]; - if (!in_bounds(env, y, x)) { + if (!in_bounds(&env->state, y, x)) { return 1; } - int adr = maze_offset(env, round(y), round(x)); - int dest = env->maze[adr]; + State* s = &env->state; + int adr = maze_offset(round(y), round(x)); + int dest = s->maze[adr]; if (dest == WALL) { return 1; - } else if (dest == REWARD || dest == GOAL) { + } else if (dest == GOAL) { env->rewards[agent_idx] = 
1.0; env->terminals[agent_idx] = 1.0f; add_log(env, agent_idx); - } else if (is_key(dest)) { - if (agent->held != -1) { - return 1; - } - agent->held = dest; - } else if (is_locked_door(dest)) { if (!is_correct_key(agent->held, dest)) { return 1; - } - agent->held = -1; - env->maze[adr] = EMPTY; } - int start_y = round(agent->y); - int start_x = round(agent->x); - int start_adr = maze_offset(env, start_y, start_x); - env->maze[start_adr] = EMPTY; - - env->maze[adr] = agent->color; - agent->y = y; - agent->x = x; + int start_adr = maze_offset(s->y, s->x); + s->maze[start_adr] = EMPTY; + s->maze[adr] = AGENT; + s->y = y; + s->x = x; return 0; } -bool step_agent(Grid* env, int idx) { - Agent* agent = &env->agents[idx]; - agent->prev_y = agent->y; - agent->prev_x = agent->x; - - double atn = env->actions[idx]; - float direction = agent->direction; - - if (env->discretize) { - int iatn = (int)atn; - if (iatn == ATN_PASS) { - return true; - } else if (iatn == ATN_FORWARD) { - } else if (iatn == ATN_LEFT) { - direction -= PI/2.0; - } else if (iatn == ATN_RIGHT) { - direction += PI/2.0; - } else if (iatn == ATN_BACK) { - direction += PI; - } else { - printf("Invalid action: %f\n", atn); - exit(1); - } - if (direction < 0) { - direction += TWO_PI; - } else if (direction >= TWO_PI) { - direction -= TWO_PI; - } - } else { - assert(atn >= -1.0); - assert(atn <= 1.0); - direction += PI*atn; - } - - float x = agent->x; - float y = agent->y; - float dx = env->speed*cos(direction); - float dy = env->speed*sin(direction); - agent->direction = direction; - if (env->discretize) { - float dest_x = x + dx; - float dest_y = y + dy; - if (!in_bounds(env, dest_y, dest_x)) { - return false; - } - int err = move_to(env, idx, dest_y, dest_x); - if (err) { - return false; - } - } else { - for (int substep = 1; substep <= 4; substep++) { - float dest_x = x + dx/(float)substep; - float dest_y = y + dy/(float)substep; - int err = move_to(env, idx, dest_y, dest_x); - if (!err) { - continue; - } 
else if (substep == 1) { - return false; - } else { - break; - } - } - } - - int x_int = agent->x; - int y_int = agent->y; - int adr = maze_offset(env, y_int, x_int); - env->counts[adr]++; - //env->rewards[idx] += 0.01 / (float)env->counts[adr]; - //env->log.episode_return += 0.01 / (float)env->counts[adr]; - return true; -} - void c_step(Grid* env) { - memset(env->terminals, 0, env->num_agents * sizeof(float)); - memset(env->rewards, 0, env->num_agents*sizeof(float)); + env->terminals[0] = 0.0f; + env->rewards[0] = 0.0f; + + State* s = &env->state; env->tick++; - for (int i = 0; i < env->num_agents; i++) { - step_agent(env, i); + int atn = env->actions[0]; + int direction = s->direction; + if (atn != ATN_PASS) { + direction = atn; } - compute_observations(env); - bool done = true; - for (int i = 0; i < env->num_agents; i++) { - if (!env->terminals[i]) { - done = false; - break; - } + int x = s->x; + int y = s->y; + int dest_x = x; + int dest_y = y; + if (direction == ATN_EAST) { + dest_x = x + 1; + } else if (direction == ATN_NORTH) { + dest_y = y - 1; + } else if (direction == ATN_WEST) { + dest_x = x - 1; + } else if (direction == ATN_SOUTH) { + dest_y = y + 1; + } + if (in_bounds(&env->state, dest_y, dest_x)) { + int err = move_to(env, 0, dest_y, dest_x); } - if (env->tick >= env->horizon) { - done = true; + compute_observations(env); + + if (env->tick >= 2*s->width*s->height) { + env->terminals[0] = 1.0f; add_log(env, 0); } - if (done) { + if (env->terminals[0]) { c_reset(env); - int idx = rand_r(&env->rng) % env->num_maps; - set_state(env, &env->levels[idx]); + int idx = rand_r(&env->rng) % env->num_levels; + env->state = env->levels[idx]; compute_observations(env); } } -// Raylib client -Color COLORS[] = { - (Color){6, 24, 24, 255}, - (Color){0, 0, 255, 255}, - (Color){0, 128, 255, 255}, - (Color){128, 128, 128, 255}, - (Color){255, 0, 0, 255}, - (Color){255, 255, 255, 255}, - (Color){255, 85, 85, 255}, - (Color){170, 170, 170, 255}, - (Color){0, 255, 255, 
255}, - (Color){255, 255, 0, 255}, -}; - -Rectangle UV_COORDS[7] = { - (Rectangle){0, 0, 0, 0}, - (Rectangle){512, 0, 128, 128}, - (Rectangle){0, 0, 0, 0}, - (Rectangle){0, 0, 128, 128}, - (Rectangle){128, 0, 128, 128}, - (Rectangle){256, 0, 128, 128}, - (Rectangle){384, 0, 128, 128}, -}; - -struct Renderer { - int cell_size; - int width; - int height; - Texture2D puffer; - float* overlay; -}; - Renderer* init_renderer(int cell_size, int width, int height) { Renderer* renderer = (Renderer*)calloc(1, sizeof(Renderer)); renderer->cell_size = cell_size; @@ -493,11 +224,9 @@ void close_renderer(Renderer* renderer) { } void c_render(Grid* env) { - // TODO: fractional rendering - float frac = 0.0; float overlay = 0.0; if (env->renderer == NULL) { - env->renderer = init_renderer(16, env->max_size, env->max_size); + env->renderer = init_renderer(16, MAX_SIZE, MAX_SIZE); } Renderer* renderer = env->renderer; @@ -505,22 +234,19 @@ void c_render(Grid* env) { exit(0); } - Agent* agent = &env->agents[0]; - int r = agent->y; - int c = agent->x; - int adr = maze_offset(env, r, c); - //renderer->overlay[adr] = overlay; - //renderer->overlay[adr] -= 0.1; - //renderer->overlay[adr] = -1 + 1.0/(float)env->counts[adr]; + State* s = &env->state; + int r = s->y; + int c = s->x; + int adr = maze_offset(r, c); BeginDrawing(); ClearBackground((Color){6, 24, 24, 255}); int ts = renderer->cell_size; - for (int r = 0; r < env->height; r++) { - for (int c = 0; c < env->width; c++){ - adr = maze_offset(env, r, c); - int tile = env->maze[adr]; + for (int r = 0; r < s->height; r++) { + for (int c = 0; c < s->width; c++){ + adr = maze_offset(r, c); + int tile = s->maze[adr]; if (tile == EMPTY) { continue; overlay = renderer->overlay[adr]; @@ -543,15 +269,6 @@ void c_render(Grid* env) { color = (Color){128, 128, 128, 255}; } else if (tile == GOAL) { color = GREEN; - } else if (is_locked_door(tile)) { - int weight = 40*(tile - DOOR_LOCKED); - color = (Color){weight, 0, 0, 255}; - } else if 
(is_open_door(tile)) { - int weight = 40*(tile - DOOR_OPEN); - color = (Color){0, weight, 0, 255}; - } else if (is_key(tile)) { - int weight = 40*(tile - KEY); - color = (Color){0, 0, weight, 255}; } else { continue; } @@ -560,79 +277,14 @@ void c_render(Grid* env) { } } - for (int i = 0; i < env->num_agents; i++) { - agent = &env->agents[0]; - float y = agent->y + (frac - 1)*(agent->y - agent->prev_y); - float x = agent->x + (frac - 1)*(agent->x - agent->prev_x); - int u = 0; - int v = 0; - Rectangle source_rect = (Rectangle){u, v, 128, 128}; - Rectangle dest_rect = (Rectangle){x*ts, y*ts, ts, ts}; - DrawTexturePro(renderer->puffer, source_rect, dest_rect, - (Vector2){0, 0}, 0, WHITE); - } - - EndDrawing(); -} + float y = s->y; + float x = s->x; + Rectangle source_rect = (Rectangle){0, 0, 128, 128}; + Rectangle dest_rect = (Rectangle){x*ts, y*ts, ts, ts}; + DrawTexturePro(renderer->puffer, source_rect, dest_rect, + (Vector2){0, 0}, 0, WHITE); -void generate_locked_room(Grid* env) { - assert(env->max_size >= 19); - env->width = 19; - env->height = 19; - env->num_agents = 1; - env->horizon = 1000; - env->speed = 1; - env->vision = 3; - env->discretize = true; - - Agent* agent = &env->agents[0]; - agent->x = 9; - agent->y = 9; - agent->prev_x = 9; - agent->prev_y = 9; - agent->spawn_y = 9; - agent->spawn_x = 9; - agent->color = 6; - agent->held = -1; - - make_border(env); - - for (int r = 0; r < env->height; r++) { - int adr = maze_offset(env, r, 7); - env->maze[adr] = WALL; - adr = maze_offset(env, r, 11); - env->maze[adr] = WALL; - } - for (int c = 0; c < 7; c++) { - int adr = maze_offset(env, 6, c); - env->maze[adr] = WALL; - adr = maze_offset(env, 12, c); - env->maze[adr] = WALL; - } - for (int c = 11; c < env->width; c++) { - int adr = maze_offset(env, 6, c); - env->maze[adr] = WALL; - adr = maze_offset(env, 12, c); - env->maze[adr] = WALL; - } - int adr = maze_offset(env, 3, 7); - env->maze[adr] = DOOR_OPEN; - adr = maze_offset(env, 9, 7); - env->maze[adr] = 
DOOR_OPEN + 1; - adr = maze_offset(env, 15, 7); - env->maze[adr] = DOOR_OPEN + 2; - adr = maze_offset(env, 3, 11); - env->maze[adr] = DOOR_OPEN + 3; - adr = maze_offset(env, 9, 11); - env->maze[adr] = DOOR_OPEN + 4; - adr = maze_offset(env, 15, 11); - env->maze[adr] = DOOR_LOCKED + 5; - - adr = maze_offset(env, 4, 15); - env->maze[adr] = KEY + 5; - - adr = maze_offset(env, 16, 17); - env->maze[adr] = GOAL; + EndDrawing(); } void generate_growing_tree_maze(unsigned char* maze, @@ -672,10 +324,6 @@ void generate_growing_tree_maze(unsigned char* maze, cells[0] = x_init; cells[1] = y_init; - //int cell = 32; - //InitWindow(width*cell, height*cell, "PufferLib Ray Grid"); - //SetTargetFPS(60); - while (num_cells > 0) { if (rand_r(&rng) % 1000 > 1000*difficulty) { int i = rand_r(&rng) % num_cells; @@ -728,26 +376,6 @@ void generate_growing_tree_maze(unsigned char* maze, num_cells++; made_path = true; - - /* - if (IsKeyPressed(KEY_ESCAPE)) { - exit(0); - } - BeginDrawing(); - ClearBackground((Color){6, 24, 24, 255}); - Color color = (Color){128, 128, 128, 255}; - for (int r = 0; r < height; r++) { - for (int c = 0; c < width; c++){ - int adr = r*max_size + c; - int tile = maze[adr]; - if (tile == WALL) { - DrawRectangle(c*cell, r*cell, cell, cell, color); - } - } - } - EndDrawing(); - */ - break; } if (!made_path) { @@ -756,12 +384,39 @@ void generate_growing_tree_maze(unsigned char* maze, } } -void create_maze_level(Grid* env, int width, int height, float difficulty, int seed) { - env->width = width; - env->height = height; - generate_growing_tree_maze(env->maze, width, height, env->max_size, difficulty, seed); - make_border(env); - spawn_agent(env, 0, 1, 1); - int goal_adr = maze_offset(env, env->height - 2, env->width - 2); - env->maze[goal_adr] = GOAL; +void make_border(State* s) { + for (int r = 0; r < s->height; r++) { + int adr = maze_offset(r, 0); + s->maze[adr] = WALL; + adr = maze_offset(r, s->width-1); + s->maze[adr] = WALL; + } + for (int c = 0; c < s->width; 
c++) { + int adr = maze_offset(0, c); + s->maze[adr] = WALL; + adr = maze_offset(s->height-1, c); + s->maze[adr] = WALL; + } +} + +void spawn_agent(State* s, int idx, int x, int y) { + int spawn_y = y; + int spawn_x = x; + assert(in_bounds(s, spawn_y, spawn_x)); + int adr = maze_offset(spawn_y, spawn_x); + assert(s->maze[adr] == EMPTY); + s->spawn_y = spawn_y; + s->spawn_x = spawn_x; + s->y = spawn_y; + s->x = spawn_x; + s->maze[adr] = AGENT; + s->direction = 0; +} + +void create_maze_level(State* s, float difficulty, int seed) { + generate_growing_tree_maze(s->maze, s->width, s->height, MAX_SIZE, difficulty, seed); + make_border(s); + spawn_agent(s, 0, 1, 1); + int goal_adr = maze_offset(s->height - 2, s->width - 2); + s->maze[goal_adr] = GOAL; } diff --git a/ocean/minimal/minimal.c b/ocean/minimal/minimal.c index 0fe6b4725..d279cfadc 100644 --- a/ocean/minimal/minimal.c +++ b/ocean/minimal/minimal.c @@ -39,4 +39,3 @@ int main() { free(env.terminals); c_close(&env); } - diff --git a/pufferlib/models.py b/pufferlib/models.py index dc06eb5fb..b0785a5d7 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -35,6 +35,28 @@ def __init__(self, obs_size, hidden_size=128): def forward(self, observations): return self.encoder(observations.view(observations.shape[0], -1).float()) +class MinimalEntityEncoder(nn.Module): + def __init__(self, obs_size, hidden_size=128): + super().__init__() + self.self_obs_size = 2 + self.point_obs_size = 4 + self.num_points = 16 + self.hidden_size = hidden_size + + self.encoder = nn.Sequential( + nn.Linear(self.self_obs_size + self.point_obs_size, 16), + nn.ReLU(), + nn.Linear(16, hidden_size), + ) + + def forward(self, observations): + self_obs = observations[:, :self.self_obs_size].unsqueeze(1).expand( + observations.shape[0], self.num_points, self.self_obs_size) + point_obs = observations[:, self.self_obs_size:].reshape( + observations.shape[0], self.num_points, self.point_obs_size) + cat = torch.cat([self_obs, point_obs], dim=-1) 
+ return self.encoder(cat).max(dim=1)[0] + class DefaultDecoder(nn.Module): def __init__(self, nvec, hidden_size=128): super().__init__() diff --git a/pufferlib/torch_pufferl.py b/pufferlib/torch_pufferl.py index efe042b10..4cf618448 100644 --- a/pufferlib/torch_pufferl.py +++ b/pufferlib/torch_pufferl.py @@ -99,6 +99,11 @@ def __init__(self, ptr, shape, dtype): torch.float32: ctypes.c_float, } +def _actions_for_vec_step(action): + if action.dim() == 1: + action = action.unsqueeze(-1) + return action.to(dtype=torch.float32).contiguous() + def _cpu_tensor(ptr, shape, dtype): '''Zero-copy CPU tensor from a raw pointer via ctypes.''' ctype = _TORCH_TO_CTYPE[dtype] @@ -227,7 +232,7 @@ def rollouts(self): self.values[t] = value.flatten() prof.mark(2) - actions_flat = (action.T if action.dim() > 1 else action.unsqueeze(-1)).to(dtype=torch.float32).contiguous() + actions_flat = _actions_for_vec_step(action) if self.gpu: actions_flat = actions_flat.cuda() self._vec.gpu_step(actions_flat.data_ptr()) diff --git a/tests/point_linear_max_kernel.cu b/tests/point_linear_max_kernel.cu new file mode 100644 index 000000000..45b32f5ec --- /dev/null +++ b/tests/point_linear_max_kernel.cu @@ -0,0 +1,93 @@ +#include + +#include +#include + +namespace { + +constexpr int THREADS = 256; + +__global__ void point_linear_max_forward_kernel( + const float* __restrict__ observations, + const float* __restrict__ weight, + const float* __restrict__ bias, + float* __restrict__ output, + int batch_size, + int self_dim, + int point_dim, + int num_points, + int hidden_size) { + int batch_idx = blockIdx.x; + int tid = threadIdx.x; + int input_dim = self_dim + num_points * point_dim; + int point_input_dim = self_dim + point_dim; + + if (batch_idx >= batch_size) { + return; + } + + const float* obs_row = observations + (int64_t)batch_idx * input_dim; + + for (int hidden_idx = tid; hidden_idx < hidden_size; hidden_idx += blockDim.x) { + const float* row = weight + (int64_t)hidden_idx * point_input_dim; 
+ float base = bias[hidden_idx]; + for (int d = 0; d < self_dim; ++d) { + base += row[d] * obs_row[d]; + } + + float max_val = -FLT_MAX; + for (int point_idx = 0; point_idx < num_points; ++point_idx) { + const float* point = obs_row + self_dim + (int64_t)point_idx * point_dim; + float sum = base; + for (int d = 0; d < point_dim; ++d) { + sum += row[self_dim + d] * point[d]; + } + if (sum > max_val) { + max_val = sum; + } + } + + output[(int64_t)batch_idx * hidden_size + hidden_idx] = max_val; + } +} + +} // namespace + +extern "C" { + +int point_linear_max_forward( + void* output, + const void* observations, + const void* weight, + const void* bias, + int batch_size, + int self_dim, + int point_dim, + int num_points, + int hidden_size) { + dim3 block(THREADS); + dim3 grid(batch_size); + + point_linear_max_forward_kernel<<>>( + (const float*)observations, + (const float*)weight, + (const float*)bias, + (float*)output, + batch_size, + self_dim, + point_dim, + num_points, + hidden_size); + + return (int)cudaGetLastError(); +} + +int point_linear_max_synchronize() { + return (int)cudaDeviceSynchronize(); +} + +const char* point_linear_max_error_string(int code) { + return cudaGetErrorString((cudaError_t)code); +} + +} // extern "C" diff --git a/tests/point_linear_max_reference.py b/tests/point_linear_max_reference.py new file mode 100644 index 000000000..33a680f29 --- /dev/null +++ b/tests/point_linear_max_reference.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn + + +def input_dim(self_dim, point_dim, num_points): + return self_dim + num_points * point_dim + + +class PointLinearMaxEncoder(nn.Module): + def __init__(self, self_dim=2, point_dim=4, num_points=16, hidden_size=128): + super().__init__() + self.hidden_size = hidden_size + self.self_obs_size = self_dim + self.point_obs_size = point_dim + self.num_points = num_points + self.linear = nn.Linear(self.self_obs_size + self.point_obs_size, hidden_size) + + def forward(self, observations): + observations = 
observations.float()
+        point_obs = observations[:, self.self_obs_size:].reshape(
+            observations.shape[0], self.num_points, self.point_obs_size)
+        self_obs = observations[:, :self.self_obs_size].unsqueeze(1).expand(
+            observations.shape[0], self.num_points, self.self_obs_size)
+        point_inputs = torch.cat([self_obs, point_obs], dim=-1)
+        return self.linear(point_inputs).max(dim=1)[0]
+
+
+class FlatLinearEncoder(nn.Module):
+    def __init__(self, input_size, hidden_size=128):
+        super().__init__()
+        self.linear = nn.Linear(input_size, hidden_size)
+
+    def forward(self, observations):
+        return self.linear(observations.float())
diff --git a/tests/test_point_linear_max.py b/tests/test_point_linear_max.py
new file mode 100644
index 000000000..a901115e2
--- /dev/null
+++ b/tests/test_point_linear_max.py
@@ -0,0 +1,235 @@
+import argparse
+import ctypes
+import os
+import subprocess
+
+import torch
+
+from point_linear_max_reference import FlatLinearEncoder, PointLinearMaxEncoder, input_dim
+
+
+ROOT = os.path.dirname(os.path.abspath(__file__))
+SRC = os.path.join(ROOT, "point_linear_max_kernel.cu")
+SO = os.path.join(ROOT, "point_linear_max_kernel.so")
+
+
+def build(force=False):
+    """Compile SRC into the shared library SO with nvcc.
+
+    The build is skipped when SO already exists and is at least as new as
+    SRC, unless ``force`` is true.
+    """
+    if not force and os.path.exists(SO) and os.path.getmtime(SO) >= os.path.getmtime(SRC):
+        return
+
+    cmd = ["nvcc", "-shared", "-o", SO, SRC, "-Xcompiler", "-fPIC", "-O2"]
+    print(f"Building: {' '.join(cmd)}")
+    subprocess.check_call(cmd)
+
+
+def load_lib():
+    """Load SO through ctypes and declare the signatures of its entry points."""
+    lib = ctypes.CDLL(SO)
+    vp = ctypes.c_void_p
+    lib.point_linear_max_forward.argtypes = [
+        vp, vp, vp, vp,
+        ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,
+    ]
+    lib.point_linear_max_forward.restype = ctypes.c_int
+    lib.point_linear_max_synchronize.restype = ctypes.c_int
+    lib.point_linear_max_error_string.argtypes = [ctypes.c_int]
+    lib.point_linear_max_error_string.restype = ctypes.c_char_p
+    return lib
+
+
+def check_cuda(code, lib, where):
+    """Raise RuntimeError when ``code`` is nonzero.
+
+    The message text comes from ``lib.point_linear_max_error_string(code)``
+    and is prefixed with ``where``.
+    """
+    if code != 0:
+        msg = lib.point_linear_max_error_string(code).decode("utf-8")
+        raise RuntimeError(f"{where} failed: {msg}")
+
+
+def ptr(tensor):
+    """Wrap ``tensor.data_ptr()`` in a ``ctypes.c_void_p``."""
+    return ctypes.c_void_p(tensor.data_ptr())
+
+
+def run_kernel(lib, output, observations, encoder):
+    """Call ``point_linear_max_forward`` and raise if it returns a nonzero code.
+
+    Only raw data pointers and integer sizes are handed over, so the dtype,
+    device and memory layout of the tensors are not validated here.
+    """
+    code = lib.point_linear_max_forward(
+        ptr(output),
+        ptr(observations),
+        ptr(encoder.linear.weight),
+        ptr(encoder.linear.bias),
+        observations.shape[0],
+        encoder.self_obs_size,
+        encoder.point_obs_size,
+        encoder.num_points,
+        encoder.hidden_size,
+    )
+    check_cuda(code, lib, "kernel launch")
+
+
+def synchronize(lib):
+    """Call ``point_linear_max_synchronize`` and raise if it returns a nonzero code."""
+    check_cuda(lib.point_linear_max_synchronize(), lib, "device synchronize")
+
+
+def generate_observations(batch_size, obs_dim, device):
+    """Return a ``(batch_size, obs_dim)`` float32 tensor drawn with ``torch.randn``."""
+    return torch.randn(batch_size, obs_dim, device=device, dtype=torch.float32)
+
+
+def check_close(name, got, ref, atol=1e-6, rtol=1e-5):
+    """Print the max/mean absolute error and raise AssertionError on mismatch.
+
+    On failure the message reports the (row, col) of the largest absolute
+    difference; the index arithmetic treats ``got`` as 2-D.
+    """
+    diff = (got - ref).abs()
+    print(f" [{name}] max={diff.max().item():.6e} mean={diff.mean().item():.6e}")
+    if not torch.allclose(got, ref, atol=atol, rtol=rtol):
+        idx = diff.argmax().item()
+        row = idx // got.shape[1]
+        col = idx % got.shape[1]
+        raise AssertionError(
+            f"{name} mismatch at ({row}, {col}): got={got[row, col].item():.6f} ref={ref[row, col].item():.6f}"
+        )
+
+
+def make_encoders(args, device):
+    """Build the point-wise and flat encoders described by ``args``.
+
+    Returns ``(obs_dim, point_encoder, flat_encoder)``; both encoders are
+    moved to ``device``, cast to float32 and put in eval mode.
+    """
+    obs_dim = input_dim(args.self_dim, args.point_dim, args.num_points)
+    point_encoder = PointLinearMaxEncoder(
+        self_dim=args.self_dim,
+        point_dim=args.point_dim,
+        num_points=args.num_points,
+        hidden_size=args.hidden_size,
+    ).to(device).float().eval()
+    flat_encoder = FlatLinearEncoder(obs_dim, hidden_size=args.hidden_size).to(device).float().eval()
+    return obs_dim, point_encoder, flat_encoder
+
+
+def test_correctness(lib, args, batches):
+    """Compare the kernel output with the torch encoder for each batch size.
+
+    A fresh encoder and fresh random observations are created per batch size;
+    the reference is computed under ``torch.no_grad()``.
+    """
+    print("Correctness")
+    device = torch.device("cuda")
+    torch.manual_seed(0)
+    obs_dim, _, _ = make_encoders(args, device)
+
+    for batch_size in batches:
+        _, encoder, _ = make_encoders(args, device)
+        observations = generate_observations(batch_size, obs_dim, device)
+        output = torch.empty(batch_size, args.hidden_size, device=device, dtype=torch.float32)
+        run_kernel(lib, output, observations, encoder)
+        synchronize(lib)
+        with torch.no_grad():
+            reference = encoder(observations)
+        check_close(f"B={batch_size}", output, reference)
+
+
+def benchmark_one(name, fn, warmup, iters):
+    """Time ``fn`` with CUDA events and return the mean milliseconds per call.
+
+    ``fn`` is called ``warmup`` times untimed, then ``iters`` times between
+    two recorded events.
+
+    Raises:
+        ValueError: if ``iters`` is not positive, since the mean divides by it.
+    """
+    if iters <= 0:
+        raise ValueError("iters must be positive")
+
+    for _ in range(warmup):
+        fn()
+    torch.cuda.synchronize()
+
+    start = torch.cuda.Event(enable_timing=True)
+    end = torch.cuda.Event(enable_timing=True)
+    start.record()
+    for _ in range(iters):
+        fn()
+    end.record()
+    torch.cuda.synchronize()
+    ms = start.elapsed_time(end) / iters
+    print(f" {name:<20} {ms:8.3f} ms")
+    return ms
+
+
+def benchmark(lib, args):
+    """Time the fused kernel against the torch point-wise and flat encoders.
+
+    Everything is timed under ``torch.no_grad()`` so the torch baselines do
+    not pay for autograd bookkeeping while being compared with a bare
+    forward kernel launch.
+    """
+    print(
+        f"Benchmark B={args.benchmark_batch} H={args.hidden_size} "
+        f"self={args.self_dim} point={args.point_dim} npoints={args.num_points}"
+    )
+    device = torch.device("cuda")
+    torch.manual_seed(0)
+    obs_dim, encoder, flat_encoder = make_encoders(args, device)
+    observations = generate_observations(args.benchmark_batch, obs_dim, device)
+    kernel_output = torch.empty(args.benchmark_batch, args.hidden_size, device=device, dtype=torch.float32)
+
+    def fused_kernel():
+        run_kernel(lib, kernel_output, observations, encoder)
+
+    def torch_pointwise():
+        encoder(observations)
+
+    def torch_flat():
+        flat_encoder(observations)
+
+    # Inference-only timing: disable autograd, as test_correctness does.
+    with torch.no_grad():
+        kernel_ms = benchmark_one("fused kernel", fused_kernel, args.warmup, args.iters)
+        torch_ms = benchmark_one("torch pointwise", torch_pointwise, args.warmup, args.iters)
+        flat_ms = benchmark_one("torch flat", torch_flat, args.warmup, args.iters)
+
+    print(f" speedup vs torch: {torch_ms / kernel_ms:.2f}x")
+    print(f" relative to flat: {kernel_ms / flat_ms:.2f}x")
+
+
+def main():
+    """Parse CLI options, build and load the kernel, then test and benchmark it."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--self-dim", type=int, default=2)
+    parser.add_argument("--point-dim", type=int, default=4)
+    parser.add_argument("--num-points", type=int, default=16)
+    parser.add_argument("--hidden-size", type=int, default=128)
+    parser.add_argument("--correctness-batches", type=int, nargs="+", default=[1, 17, 257, 4096])
+    parser.add_argument("--benchmark-batch", type=int, default=4096)
+    parser.add_argument("--warmup", type=int, default=20)
+    parser.add_argument("--iters", type=int, default=100)
+    parser.add_argument("--force-build", action="store_true")
+    args = parser.parse_args()
+
+    if not torch.cuda.is_available():
+        raise RuntimeError("CUDA is required for this test")
+
+    build(force=args.force_build)
+    lib = load_lib()
+    test_correctness(lib, args, args.correctness_batches)
+    benchmark(lib, args)
+
+
+if __name__ == "__main__":
+    main()