Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions config/laser_puzzle.ini
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ num_layers = 2
total_timesteps = 125_000_000
horizon = 48
minibatch_size = 12288
gamma = 0.985
gae_lambda = 0.965
gamma = 0.99
gae_lambda = 0.98
learning_rate = 0.004
ent_coef = 0.025
vf_coef = 1.0
1 change: 0 additions & 1 deletion ocean/laser_puzzle/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

void my_init(Env* env, Dict* kwargs) {
// kwargs are passed in by the config .ini file, can set them here, will ignore for now
(void)kwargs;
env->num_agents = 1;
env->ROWS = INIT_ROWS;
env->COLS = INIT_COLS;
Expand Down
1 change: 1 addition & 0 deletions ocean/laser_puzzle/laser_puzzle.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ int demo() {
LaserPuzzle env = {0};

// allocate memory, initialize the client
allocate(&env);
c_reset(&env);
env.client = make_client();

Expand Down
188 changes: 68 additions & 120 deletions ocean/laser_puzzle/laser_puzzle.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include "level_generation/puzzle_types.h"

#define BOARD_IDX(cols, r, c) ((r) * (cols) + (c))
// Level generation scripts are kept outside pufferlib for now; this env loads pre-generated levels.
#define LASER_PUZZLE_LEVELS_PATH "resources/laser_puzzle/laser_puzzle_levels.bin"

// observations: 6*6 board, one byte per cell:
Expand All @@ -20,14 +19,12 @@
#define OBS_MIRROR_RIGHT (OBS_SENSOR + MAX_LASERS)
#define OBS_MIRROR_LEFT (OBS_MIRROR_RIGHT + 1)


// actions: 4 * 4 * 3, set mirror to none, left or right for each interior cell. discrete actions
#define ACTIONS_PER_CELL 3
#define INNER_ROWS (INIT_ROWS - 2)
#define INNER_COLS (INIT_COLS - 2)
#define NUM_ACTIONS (ACTIONS_PER_CELL * INNER_ROWS * INNER_COLS)


static const int CELL_SIZE = 80;
static const Color LASER_COLORS[] = {SKYBLUE, RED, GREEN, YELLOW, BLUE, ORANGE, PURPLE, MAGENTA};

Expand Down Expand Up @@ -55,8 +52,7 @@ typedef struct {
} LaserPuzzleLevel;

typedef struct {
// this will store the log results for only the completed episodes
Log log;
Log log; // only stores results for completed episodes
Client* client;

unsigned char* observations;
Expand All @@ -69,14 +65,9 @@ typedef struct {
unsigned int rng;
int owns_buffers;

// length of the current episode
int episode_length;

// max actions allowed before the episode is over
int max_steps;

// return for this episode
float episode_return;
int max_steps; // max actions allowed before the episode is over
float episode_return; // return for this episode

// env specific
int ROWS;
Expand All @@ -94,31 +85,13 @@ typedef struct {
} LaserPuzzle;

void load_laser_puzzle_levels(LaserPuzzle* env, const char* path) {
static LaserPuzzleLevel* levels = NULL;
static int level_count = 0;
if (levels != NULL) {
env->levels = levels;
env->num_levels = level_count;
return;
}

FILE* file = fopen(path, "rb");
if (file == NULL) {
perror("Failed to open laser puzzle levels");
env->num_levels = 0;
return;
}

uint32_t header[3] = {0};
if (fread(header, sizeof(uint32_t), 3, file) != 3 || header[2] == 0) {
fprintf(stderr, "Failed to read laser puzzle level count from %s\n", path);
fclose(file);
env->num_levels = 0;
return;
}
fread(header, sizeof(uint32_t), 3, file);

level_count = (int)header[2];
levels = (LaserPuzzleLevel*)calloc((size_t)level_count, sizeof(LaserPuzzleLevel));
int level_count = (int)header[2];
LaserPuzzleLevel* levels = (LaserPuzzleLevel*)calloc((size_t)level_count, sizeof(LaserPuzzleLevel));

for (int i = 0; i < level_count; i++) {
fread(&levels[i].optimal_mirrors, sizeof(int), 1, file);
Expand All @@ -142,7 +115,6 @@ void load_laser_puzzle_levels(LaserPuzzle* env, const char* path) {
}


// Environment lifecycle
// This allocate function only runs in the standalone demo since puffer vecenv already allocates memory.
void allocate(LaserPuzzle* env) {
env->ROWS = INIT_ROWS;
Expand All @@ -164,8 +136,8 @@ void allocate(LaserPuzzle* env) {

// Called from c_close in both standalone and vecenv modes.
void deallocate(LaserPuzzle* env) {
// always free the board, puffer doesnt own it
free(env->board);
free(env->levels);

// check if we are in the standalone demo or puffer owns the buffers
if (env->owns_buffers) {
Expand All @@ -176,10 +148,12 @@ void deallocate(LaserPuzzle* env) {
}

env->board = NULL;
env->levels = NULL;
env->observations = NULL;
env->actions = NULL;
env->rewards = NULL;
env->terminals = NULL;
env->num_levels = 0;

env->owns_buffers = 0;
}
Expand All @@ -196,21 +170,17 @@ Client* make_client() {
}

void close_client(Client* client) {
if (client->assets_loaded) {
UnloadTexture(client->sprites);
if (client->background.id != 0) {
UnloadTexture(client->background);
}
UnloadFont(client->font);
client->assets_loaded = 0;
}
UnloadTexture(client->sprites);
UnloadTexture(client->background);
UnloadFont(client->font);
client->assets_loaded = 0;
if (IsWindowReady()) {
CloseWindow();
}
free(client);
}

// any closing preparations, free any allocated memory
// free allocated memory, unload raylib resources
void c_close(LaserPuzzle* env) {
if (env->client != NULL) {
close_client(env->client);
Expand All @@ -221,15 +191,13 @@ void c_close(LaserPuzzle* env) {
}

void add_log(LaserPuzzle* env) {
// takes into account sinks + mirrors placed, normalized
float perf = 0.0f;
float perf = 0.0f; // takes into account sinks + mirros placed, normalized
if (env->mirrors_placed > 0) {
perf = ((float)env->sinks_found * (float)env->optimal_mirrors)
/ ((float)env->total_sinks * (float)env->mirrors_placed);
}

// takes into account sinks + mirrors placed, unnormalized
float score = 0.0f;
float score = 0.0f; // takes into account sinks + mirros placed, unnormalized
if (env->mirrors_placed > 0) {
score = (float)env->sinks_found
* ((float)env->optimal_mirrors / (float)env->mirrors_placed);
Expand Down Expand Up @@ -278,47 +246,32 @@ void compute_observations(LaserPuzzle* env) {
}
}

// Episode lifecycle
// reset the game state, start a new game
// reset the env state (ignore rewards, terminals --> handled by c_step)
void c_reset(LaserPuzzle* env) {
// check if memory has been allocated for the env variable (by puffer venvc), if not allocate
if (env->board == NULL) {
allocate(env);
}

env->sinks_found = 0;
env->mirrors_placed = 0;
env->moves_made = 0;
env->episode_length = 0;
env->episode_return = 0.0f;
env->pending_reset = 0;
for (int i = 0; i < MAX_LASERS; i++) {
env->sink_hit_before[i] = 0;
}

if (env->num_levels > 0) {
int level_index = rand_r(&env->rng) % env->num_levels;
const LaserPuzzleLevel* level = &env->levels[level_index];
env->total_sinks = level->sensor_count;
env->optimal_mirrors = level->optimal_mirrors;
memset(env->sink_hit_before, 0, sizeof(env->sink_hit_before));

for (int r = 0; r < env->ROWS; r++) {
for (int c = 0; c < env->COLS; c++) {
env->board[BOARD_IDX(env->COLS, r, c)] = level->puzzle[r][c];
}
}
}
int level_index = rand_r(&env->rng) % env->num_levels;
const LaserPuzzleLevel* level = &env->levels[level_index];
env->total_sinks = level->sensor_count;
env->optimal_mirrors = level->optimal_mirrors;

memcpy(env->board, level->puzzle, sizeof(level->puzzle));

compute_observations(env);
}

// advance state
void c_step(LaserPuzzle* env) {
if (env->client && env->pending_reset) {
// When we have a client, since we deferred reset to display the terminal state, reset now.
// When we have a client, since we deferred reset to display the terminal state, reset now. This also means we are skipping an action given by puffernet. Not really an issue, since this block only runs with puffer eval and the standalone demo, not in training.
c_reset(env);
env->rewards[0] = 0.0f;
env->terminals[0] = 0.0f;
return;
}

Expand All @@ -336,46 +289,46 @@ void c_step(LaserPuzzle* env) {
env->mirrors_placed++;
}

if (boardCell.type == LASER) {
int laserId = boardCell.id;
int curR = r;
int curC = c;
int dr = 0;
int dc = 0;

if (curR == 0) {
dr = 1;
} else if (curR == env->ROWS - 1) {
dr = -1;
} else if (curC == 0) {
dc = 1;
} else if (curC == env->COLS - 1) {
dc = -1;
}
if (boardCell.type != LASER) {
continue;
}

int laserId = boardCell.id;
int curR = r;
int curC = c;
int dr = 0;
int dc = 0;

if (curR == 0) {
dr = 1;
} else if (curR == env->ROWS - 1) {
dr = -1;
} else if (curC == 0) {
dc = 1;
} else if (curC == env->COLS - 1) {
dc = -1;
}

while (curR + dr >= 0 && curR + dr < env->ROWS && curC + dc >= 0 && curC + dc < env->COLS) {
curR += dr;
curC += dc;

while (curR + dr >= 0 && curR + dr < env->ROWS && curC + dc >= 0 && curC + dc < env->COLS) {
curR += dr;
curC += dc;

Cell hitCell = env->board[BOARD_IDX(env->COLS, curR, curC)];
if (hitCell.type == SENSOR) {
if (hitCell.id == laserId) {
env->sinks_found++;

if (!env->sink_hit_before[laserId]) {
env->sink_hit_before[laserId] = 1;
new_sinks_hit++;
}
}
} else if (hitCell.mirror == MIRROR_LEFT) {
int oldDr = dr;
dr = dc;
dc = oldDr;
} else if (hitCell.mirror == MIRROR_RIGHT) {
int oldDr = dr;
dr = -dc;
dc = -oldDr;
Cell hitCell = env->board[BOARD_IDX(env->COLS, curR, curC)];
if (hitCell.type == SENSOR && hitCell.id == laserId) {
env->sinks_found++;

if (!env->sink_hit_before[laserId]) {
env->sink_hit_before[laserId] = 1;
new_sinks_hit++;
}
} else if (hitCell.mirror == MIRROR_LEFT) {
int oldDr = dr;
dr = dc;
dc = oldDr;
} else if (hitCell.mirror == MIRROR_RIGHT) {
int oldDr = dr;
dr = -dc;
dc = -oldDr;
}
}
}
Expand All @@ -384,10 +337,9 @@ void c_step(LaserPuzzle* env) {
// handle the rewards, episode_length, terminal, episode_return
// rewards: +1 for ending the episode optimally (minimal mirrors), +0.6 for ending the episode suboptimally, -0.01 per move, +0.3 for first time laser hit
env->episode_length++;
env->rewards[0] = -0.01f + 0.3f * (float)new_sinks_hit;
env->rewards[0] = 0.3f * (float)new_sinks_hit;
env->terminals[0] = 0.0f;

// end episode when all the sinks have been found (assign different rewards based on whether the optimal amount of mirrors has been used)
if (env->sinks_found == env->total_sinks) {
env->terminals[0] = 1.0f;
if (env->mirrors_placed == env->optimal_mirrors) {
Expand All @@ -400,9 +352,7 @@ void c_step(LaserPuzzle* env) {
}

env->episode_return += env->rewards[0];
compute_observations(env);

// update the logs, should be updated on every episode termination (should also only be floats used')
if (env->terminals[0]) {
// we defer reset so that client can display the terminal state without it being immediately reset
add_log(env);
Expand All @@ -412,10 +362,10 @@ void c_step(LaserPuzzle* env) {
c_reset(env);
}
}
}

compute_observations(env);
}

// Rendering
void trace_laser(LaserPuzzle * env, int r, int c) {
Cell laser = env->board[BOARD_IDX(env->COLS, r, c)];
Color laserColor = LASER_COLORS[laser.id % 8];
Expand Down Expand Up @@ -465,7 +415,7 @@ void trace_laser(LaserPuzzle * env, int r, int c) {
// update current cell
curR = nextR;
curC = nextC;

// update direction
Cell cell = env->board[BOARD_IDX(env->COLS, curR, curC)];
if (cell.mirror == MIRROR_LEFT) {
Expand Down Expand Up @@ -578,11 +528,9 @@ void c_render(LaserPuzzle* env) {
DrawTextEx(client->font, movesText, (Vector2){GetScreenWidth() - movesSize.x - 16, GetScreenHeight() - fontSize - 16}, fontSize, spacing, RAYWHITE);
DrawTextEx(client->font, mirrorsText, (Vector2){GetScreenWidth() - mirrorsSize.x - 16, 14}, fontSize, spacing, RAYWHITE);


// level if we found the optimal mirror count)
if (env->sinks_found == env->total_sinks) {
const char* solvedText = "Puzzle solved! Can you do it with less mirrors?";
if (env->sinks_found == env->total_sinks && env->mirrors_placed == env->optimal_mirrors) {
if (env->mirrors_placed == env->optimal_mirrors) {
solvedText = "Optimal solve! Press R for the next puzzle.";
}

Expand Down
Loading
Loading