Skip to content

Commit 68e45f5

Browse files
committed
Tower climb new binds
1 parent f27f847 commit 68e45f5

4 files changed

Lines changed: 77 additions & 100 deletions

File tree

pufferlib/config/ocean/tower_climb.ini

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@ package = ocean
33
env_name = puffer_tower_climb
44
policy_name = TowerClimb
55
rnn_name = TowerClimbLSTM
6-
vec = multiprocessing
76

87
[env]
98
num_envs = 4096
@@ -18,9 +17,6 @@ gamma = 0.98
1817
learning_rate = 0.05
1918
minibatch_size = 32768
2019

21-
[sweep.metric]
22-
metric = environment/levels_completed
23-
2420
[sweep.parameters.train.parameters.total_timesteps]
2521
distribution = uniform
2622
min = 50_000_000

pufferlib/ocean/tower_climb/tower_climb.c

Lines changed: 25 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -103,34 +103,47 @@ void free_tower_climb_net(TowerClimbNet* net) {
103103
void demo() {
104104
Weights* weights = load_weights("resources/tower_climb_weights.bin", 560407);
105105
TowerClimbNet* net = init_tower_climb_net(weights, 1);
106+
107+
int num_maps = 10;
108+
Level* levels = calloc(num_maps, sizeof(Level));
109+
PuzzleState* puzzle_states = calloc(num_maps, sizeof(PuzzleState));
110+
111+
for (int i = 0; i < num_maps; i++) {
112+
int goal_height = rand() % 4 + 5;
113+
int min_moves = 10;
114+
int max_moves = 15;
115+
init_level(&levels[i]);
116+
init_puzzle_state(&puzzle_states[i]);
117+
cy_init_random_level(&levels[i], goal_height, max_moves, min_moves, i);
118+
levelToPuzzleState(&levels[i], &puzzle_states[i]);
119+
}
120+
106121
CTowerClimb* env = allocate();
122+
env->num_maps = num_maps;
123+
env->all_levels = levels;
124+
env->all_puzzles = puzzle_states;
125+
107126
int seed = 0;
108127
srand(time(NULL));
109128
int random_level = 5 + (rand() % 4);
110129
init_random_level(env, random_level, 15, 10, seed);
111-
Client* client = make_client(env);
130+
c_reset(env);
131+
c_render(env);
132+
Client* client = env->client;
112133
client->enable_animations = 1;
113134
int tick = 0;
114135
while (!WindowShouldClose()) {
115-
int done = 0;
116136
if (tick % 6 == 0 && !client->isMoving) {
117137
tick = 0;
118138
int human_action = env->actions[0];
119139
forward(net, env->observations, env->actions);
120140
if (IsKeyDown(KEY_LEFT_SHIFT)) {
121141
env->actions[0] = human_action;
122142
}
123-
done = c_step(env);
143+
c_step(env);
124144
if (IsKeyDown(KEY_LEFT_SHIFT)) {
125145
env->actions[0] = NOOP;
126146
}
127-
if (done) {
128-
seed++;
129-
c_reset(env);
130-
srand(time(NULL));
131-
int random_level_next = 5 + (rand() % 4);
132-
init_random_level(env, random_level_next, 15, 10, seed);
133-
}
134147
}
135148
tick++;
136149
if (IsKeyDown(KEY_LEFT_SHIFT)) {
@@ -154,7 +167,7 @@ void demo() {
154167
env->actions[0] = DROP;
155168
}
156169
}
157-
c_render(client, env);
170+
c_render(env);
158171
}
159172
close_client(client);
160173
free_allocated(env);
@@ -169,13 +182,7 @@ void performance_test() {
169182
int i = 0;
170183
while (time(NULL) - start < test_time) {
171184
env->actions[0] = rand() % 5;
172-
int done = 0;
173-
done = c_step(env);
174-
if (done) {
175-
seed++;
176-
c_reset(env);
177-
init_random_level(env, 8, 25, 15, seed);
178-
}
185+
c_step(env);
179186
i++;
180187
}
181188
long end = time(NULL);

pufferlib/ocean/tower_climb/tower_climb.h

Lines changed: 52 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -142,60 +142,24 @@ struct Log {
142142
float score;
143143
float episode_return;
144144
float episode_length;
145+
float n;
145146
};
146147

147-
typedef struct LogBuffer LogBuffer;
148-
struct LogBuffer {
149-
Log* logs;
150-
int length;
151-
int idx;
152-
};
153-
154-
LogBuffer* allocate_logbuffer(int size) {
155-
LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer));
156-
logs->logs = (Log*)calloc(size, sizeof(Log));
157-
logs->length = size;
158-
logs->idx = 0;
159-
return logs;
160-
}
161-
162-
void free_logbuffer(LogBuffer* buffer) {
163-
free(buffer->logs);
164-
free(buffer);
165-
}
166-
167-
void add_log(LogBuffer* logs, Log* log) {
168-
if (logs->idx == logs->length) {
169-
return;
170-
}
171-
logs->logs[logs->idx] = *log;
172-
logs->idx += 1;
173-
}
174-
175-
Log aggregate_and_clear(LogBuffer* logs) {
176-
Log log = {0};
177-
if (logs->idx == 0) return log; // Avoid division by zero
178-
179-
for (int i = 0; i < logs->idx; i++) {
180-
log.episode_return += logs->logs[i].episode_return / logs->idx;
181-
log.episode_length += logs->logs[i].episode_length / logs->idx;
182-
log.score += logs->logs[i].score / logs->idx;
183-
log.perf += logs->logs[i].perf / logs->idx;
184-
}
185-
186-
logs->idx = 0;
187-
return log;
188-
}
189-
148+
typedef struct Client Client;
190149
typedef struct CTowerClimb CTowerClimb;
191150
struct CTowerClimb {
151+
Client* client;
192152
unsigned char* observations;
193153
int* actions;
194154
float* rewards;
195-
unsigned char* dones;
196-
LogBuffer* log_buffer;
155+
unsigned char* terminals;
156+
unsigned char* truncations;
197157
Log log;
158+
Log buffer;
198159
float score;
160+
int num_maps;
161+
Level* all_levels;
162+
PuzzleState* all_puzzles;
199163
Level* level;
200164
PuzzleState* state; // Contains blocks bitmask, position, orientation, etc.
201165
int rows_cleared;
@@ -205,6 +169,15 @@ struct CTowerClimb {
205169
float reward_move_block;
206170
};
207171

172+
void add_log(CTowerClimb* env) {
173+
env->log.perf += env->buffer.perf;
174+
env->log.score += env->buffer.score;
175+
env->log.episode_return += env->buffer.episode_return;
176+
env->log.episode_length += env->buffer.episode_length;
177+
env->log.n += 1.0;
178+
env->buffer = (Log){0};
179+
}
180+
208181
void levelToPuzzleState(Level* level, PuzzleState* state) {
209182
memset(state->blocks, 0, BLOCK_BYTES);
210183
for (int i = 0; i < level->total_length; i++) {
@@ -247,24 +220,22 @@ CTowerClimb* allocate() {
247220
env->observations = (unsigned char*)calloc(OBS_VISION+PLAYER_OBS, sizeof(unsigned char));
248221
env->actions = (int*)calloc(1, sizeof(int));
249222
env->rewards = (float*)calloc(1, sizeof(float));
250-
env->dones = (unsigned char*)calloc(1, sizeof(unsigned char));
251-
env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE);
223+
env->terminals = (unsigned char*)calloc(1, sizeof(unsigned char));
252224
return env;
253225
}
254226

255-
void free_initialized(CTowerClimb* env) {
227+
void c_close(CTowerClimb* env) {
256228
free_level(env->level);
257229
free_puzzle_state(env->state);
230+
free(env);
258231
}
259232

260233
void free_allocated(CTowerClimb* env) {
261234
free(env->actions);
262235
free(env->observations);
263-
free(env->dones);
236+
free(env->terminals);
264237
free(env->rewards);
265-
free_logbuffer(env->log_buffer);
266-
free_initialized(env);
267-
free(env);
238+
c_close(env);
268239
}
269240

270241
void calculate_window_bounds(int* bounds, int center_pos, int window_size, int max_size) {
@@ -337,23 +308,24 @@ void compute_observations(CTowerClimb* env) {
337308
}
338309

339310
void c_reset(CTowerClimb* env) {
340-
env->log = (Log){0};
341-
env->dones[0] = 0;
311+
env->terminals[0] = 0;
342312
env->rows_cleared = 0;
343313
memset(env->state->blocks, 0, BLOCK_BYTES * sizeof(unsigned char));
314+
int idx = rand() % env->num_maps;
315+
setPuzzle(env, &env->all_puzzles[idx], &env->all_levels[idx]);
344316
compute_observations(env);
345317
}
346318

347319
void illegal_move(CTowerClimb* env){
348320
env->rewards[0] = env->reward_illegal_move;
349-
env->log.episode_return += env->reward_illegal_move;
321+
env->buffer.episode_return += env->reward_illegal_move;
350322
}
351323

352324
void death(CTowerClimb* env){
353325
env->rewards[0] = -1;
354-
env->log.episode_return -= 1;
355-
env->log.perf = 0;
356-
add_log(env->log_buffer, &env->log);
326+
env->buffer.episode_return -= 1;
327+
env->buffer.perf = 0;
328+
add_log(env);
357329
}
358330

359331
int isGoal( PuzzleState* s, Level* lvl) {
@@ -382,8 +354,8 @@ int climb(PuzzleState* outState, int action, int mode, CTowerClimb* env, const L
382354
if(mode == RL_MODE && floor_cleared > env->rows_cleared){
383355
env->rows_cleared = floor_cleared;
384356
env->rewards[0] = env->reward_climb_row;
385-
env->log.episode_return += env->reward_climb_row;
386-
env->log.score = floor_cleared;
357+
env->buffer.episode_return += env->reward_climb_row;
358+
env->buffer.score = floor_cleared;
387359
}
388360
outState->robot_position = cell_next_above;
389361
outState->robot_state = 0;
@@ -398,7 +370,7 @@ int drop(PuzzleState* outState, int action, int mode, CTowerClimb* env, const Le
398370
int step_down = next_double_below_cell >= 0 && TEST_BIT(outState->blocks, next_double_below_cell);
399371
if(mode == RL_MODE){
400372
env->rewards[0] = env->reward_fall_row;
401-
env->log.episode_return += env->reward_fall_row;
373+
env->buffer.episode_return += env->reward_fall_row;
402374
}
403375
if (step_down){
404376
outState->robot_position = next_below_cell;
@@ -776,45 +748,44 @@ int applyAction(PuzzleState* outState, int action, Level* lvl, int mode, CTower
776748
}
777749
if (mode == RL_MODE && result == 1){
778750
env->rewards[0] = env->reward_move_block;
779-
env->log.episode_return += env->reward_move_block;
751+
env->buffer.episode_return += env->reward_move_block;
780752
}
781753
return result;
782754
}
783755
return 0;
784756
}
785757

786-
int c_step(CTowerClimb* env) {
787-
env->log.episode_length += 1.0;
758+
void c_step(CTowerClimb* env) {
759+
env->buffer.episode_length += 1.0;
788760
env->rewards[0] = 0.0;
789-
if(env->log.episode_length > 60){
761+
if(env->buffer.episode_length > 60){
790762
env->rewards[0] = 0;
791-
env->log.perf = 0;
792-
add_log(env->log_buffer, &env->log);
793-
return 1;
763+
env->buffer.perf = 0;
764+
add_log(env);
765+
c_reset(env);
794766
}
795767
// Create next state
796768
int move_result = applyAction(env->state, env->actions[0], env->level, RL_MODE, env);
797769
if (move_result == MOVE_ILLEGAL) {
798770
illegal_move(env);
799-
return 0;
771+
return;
800772
}
801773
if (move_result == MOVE_DEATH){
802774
death(env);
803-
return 1;
775+
c_reset(env);
804776
}
805777

806778
// Check for goal state
807779
if (isGoal(env->state, env->level)) {
808780
env->rewards[0] = 1.0;
809-
env->log.episode_return +=1.0;
810-
env->log.perf = 1.0;
811-
add_log(env->log_buffer, &env->log);
812-
return 1;
781+
env->buffer.episode_return +=1.0;
782+
env->buffer.perf = 1.0;
783+
add_log(env);
784+
c_reset(env);
813785
}
814786

815787
// Update observations
816788
compute_observations(env);
817-
return 0;
818789
}
819790

820791
typedef struct BFSNode {
@@ -1176,7 +1147,6 @@ typedef enum {
11761147
ANIM_SHIMMY_LEFT,
11771148
} AnimationState;
11781149

1179-
typedef struct Client Client;
11801150
struct Client {
11811151
float width;
11821152
float height;
@@ -1504,7 +1474,12 @@ static void render_scene(Client* client, CTowerClimb* env) {
15041474
EndDrawing();
15051475
}
15061476

1507-
void c_render(Client* client, CTowerClimb* env) {
1477+
void c_render(CTowerClimb* env) {
1478+
if (env->client == NULL) {
1479+
env->client = make_client(env);
1480+
}
1481+
Client* client = env->client;
1482+
15081483
if (IsKeyDown(KEY_ESCAPE)) exit(0);
15091484
// Handle state transitions - drop animation
15101485
if (env->state->robot_state == DEFAULT && client->animState == ANIM_HANGING && client->enable_animations) {

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
# easier to debug (you can run gdb --args python ...)
3636
cython_extension_paths = [
3737
'pufferlib/ocean/moba/cy_moba',
38-
'pufferlib/ocean/tower_climb/cy_tower_climb',
3938
]
4039

4140
# Build raylib for your platform

0 commit comments

Comments
 (0)