@@ -142,60 +142,24 @@ struct Log {
142142 float score ;
143143 float episode_return ;
144144 float episode_length ;
145+ float n ;
145146};
146147
// (Removed by this diff.) Fixed-capacity array of per-episode Log records.
//   logs   - backing array of records
//   length - capacity of `logs` (set from `size` in allocate_logbuffer)
//   idx    - number of records stored so far; also the next write slot
147- typedef struct LogBuffer LogBuffer ;
148- struct LogBuffer {
149- Log * logs ;
150- int length ;
151- int idx ;
152- };
153-
// (Removed by this diff.) Allocates a zero-initialized LogBuffer header plus a
// zero-initialized array of `size` Log records, and returns the header with
// length = size and idx = 0. The caller releases it with free_logbuffer.
// NOTE(review): neither calloc result is checked before being dereferenced.
154- LogBuffer * allocate_logbuffer (int size ) {
155- LogBuffer * logs = (LogBuffer * )calloc (1 , sizeof (LogBuffer ));
156- logs -> logs = (Log * )calloc (size , sizeof (Log ));
157- logs -> length = size ;
158- logs -> idx = 0 ;
159- return logs ;
160- }
161-
// (Removed by this diff.) Frees the record array first, then the LogBuffer
// header itself. `buffer` must be non-NULL because it is dereferenced.
162- void free_logbuffer (LogBuffer * buffer ) {
163- free (buffer -> logs );
164- free (buffer );
165- }
166-
// (Removed by this diff; replaced by add_log(CTowerClimb*) further down.)
// Copies *log into the next free slot of the buffer. When the buffer is
// already full (idx == length) the record is silently dropped.
167- void add_log (LogBuffer * logs , Log * log ) {
168- if (logs -> idx == logs -> length ) {
169- return ;
170- }
171- logs -> logs [logs -> idx ] = * log ;
172- logs -> idx += 1 ;
173- }
174-
// (Removed by this diff.) Returns the mean of episode_return, episode_length,
// score and perf over the records currently stored, then empties the buffer
// by resetting idx to 0. An empty buffer yields an all-zero Log.
175- Log aggregate_and_clear (LogBuffer * logs ) {
176- Log log = {0 };
177- if (logs -> idx == 0 ) return log ; // Avoid division by zero
178-
// Each record contributes value / count, so the running sum ends as the mean.
179- for (int i = 0 ; i < logs -> idx ; i ++ ) {
180- log .episode_return += logs -> logs [i ].episode_return / logs -> idx ;
181- log .episode_length += logs -> logs [i ].episode_length / logs -> idx ;
182- log .score += logs -> logs [i ].score / logs -> idx ;
183- log .perf += logs -> logs [i ].perf / logs -> idx ;
184- }
185-
186- logs -> idx = 0 ;
187- return log ;
188- }
189-
// Environment state for the tower-climb puzzle.
// Changes made by this diff:
//   - `Client` is forward-declared here (its typedef further down the file is
//     removed) so the struct can hold a `client` pointer; c_render creates the
//     client lazily when this pointer is NULL.
//   - `dones` is replaced by `terminals`, and a `truncations` pointer is added.
//     NOTE(review): the allocate() hunk visible in this diff allocates
//     `terminals` but not `truncations` - confirm it is provided elsewhere.
//   - `log_buffer` is replaced by two inline Log values: `log` holds running
//     sums plus the episode count `n`, `buffer` holds the statistics of the
//     episode in progress (see add_log(CTowerClimb*)).
//   - `num_maps`, `all_levels`, `all_puzzles` describe a pool that c_reset
//     samples from with rand() % num_maps.
// The `@@` line below is a hunk boundary: fields between `rows_cleared` and
// `reward_move_block` are not shown in this diff.
148+ typedef struct Client Client ;
190149typedef struct CTowerClimb CTowerClimb ;
191150struct CTowerClimb {
151+ Client * client ;
192152 unsigned char * observations ;
193153 int * actions ;
194154 float * rewards ;
195- unsigned char * dones ;
196- LogBuffer * log_buffer ;
155+ unsigned char * terminals ;
156+ unsigned char * truncations ;
197157 Log log ;
158+ Log buffer ;
198159 float score ;
160+ int num_maps ;
161+ Level * all_levels ;
162+ PuzzleState * all_puzzles ;
199163 Level * level ;
200164 PuzzleState * state ; // Contains blocks bitmask, position, orientation, etc.
201165 int rows_cleared ;
@@ -205,6 +169,15 @@ struct CTowerClimb {
205169 float reward_move_block ;
206170};
207171
// (Added by this diff.) Closes out the current episode's statistics: adds
// env->buffer's perf, score, episode_return and episode_length onto the running
// totals in env->log, bumps the episode count env->log.n, and zeroes
// env->buffer for the next episode.
// env->log therefore holds sums, not means - presumably the consumer divides by
// log.n; confirm against the caller that reads env->log.
172+ void add_log (CTowerClimb * env ) {
173+ env -> log .perf += env -> buffer .perf ;
174+ env -> log .score += env -> buffer .score ;
175+ env -> log .episode_return += env -> buffer .episode_return ;
176+ env -> log .episode_length += env -> buffer .episode_length ;
177+ env -> log .n += 1.0 ;
178+ env -> buffer = (Log ){0 };
179+ }
180+
208181void levelToPuzzleState (Level * level , PuzzleState * state ) {
209182 memset (state -> blocks , 0 , BLOCK_BYTES );
210183 for (int i = 0 ; i < level -> total_length ; i ++ ) {
@@ -247,24 +220,22 @@ CTowerClimb* allocate() {
247220 env -> observations = (unsigned char * )calloc (OBS_VISION + PLAYER_OBS , sizeof (unsigned char ));
248221 env -> actions = (int * )calloc (1 , sizeof (int ));
249222 env -> rewards = (float * )calloc (1 , sizeof (float ));
250- env -> dones = (unsigned char * )calloc (1 , sizeof (unsigned char ));
251- env -> log_buffer = allocate_logbuffer (LOG_BUFFER_SIZE );
223+ env -> terminals = (unsigned char * )calloc (1 , sizeof (unsigned char ));
252224 return env ;
253225}
254226
// Renamed by this diff from free_initialized to c_close. Releases the level
// (free_level) and puzzle state (free_puzzle_state) and, new in this diff,
// frees `env` itself - `env` must not be touched after this call.
// NOTE(review): env->client (created lazily in c_render via make_client) is not
// released here - confirm whether the client needs an explicit close.
// NOTE(review): c_reset now passes entries of all_puzzles/all_levels to
// setPuzzle; if env->level / env->state end up aliasing those arrays, freeing
// them here may be wrong, and all_levels/all_puzzles themselves are not freed -
// confirm ownership against setPuzzle and the code that fills the pool.
// NOTE(review): if a caller other than free_allocated owns the `env`
// allocation, the added free(env) would be a double free - confirm.
255- void free_initialized (CTowerClimb * env ) {
227+ void c_close (CTowerClimb * env ) {
256228 free_level (env -> level );
257229 free_puzzle_state (env -> state );
230+ free (env );
258231}
259232
// Tears down an environment: frees the actions, observations, terminals
// (formerly dones) and rewards arrays, then calls c_close, which frees the
// level, the puzzle state and `env` itself. The log buffer no longer exists,
// so its free call is removed.
// NOTE(review): env->truncations is not freed here; the visible allocate() hunk
// does not allocate it either - confirm nothing else assigns owned memory to it.
260233void free_allocated (CTowerClimb * env ) {
261234 free (env -> actions );
262235 free (env -> observations );
263- free (env -> dones );
236+ free (env -> terminals );
264237 free (env -> rewards );
265- free_logbuffer (env -> log_buffer );
266- free_initialized (env );
267- free (env );
// c_close also frees `env`; nothing may dereference it past this point.
238+ c_close (env );
268239}
269240
270241void calculate_window_bounds (int * bounds , int center_pos , int window_size , int max_size ) {
@@ -337,23 +308,24 @@ void compute_observations(CTowerClimb* env) {
337308}
338309
// Starts a new episode: clears terminals[0] and rows_cleared, zeroes the block
// bitmask, selects a random entry from the map pool and installs it with
// setPuzzle, then recomputes observations.
// After this diff the running totals in env->log are no longer zeroed here, so
// they persist across episodes; env->buffer is not cleared here either (add_log
// clears it on the episode-ending paths).
// NOTE(review): rand() % env->num_maps is undefined behaviour when num_maps is
// 0 - confirm num_maps > 0 is guaranteed before the first reset.
339310void c_reset (CTowerClimb * env ) {
340- env -> log = (Log ){0 };
341- env -> dones [0 ] = 0 ;
311+ env -> terminals [0 ] = 0 ;
342312 env -> rows_cleared = 0 ;
343313 memset (env -> state -> blocks , 0 , BLOCK_BYTES * sizeof (unsigned char ));
314+ int idx = rand () % env -> num_maps ;
315+ setPuzzle (env , & env -> all_puzzles [idx ], & env -> all_levels [idx ]);
344316 compute_observations (env );
345317}
346318
// Applies the illegal-move penalty: sets this step's reward to
// reward_illegal_move and adds it to the current episode's return, which this
// diff moves from env->log to env->buffer.
347319void illegal_move (CTowerClimb * env ){
348320 env -> rewards [0 ] = env -> reward_illegal_move ;
349- env -> log .episode_return += env -> reward_illegal_move ;
321+ env -> buffer .episode_return += env -> reward_illegal_move ;
350322}
351323
// Handles the robot dying: sets the step reward to -1, subtracts 1 from the
// episode return, marks perf as 0, and finalizes the episode with add_log(env),
// which folds env->buffer into env->log and clears the buffer. It does not
// reset the environment; the caller does that.
352324void death (CTowerClimb * env ){
353325 env -> rewards [0 ] = -1 ;
354- env -> log .episode_return -= 1 ;
355- env -> log .perf = 0 ;
356- add_log (env -> log_buffer , & env -> log );
326+ env -> buffer .episode_return -= 1 ;
327+ env -> buffer .perf = 0 ;
328+ add_log (env );
357329}
358330
359331int isGoal ( PuzzleState * s , Level * lvl ) {
@@ -382,8 +354,8 @@ int climb(PuzzleState* outState, int action, int mode, CTowerClimb* env, const L
382354 if (mode == RL_MODE && floor_cleared > env -> rows_cleared ){
383355 env -> rows_cleared = floor_cleared ;
384356 env -> rewards [0 ] = env -> reward_climb_row ;
385- env -> log .episode_return += env -> reward_climb_row ;
386- env -> log .score = floor_cleared ;
357+ env -> buffer .episode_return += env -> reward_climb_row ;
358+ env -> buffer .score = floor_cleared ;
387359 }
388360 outState -> robot_position = cell_next_above ;
389361 outState -> robot_state = 0 ;
@@ -398,7 +370,7 @@ int drop(PuzzleState* outState, int action, int mode, CTowerClimb* env, const Le
398370 int step_down = next_double_below_cell >= 0 && TEST_BIT (outState -> blocks , next_double_below_cell );
399371 if (mode == RL_MODE ){
400372 env -> rewards [0 ] = env -> reward_fall_row ;
401- env -> log .episode_return += env -> reward_fall_row ;
373+ env -> buffer .episode_return += env -> reward_fall_row ;
402374 }
403375 if (step_down ){
404376 outState -> robot_position = next_below_cell ;
@@ -776,45 +748,44 @@ int applyAction(PuzzleState* outState, int action, Level* lvl, int mode, CTower
776748 }
777749 if (mode == RL_MODE && result == 1 ){
778750 env -> rewards [0 ] = env -> reward_move_block ;
779- env -> log .episode_return += env -> reward_move_block ;
751+ env -> buffer .episode_return += env -> reward_move_block ;
780752 }
781753 return result ;
782754 }
783755 return 0 ;
784756}
785757
// Advances the environment by one step using env->actions[0].
// Interface change in this diff: the return type goes from int (1 = episode
// ended, 0 = continue) to void, and the function now calls c_reset itself when
// an episode ends instead of signalling the caller.
// NOTE(review): terminals[0] is never set to 1 anywhere in the visible code
// (c_reset only writes 0) - confirm the consumer does not need an episode-end
// flag now that the return value is gone.
786- int c_step (CTowerClimb * env ) {
787- env -> log .episode_length += 1.0 ;
758+ void c_step (CTowerClimb * env ) {
759+ env -> buffer .episode_length += 1.0 ;
788760 env -> rewards [0 ] = 0.0 ;
// Step cap: once the episode exceeds 60 steps it is logged with perf = 0.
// NOTE(review): the old code returned here; the new code resets but does NOT
// return, so the action chosen for the old episode is then applied to the
// freshly reset state below. A `return` after c_reset looks intended - confirm.
789- if (env -> log .episode_length > 60 ){
761+ if (env -> buffer .episode_length > 60 ){
790762 env -> rewards [0 ] = 0 ;
791- env -> log .perf = 0 ;
792- add_log (env -> log_buffer , & env -> log );
793- return 1 ;
763+ env -> buffer .perf = 0 ;
764+ add_log (env );
765+ c_reset ( env ) ;
794766 }
795767 // Create next state
796768 int move_result = applyAction (env -> state , env -> actions [0 ], env -> level , RL_MODE , env );
// An illegal move is penalized and returns early without recomputing
// observations.
797769 if (move_result == MOVE_ILLEGAL ) {
798770 illegal_move (env );
799- return 0 ;
771+ return ;
800772 }
// NOTE(review): after death the env is reset but control falls through to the
// goal check and compute_observations on the new episode's state (c_reset has
// already recomputed observations) - confirm the fall-through is intended.
801773 if (move_result == MOVE_DEATH ){
802774 death (env );
803- return 1 ;
775+ c_reset ( env ) ;
804776 }
805777
806778 // Check for goal state
807779 if (isGoal (env -> state , env -> level )) {
808780 env -> rewards [0 ] = 1.0 ;
809- env -> log .episode_return += 1.0 ;
810- env -> log .perf = 1.0 ;
811- add_log (env -> log_buffer , & env -> log );
812- return 1 ;
781+ env -> buffer .episode_return += 1.0 ;
782+ env -> buffer .perf = 1.0 ;
783+ add_log (env );
784+ c_reset ( env ) ;
813785 }
814786
815787 // Update observations
816788 compute_observations (env );
817- return 0 ;
818789}
819790
820791typedef struct BFSNode {
@@ -1176,7 +1147,6 @@ typedef enum {
11761147 ANIM_SHIMMY_LEFT ,
11771148} AnimationState ;
11781149
1179- typedef struct Client Client ;
11801150struct Client {
11811151 float width ;
11821152 float height ;
@@ -1504,7 +1474,12 @@ static void render_scene(Client* client, CTowerClimb* env) {
15041474 EndDrawing ();
15051475}
15061476
1507- void c_render (Client * client , CTowerClimb * env ) {
1477+ void c_render (CTowerClimb * env ) {
1478+ if (env -> client == NULL ) {
1479+ env -> client = make_client (env );
1480+ }
1481+ Client * client = env -> client ;
1482+
15081483 if (IsKeyDown (KEY_ESCAPE )) exit (0 );
15091484 // Handle state transitions - drop animation
15101485 if (env -> state -> robot_state == DEFAULT && client -> animState == ANIM_HANGING && client -> enable_animations ) {
0 commit comments