diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 24217703a5..cb20aaa398 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -667,7 +667,7 @@ static PyModuleDef module = { methods }; -PyMODINIT_FUNC PyInit_binding(void) { \ - import_array(); \ - return PyModule_Create(&module); \ +PyMODINIT_FUNC PyInit_binding(void) { + import_array(); + return PyModule_Create(&module); } diff --git a/pufferlib/ocean/tripletriad/binding.c b/pufferlib/ocean/tripletriad/binding.c new file mode 100644 index 0000000000..4c725d3130 --- /dev/null +++ b/pufferlib/ocean/tripletriad/binding.c @@ -0,0 +1,22 @@ +#include "tripletriad.h" + +#define Env CTripleTriad +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->width = unpack(kwargs, "width"); + env->height = unpack(kwargs, "height"); + env->card_width = unpack(kwargs, "card_width"); + env->card_height = unpack(kwargs, "card_height"); + init_ctripletriad(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "n", log->n); + return 0; +} diff --git a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx deleted file mode 100644 index 2ee110e861..0000000000 --- a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx +++ /dev/null @@ -1,112 +0,0 @@ -from libc.stdlib cimport calloc, free - -cdef extern from "tripletriad.h": - int LOG_BUFFER_SIZE - - ctypedef struct Log: - float perf; - float score; - float episode_return; - float episode_length; - - ctypedef struct LogBuffer - LogBuffer* allocate_logbuffer(int) - void free_logbuffer(LogBuffer*) - Log aggregate_and_clear(LogBuffer*) - - ctypedef struct CTripleTriad: - float* observations - int* 
actions - float* rewards - unsigned char* dones - LogBuffer* log_buffer - Log log - int card_width; - int card_height; - float* board_x; - float* board_y; - int** board_states; - int width; - int height; - int game_over; - int num_cards; - int*** cards_in_hand; - int* card_selected; - int** card_locations; - int* action_masks; - int*** board_card_values; - int* score; - - ctypedef struct Client - - CTripleTriad* init_ctripletriad(CTripleTriad* env) - void free_ctripletriad(CTripleTriad* env) - - Client* make_client(float width, float height) - void close_client(Client* client) - void c_render(Client* client, CTripleTriad* env) - void c_reset(CTripleTriad* env) - void c_step(CTripleTriad* env) - -cdef class CyTripleTriad: - cdef: - CTripleTriad* envs - Client* client - LogBuffer* logs - int num_envs - - def __init__(self, float[:, :] observations, int[:] actions, - float[:] rewards, unsigned char[:] terminals, int num_envs, - int width, int height, int card_width, int card_height): - - self.num_envs = num_envs - self.client = NULL - self.envs = <CTripleTriad*> calloc(num_envs, sizeof(CTripleTriad)) - self.logs = allocate_logbuffer(LOG_BUFFER_SIZE) - - cdef int i - for i in range(num_envs): - self.envs[i] = CTripleTriad( - observations=&observations[i, 0], - actions=&actions[i], - rewards=&rewards[i], - dones=&terminals[i], - log_buffer=self.logs, - width=width, - height=height, - card_width=card_width, - card_height=card_height, - num_cards=10, - ) - init_ctripletriad(&self.envs[i]) - - def reset(self): - cdef int i - for i in range(self.num_envs): - c_reset(&self.envs[i]) - - def step(self): - cdef int i - for i in range(self.num_envs): - c_step(&self.envs[i]) - - def render(self): - cdef CTripleTriad* env = &self.envs[0] - if self.client == NULL: - self.client = make_client(env.width, env.height) - - c_render(self.client, env) - - def close(self): - if self.client != NULL: - close_client(self.client) - self.client = NULL - - # Todo: clean - for i in range(self.num_envs): - 
free_ctripletriad(&self.envs[i]) - free(self.envs) - - def log(self): - cdef Log log = aggregate_and_clear(self.logs) - return log diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 6d6b5a2a20..de2aedc1e4 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -1,7 +1,10 @@ #include "tripletriad.h" #include "puffernet.h" +#include <time.h> -int main() { +#define NOOP -1 + +void interactive() { Weights* weights = load_weights("resources/tripletriad_weights.bin", 148880); LinearLSTM* net = make_linearlstm(weights, 1, 114, 15); @@ -15,12 +18,12 @@ int main() { }; allocate_ctripletriad(&env); c_reset(&env); - Client* client = make_client(env.width, env.height); + env.client = make_client(env.width, env.height); int tick = 0; int action; while (!WindowShouldClose()) { - action = -1; + action = NOOP; // User can take control of the player if (IsKeyDown(KEY_LEFT_SHIFT)) { @@ -61,15 +64,45 @@ int main() { } tick = (tick + 1) % 45; + if (env.actions[0] != NOOP) { c_step(&env); } - c_render(client, &env); + c_render(&env); } free_linearlstm(net); free(weights); - close_client(client); + close_client(env.client); + free_allocated_ctripletriad(&env); +} + +void performance_test() { + long test_time = 10; + CTripleTriad env = { + .width = 990, + .height = 690, + .card_width = 576 / 3, + .card_height = 672 / 3, + .game_over = 0, + .num_cards = 10, + }; + allocate_ctripletriad(&env); + c_reset(&env); + + long start = time(NULL); + int i = 0; + while (time(NULL) - start < test_time) { + c_step(&env); + i++; + } + long end = time(NULL); + printf("SPS: %ld\n", i / (end - start)); free_allocated_ctripletriad(&env); +} + +int main() { + // performance_test(); + interactive(); return 0; } diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index 300104aece..e4db2e753c 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ 
b/pufferlib/ocean/tripletriad/tripletriad.h @@ -27,71 +27,22 @@ const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; // how to start game compile - LD_LIBRARY_PATH=raylib-5.0_linux_amd64/lib ./tripletriadgame -#define LOG_BUFFER_SIZE 1024 - typedef struct Log Log; struct Log { float perf; float score; float episode_return; float episode_length; + float n; }; -typedef struct LogBuffer LogBuffer; -struct LogBuffer { - Log* logs; - int length; - int idx; -}; - -LogBuffer* allocate_logbuffer(int size) { - LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); - logs->logs = (Log*)calloc(size, sizeof(Log)); - logs->length = size; - logs->idx = 0; - return logs; -} - -void free_logbuffer(LogBuffer* buffer) { - free(buffer->logs); - free(buffer); -} - -void add_log(LogBuffer* logs, Log* log) { - if (logs->idx == logs->length) { - return; - } - logs->logs[logs->idx] = *log; - logs->idx += 1; - //printf("Log: %f, %f, %f\n", log->episode_return, log->episode_length, log->score); -} - -Log aggregate_and_clear(LogBuffer* logs) { - Log log = {0}; - if (logs->idx == 0) { - return log; - } - for (int i = 0; i < logs->idx; i++) { - log.episode_return += logs->logs[i].episode_return; - log.episode_length += logs->logs[i].episode_length; - log.perf += logs->logs[i].perf; - log.score += logs->logs[i].score; - } - log.episode_return /= logs->idx; - log.episode_length /= logs->idx; - log.perf /= logs->idx; - log.score /= logs->idx; - logs->idx = 0; - return log; -} - +typedef struct Client Client; typedef struct CTripleTriad CTripleTriad; struct CTripleTriad { float* observations; int* actions; float* rewards; - unsigned char* dones; - LogBuffer* log_buffer; + unsigned char* terminals; Log log; int card_width; int card_height; @@ -108,8 +59,21 @@ struct CTripleTriad { int* action_masks; int*** board_card_values; int* score; + int tick; + float perf; + float episode_return; + float episode_length; + Client* client; }; +void add_log(CTripleTriad* env) { + env->log.perf += 
env->perf; + env->log.score += env->score[0]; + env->log.episode_return += env->episode_return; + env->log.episode_length += env->episode_length; + env->log.n += 1; +} + void generate_board_positions(CTripleTriad* env) { for (int row = 0; row < 3; row++) { for (int col = 0; col < 3; col++) { @@ -209,9 +173,8 @@ void init_ctripletriad(CTripleTriad* env) { void allocate_ctripletriad(CTripleTriad* env) { env->actions = (int*)calloc(1, sizeof(int)); env->observations = (float*)calloc(env->width*env->height, sizeof(float)); - env->dones = (unsigned char*)calloc(1, sizeof(unsigned char)); + env->terminals = (unsigned char*)calloc(1, sizeof(unsigned char)); env->rewards = (float*)calloc(1, sizeof(float)); - env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE); init_ctripletriad(env); } @@ -246,9 +209,8 @@ void free_ctripletriad(CTripleTriad* env) { void free_allocated_ctripletriad(CTripleTriad* env) { free(env->actions); free(env->observations); - free(env->dones); + free(env->terminals); free(env->rewards); - free_logbuffer(env->log_buffer); free_ctripletriad(env); } @@ -298,7 +260,6 @@ void compute_observations(CTripleTriad* env) { } void c_reset(CTripleTriad* env) { - env->log = (Log){0}; env->game_over = 0; for(int i=0; i< 2; i++) { for(int j=0; j< 5; j++) { @@ -333,8 +294,11 @@ void c_reset(CTripleTriad* env) { for(int i=0; i< 2; i++) { env->score[i] = 5; } - env->dones[0] = 0; + env->terminals[0] = 0; compute_observations(env); + env->tick = 0; + env->episode_length = 0; + env->episode_return = 0; } void select_card(CTripleTriad* env, int card_selected, int player) { @@ -365,8 +329,8 @@ void update_action_masks(CTripleTriad* env) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 5; j++) { if (env->card_locations[i][j] != 0) { - int action_idx = env->card_locations[i][j] + 5; - if (action_idx >= 6 && action_idx < 15) { + int action_idx = env->card_locations[i][j] + 4; + if (action_idx >= 5 && action_idx < 14) { env->action_masks[action_idx] = 1; // Mark as 
unavailable } } @@ -386,14 +350,14 @@ void check_win_condition(CTripleTriad* env, int player) { if (count == 9) { // add a draw condition and winner value is 0 if (env->score[0] == env->score[1]) { - env->dones[0] = 1; + env->terminals[0] = 1; env->rewards[0] = 0.0; env->game_over = 1; } else { int winner = env->score[0] > env->score[1] ? 1 : -1; - env->dones[0] = 1; + env->terminals[0] = 1; env->rewards[0] = winner; // 1 for player win, -1 for opponent win - env->log.episode_return += winner; + env->episode_return += winner; env->game_over = 1; } } @@ -405,9 +369,9 @@ int get_bot_card_placement(CTripleTriad* env) { int num_valid_placements = 0; // Find valid placements - for (int i = 6; i < 15; i++) { + for (int i = 5; i < 14; i++) { if (env->action_masks[i] == 0) { - valid_placements[num_valid_placements++] = i - 5; + valid_placements[num_valid_placements++] = i - 4; if (num_valid_placements == 9) break; // Safety check } } @@ -494,34 +458,33 @@ void check_card_conversions(CTripleTriad* env, int card_placement, int player) { } void c_step(CTripleTriad* env) { - env->log.episode_length += 1; + env->episode_length += 1; env->rewards[0] = 0.0; int action = env->actions[0]; - if (env->log.episode_length >= MAX_EPISODE_LENGTH) { + if (env->episode_length >= MAX_EPISODE_LENGTH) { env->game_over = 1; - env->log.episode_return -= 1.0; + env->episode_return -= 1.0; env->rewards[0] -= 1.0; } // reset the game if game over if (env->game_over == 1) { - env->log.score = env->score[0]; - env->log.perf = (env->score[0] > env->score[1]) ? 1.0 : 0.0; - add_log(env->log_buffer, &env->log); - //printf("Log: %f, %f, %f\n", env->log.episode_return, env->log.episode_length, env->log.score); + env->perf = (env->score[0] > env->score[1]) ? 
1.0 : 0.0; + add_log(env); c_reset(env); return; } // select a card if the card is in the range of 1-5 and the card is not placed if (action >= SELECT_CARD_1 && action <= SELECT_CARD_5 ) { // Prevent model from just swapping between selected cards to avoid playing - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; int card_selected = action + 1; + if(env->card_locations[0][card_selected-1] == 0) { - select_card(env,card_selected, 1); + select_card(env, card_selected, 1); } } // place a card if the card is in the range of 1-9 and the card is selected @@ -537,16 +500,16 @@ void c_step(CTripleTriad* env) { env->card_selected[0] = -1; card_placed = true; } else { - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; } } else { - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; } // opponent turn - if (env->dones[0] == 0 && card_placed == true ) { + if (env->terminals[0] == 0 && card_placed == true ) { int bot_card_selected = get_bot_card_selection(env); if(bot_card_selected > 0) { select_card(env,bot_card_selected, -1); @@ -560,7 +523,7 @@ void c_step(CTripleTriad* env) { } } - if (env->dones[0] == 1) { + if (env->terminals[0] == 1) { env->game_over=1; } compute_observations(env); @@ -583,11 +546,15 @@ Client* make_client(int width, int height) { return client; } -void c_render(Client* client, CTripleTriad* env) { +void c_render(CTripleTriad* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } + if (env->client == NULL) { + env->client = make_client(env->width, env->height); + } + BeginDrawing(); ClearBackground(PUFF_BACKGROUND); diff --git a/pufferlib/ocean/tripletriad/tripletriad.py b/pufferlib/ocean/tripletriad/tripletriad.py index 42a8befdea..fce49c4ca5 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.py +++ b/pufferlib/ocean/tripletriad/tripletriad.py @@ -2,11 +2,11 @@ import gymnasium import pufferlib -from pufferlib.ocean.tripletriad.cy_tripletriad import 
CyTripleTriad +from pufferlib.ocean.tripletriad import binding class TripleTriad(pufferlib.PufferEnv): def __init__(self, num_envs=1, render_mode=None, report_interval=1, - width=990, height=690, piece_width=192, piece_height=224, buf=None, seed=0): + width=990, height=690, card_width=192, card_height=224, buf=None, seed=0): self.single_observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(114,), dtype=np.float32) self.single_action_space = gymnasium.spaces.Discrete(14) @@ -15,41 +15,42 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, self.num_agents = num_envs super().__init__(buf=buf) - self.c_envs = CyTripleTriad(self.observations, self.actions, - self.rewards, self.terminals, num_envs, width, height, - piece_width, piece_height) + self.c_envs = binding.vec_init(self.observations, self.actions, + self.rewards, self.terminals, self.truncations, num_envs, seed, width=width, height=height, + card_width=card_width, card_height=card_height) def reset(self, seed=None): - self.c_envs.reset() self.tick = 0 + if seed is None: + binding.vec_reset(self.c_envs, 0) + else: + binding.vec_reset(self.c_envs, seed) return self.observations, [] def step(self, actions): self.actions[:] = actions - self.c_envs.step() + binding.vec_step(self.c_envs) self.tick += 1 info = [] if self.tick % self.report_interval == 0: - log = self.c_envs.log() - if log['episode_length'] > 0: - info.append(log) + info.append(binding.vec_log(self.c_envs)) return (self.observations, self.rewards, self.terminals, self.truncations, info) def render(self): - self.c_envs.render() + binding.vec_render(self.c_envs, 0) def close(self): - self.c_envs.close() + binding.vec_close(self.c_envs) def test_performance(timeout=10, atn_cache=1024): env = TripleTriad(num_envs=1000) env.reset() tick = 0 - actions = np.random.randint(0, 2, (atn_cache, env.num_envs)) + actions = np.random.randint(0, 2, (atn_cache, env.num_agents)) import time start = time.time() @@ -58,7 +59,7 @@ def 
test_performance(timeout=10, atn_cache=1024): env.step(atn) tick += 1 - print(f'SPS: %f', env.num_envs * tick / (time.time() - start)) + print(f'SPS: {env.num_agents * tick / (time.time() - start):,}') if __name__ == '__main__': test_performance() diff --git a/setup.py b/setup.py index be69614034..bda69766f1 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,6 @@ cython_extension_paths = [ 'pufferlib/ocean/moba/cy_moba', 'pufferlib/ocean/snake/cy_snake', - 'pufferlib/ocean/tripletriad/cy_tripletriad', 'pufferlib/ocean/rware/cy_rware', 'pufferlib/ocean/trash_pickup/cy_trash_pickup', 'pufferlib/ocean/cpr/cy_cpr',