From f169b5624f0ef5db5ffc4bacd957ac86c31f9092 Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Sun, 27 Apr 2025 04:47:12 +0000 Subject: [PATCH 1/8] snake newbind refactor local --- pufferlib/ocean/env_binding.h | 84 +++++++++++++------ pufferlib/ocean/snake/binding.c | 74 +++++++++++++++++ pufferlib/ocean/snake/cy_snake.pyx | 128 ----------------------------- pufferlib/ocean/snake/snake.c | 11 +-- pufferlib/ocean/snake/snake.h | 121 ++++++++++++--------------- pufferlib/ocean/snake/snake.py | 34 ++++---- setup.py | 4 +- 7 files changed, 212 insertions(+), 244 deletions(-) create mode 100644 pufferlib/ocean/snake/binding.c delete mode 100644 pufferlib/ocean/snake/cy_snake.pyx diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 2230c53692..a067c8b9d9 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -215,6 +215,52 @@ static VecEnv* unpack_vecenv(PyObject* args) { return vec; } +static double unpack_with_index(PyObject* kwargs, char* key, int env_index) { + PyObject* val = PyDict_GetItemString(kwargs, key); + if (val == NULL) { + // If the key doesn't exist, don't set an error - this allows optional parameters + // Just return a default value that the caller can check for + return 0.0; + } + + // If val is a list, extract the element at env_index + if (PyList_Check(val)) { + if (env_index >= 0 && env_index < PyList_Size(val)) { + val = PyList_GetItem(val, env_index); + } else { + // If index is out of bounds, use the first element + val = PyList_GetItem(val, 0); + } + } + + if (PyLong_Check(val)) { + long out = PyLong_AsLong(val); + if (out > INT_MAX || out < INT_MIN) { + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Value %ld of integer argument %s is out of range", out, key); + PyErr_SetString(PyExc_TypeError, error_msg); + return 1; + } + // Cast on return. Safe because double can represent all 32-bit ints exactly + return out; + } + if (PyFloat_Check(val)) { + return PyFloat_AsDouble(val); + } + if (PyBool_Check(val)) { + return PyObject_IsTrue(val); + } + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Failed to unpack keyword %s as int", key); + PyErr_SetString(PyExc_TypeError, error_msg); + return 1; +} + +// Original unpack function for backward compatibility +static double unpack(PyObject* kwargs, char* key) { + return unpack_with_index(kwargs, key, 0); +} + static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { if (PyTuple_Size(args) != 7) { PyErr_SetString(PyExc_TypeError, "vec_init requires 6 arguments"); @@ -328,6 +374,7 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { Py_INCREF(kwargs); // We need to increment the reference since we'll be modifying it } + // Add an env_index to kwargs for use by my_init for (int i = 0; i < num_envs; i++) { Env* env = (Env*)calloc(1, sizeof(Env)); if (!env) { @@ -360,6 +407,16 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { } Py_DECREF(py_seed); + // Add the environment index to kwargs + PyObject* py_env_index = PyLong_FromLong(i); + if (PyDict_SetItemString(kwargs, "env_index", py_env_index) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to set env_index in kwargs"); + Py_DECREF(py_env_index); + Py_DECREF(kwargs); + return NULL; + } + Py_DECREF(py_env_index); + PyObject* empty_args = PyTuple_New(0); if (my_init(env, empty_args, kwargs)) { PyErr_SetString(PyExc_TypeError, "env_init failed"); @@ -527,33 +584,6 @@ static PyObject* vec_close(PyObject* self, PyObject* args) { Py_RETURN_NONE; } -static double unpack(PyObject* kwargs, char* key) { - PyObject* val = PyDict_GetItemString(kwargs, key); - if (val == NULL) { - // If the key doesn't exist, don't set an error - this allows optional parameters - // Just return a default value that the caller can check for - return 0.0; - } - if (PyLong_Check(val)) { - long out = PyLong_AsLong(val); - if (out > INT_MAX || out < INT_MIN) { - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Value %ld of integer argument %s is out of range", out, key); - PyErr_SetString(PyExc_TypeError, error_msg); - return 1; - } - // Cast on return. Safe because double can represent all 32-bit ints exactly - return out; - } - if (PyFloat_Check(val)) { - return PyFloat_AsDouble(val); - } - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Failed to unpack keyword %s as int", key); - PyErr_SetString(PyExc_TypeError, error_msg); - return 1; -} - // Method table static PyMethodDef methods[] = { {"env_init", (PyCFunction)env_init, METH_VARARGS | METH_KEYWORDS, "Init environment with observation, action, reward, terminal, truncation arrays"}, diff --git a/pufferlib/ocean/snake/binding.c b/pufferlib/ocean/snake/binding.c new file mode 100644 index 0000000000..ccb7d5c61b --- /dev/null +++ b/pufferlib/ocean/snake/binding.c @@ -0,0 +1,74 @@ +#include "snake.h" + +#define Env CSnake +#include "../env_binding.h" + +// Helper function to extract an int from a Python object, handling lists +static int extract_int(PyObject* kwargs, const char* key, int default_value) { + PyObject* obj = PyDict_GetItemString(kwargs, key); + if (obj != NULL) { + if (PyList_Check(obj)) { + obj = PyList_GetItem(obj, 0); + } + return PyLong_AsLong(obj); + } + return default_value; +} + +// Helper function to extract a float from a Python object, handling lists +static float extract_float(PyObject* kwargs, const char* key, float default_value) { + PyObject* obj = PyDict_GetItemString(kwargs, key); + if (obj != NULL) { + if (PyList_Check(obj)) { + obj = PyList_GetItem(obj, 0); + } + return PyFloat_AsDouble(obj); + } + return default_value; +} + +// Helper function to extract a bool from a Python object, handling lists +static int extract_bool(PyObject* kwargs, const char* key, int default_value) { + PyObject* obj = PyDict_GetItemString(kwargs, key); + if (obj != NULL) { + if (PyList_Check(obj)) { + obj = PyList_GetItem(obj, 0); + } + return PyObject_IsTrue(obj); + } + return default_value; +} + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + // Get the environment index from kwargs + int env_index = 0; + PyObject* env_index_obj = PyDict_GetItemString(kwargs, "env_index"); + if (env_index_obj != NULL) { + env_index = PyLong_AsLong(env_index_obj); + } + + // Use unpack_with_index to properly handle lists + env->width = unpack_with_index(kwargs, "width", env_index); + env->height = unpack_with_index(kwargs, "height", env_index); + env->num_snakes = unpack_with_index(kwargs, "num_snakes", env_index); + env->vision = unpack_with_index(kwargs, "vision", env_index); + env->leave_corpse_on_death = unpack_with_index(kwargs, "leave_corpse_on_death", env_index); + env->food = unpack_with_index(kwargs, "num_food", env_index); + env->reward_food = unpack_with_index(kwargs, "reward_food", env_index); + env->reward_corpse = unpack_with_index(kwargs, "reward_corpse", env_index); + env->reward_death = unpack_with_index(kwargs, "reward_death", env_index); + env->max_snake_length = unpack_with_index(kwargs, "max_snake_length", env_index); + env->cell_size = unpack_with_index(kwargs, "cell_size", env_index); + + init_csnake(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "n", log->n); + return 0; +} diff --git a/pufferlib/ocean/snake/cy_snake.pyx b/pufferlib/ocean/snake/cy_snake.pyx deleted file mode 100644 index b0d880a02b..0000000000 --- a/pufferlib/ocean/snake/cy_snake.pyx +++ /dev/null @@ -1,128 +0,0 @@ -cimport numpy as cnp -from libc.stdlib cimport calloc, free - -cdef extern from "snake.h": - cdef: - int LOG_BUFFER_SIZE - - ctypedef struct Log: - float perf - float score - float episode_return - float episode_length - - ctypedef struct LogBuffer - LogBuffer* allocate_logbuffer(int) - void free_logbuffer(LogBuffer*) - Log aggregate_and_clear(LogBuffer*) - - ctypedef struct CSnake: - char* observations - int* actions - float* rewards - unsigned char* terminals - LogBuffer* log_buffer - Log* logs - char* grid - int* snake - int* snake_lengths - int* snake_ptr - int* snake_lifetimes - int* snake_colors - int num_snakes - int width - int height - int max_snake_length - int food - int vision - int window - int obs_size - unsigned char leave_corpse_on_death - float reward_food - float reward_corpse - float reward_death - - ctypedef struct CSnake - void init_csnake(CSnake* env) - void step_all(CSnake* env) - void compute_observations(CSnake* env) - void spawn_snake(CSnake* env, int snake_id) - void spawn_food(CSnake* env) - void c_reset(CSnake* env) - void step_snake(CSnake* env, int i) - void c_step(CSnake* env) - ctypedef struct Client - Client* make_client(int cell_size, int width, int height) - void c_render(Client* client, CSnake* env) - void close_client(Client* client) - -cdef class CySnake: - cdef: - CSnake *envs - Client* client - LogBuffer* logs - int num_envs - - def __init__(self, char[:, :, :] observations, int[:] actions, - float[:] rewards, unsigned char[:] terminals, - list widths, list heights, list num_snakes, - list num_food, int vision, int max_snake_length, - bint leave_corpse_on_death, float reward_food, - float reward_corpse, float reward_death): - - self.num_envs = len(num_snakes) - self.envs = calloc(self.num_envs, sizeof(CSnake)) - self.logs = allocate_logbuffer(LOG_BUFFER_SIZE) - self.client = NULL - - cdef int i - cdef int n = 0 - for i in range(self.num_envs): - self.envs[i] = CSnake( - observations=&observations[n, 0, 0], - actions=&actions[n], - rewards=&rewards[n], - terminals=&terminals[n], - log_buffer=self.logs, - width=widths[i], - height=heights[i], - num_snakes=num_snakes[i], - food=num_food[i], - vision=vision, - max_snake_length=max_snake_length, - leave_corpse_on_death=leave_corpse_on_death, - reward_food=reward_food, - reward_corpse=reward_corpse, - reward_death=reward_death, - ) - init_csnake(&self.envs[i]) - n += num_snakes[i] - - def reset(self): - cdef int i - for i in range(self.num_envs): - c_reset(&self.envs[i]) - - def step(self): - cdef int i - for i in range(self.num_envs): - c_step(&self.envs[i]) - - def render(self, cell_size=8): - cdef CSnake* env = &self.envs[0] - if self.client == NULL: - self.client = make_client(cell_size, env.width, env.height) - - c_render(self.client, env) - - def close(self): - if self.client != NULL: - close_client(self.client) - self.client = NULL - - # TODO: free - #free(self.envs) - - def log(self): - cdef Log log = aggregate_and_clear(self.logs) - return log diff --git a/pufferlib/ocean/snake/snake.c b/pufferlib/ocean/snake/snake.c index 76b791dcba..d685915dc9 100644 --- a/pufferlib/ocean/snake/snake.c +++ b/pufferlib/ocean/snake/snake.c @@ -20,7 +20,7 @@ int demo() { Weights* weights = load_weights("resources/snake_weights.bin", 148357); LinearLSTM* net = make_linearlstm(weights, env.num_snakes, env.obs_size, 4); - Client* client = make_client(2, env.width, env.height); + env.client = make_client(2, env.width, env.height); while (!WindowShouldClose()) { // User can take control of the first snake @@ -36,11 +36,11 @@ int demo() { forward_linearlstm(net, net->obs, env.actions); } c_step(&env); - c_render(client, &env); + c_render(&env); } free_linearlstm(net); free(weights); - close_client(client); + close_client(env.client); free_csnake(&env); return 0; } @@ -71,11 +71,12 @@ void test_performance(float test_time) { i++; } int end = time(NULL); + free_csnake(&env); printf("SPS: %f\n", (float)env.num_snakes*i / (end - start)); } int main() { - demo(); - //test_performance(30); + // demo(); + test_performance(30); return 0; } diff --git a/pufferlib/ocean/snake/snake.h b/pufferlib/ocean/snake/snake.h index 74defca3f5..30bfe7062e 100644 --- a/pufferlib/ocean/snake/snake.h +++ b/pufferlib/ocean/snake/snake.h @@ -9,70 +9,24 @@ #define CORPSE 2 #define WALL 3 -#define LOG_BUFFER_SIZE 8192 - typedef struct Log Log; struct Log { float perf; float score; float episode_return; float episode_length; + float n; }; -typedef struct LogBuffer LogBuffer; -struct LogBuffer { - Log* logs; - int length; - int idx; -}; - -LogBuffer* allocate_logbuffer(int size) { - LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); - logs->logs = (Log*)calloc(size, sizeof(Log)); - logs->length = size; - logs->idx = 0; - return logs; -} - -void free_logbuffer(LogBuffer* buffer) { - free(buffer->logs); - free(buffer); -} - -void add_log(LogBuffer* logs, Log* log) { - if (logs->idx == logs->length) { - return; - } - logs->logs[logs->idx] = *log; - logs->idx += 1; -} - -Log aggregate_and_clear(LogBuffer* logs) { - Log log = {0}; - if (logs->idx == 0) { - return log; - } - for (int i = 0; i < logs->idx; i++) { - log.episode_return += logs->logs[i].episode_return; - log.episode_length += logs->logs[i].episode_length; - log.score += logs->logs[i].score; - log.perf += logs->logs[i].score / logs->logs[i].episode_length; - } - log.episode_return /= logs->idx; - log.episode_length /= logs->idx; - log.score /= logs->idx; - log.perf /= logs->idx; - logs->idx = 0; - return log; -} - -typedef struct { +typedef struct Client Client; +typedef struct CSnake CSnake; +struct CSnake { char* observations; int* actions; float* rewards; unsigned char* terminals; - LogBuffer* log_buffer; - Log* logs; + Log log; + Log* snake_logs; char* grid; int* snake; int* snake_lengths; @@ -91,7 +45,23 @@ typedef struct { float reward_food; float reward_corpse; float reward_death; -} CSnake; + int tick; + int cell_size; + Client* client; +}; + +/** + * Add a snake's log to the main log when the snake's episode ends (dies or hits a wall). + * This should only be called during termination/truncation conditions for a specific snake. + * Accumulates the snake's stats into the main log and resets the snake's individual log. + */ +void add_log(CSnake* env, int snake_id) { + env->log.perf += env->snake_logs[snake_id].perf; + env->log.score += env->snake_logs[snake_id].score; + env->log.episode_return += env->snake_logs[snake_id].episode_return; + env->log.episode_length += env->snake_logs[snake_id].episode_length; + env->log.n += 1; +} void init_csnake(CSnake* env) { env->grid = (char*)calloc(env->width*env->height, sizeof(char)); @@ -100,7 +70,9 @@ void init_csnake(CSnake* env) { env->snake_ptr = (int*)calloc(env->num_snakes, sizeof(int)); env->snake_lifetimes = (int*)calloc(env->num_snakes, sizeof(int)); env->snake_colors = (int*)calloc(env->num_snakes, sizeof(int)); - env->logs = calloc(env->num_snakes, sizeof(Log)); + env->snake_logs = (Log*)calloc(env->num_snakes, sizeof(Log)); + env->tick = 0; + env->client = NULL; env->snake_colors[0] = 7; for (int i = 1; inum_snakes; i++) env->snake_colors[i] = i%4 + 4; // Randomize snake colors @@ -110,8 +82,7 @@ void allocate_csnake(CSnake* env) { int obs_size = (2*env->vision + 1) * (2*env->vision + 1); env->observations = (char*)calloc(env->num_snakes*obs_size, sizeof(char)); env->actions = (int*)calloc(env->num_snakes, sizeof(int)); - env->rewards = (float*)calloc(env->num_snakes, sizeof(float)); - env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE); + env->rewards = (float*)calloc(env->num_snakes, sizeof(float)); init_csnake(env); } @@ -128,8 +99,7 @@ void free_csnake(CSnake* env) { free(env->snake_colors); free(env->actions); free(env->rewards); - free_logbuffer(env->log_buffer); - free(env->logs); + free(env->snake_logs); } void compute_observations(CSnake* env) { @@ -168,7 +138,6 @@ void delete_snake(CSnake* env, int snake_id) { } void spawn_snake(CSnake* env, int snake_id) { - env->logs[snake_id] = (Log){0}; int head_r, head_c, tile, grid_idx; delete_snake(env, snake_id); do { @@ -184,6 +153,7 @@ void spawn_snake(CSnake* env, int snake_id) { env->snake_ptr[snake_id] = 0; env->snake_lifetimes[snake_id] = 0; env->grid[grid_idx] = env->snake_colors[snake_id]; + env->snake_logs[snake_id] = (Log){0}; } void spawn_food(CSnake* env) { @@ -200,6 +170,11 @@ void spawn_food(CSnake* env) { void c_reset(CSnake* env) { env->window = 2*env->vision+1; env->obs_size = env->window*env->window; + env->tick = 0; + env->log = (Log){0}; + + for (int i = 0; i < env->num_snakes; i++) + env->snake_logs[i] = (Log){0}; for (int r = 0; r < env->vision; r++) { for (int c = 0; c < env->width; c++) @@ -224,7 +199,7 @@ void c_reset(CSnake* env) { } void step_snake(CSnake* env, int i) { - env->logs[i].episode_length += 1; + env->snake_logs[i].episode_length += 1; int atn = env->actions[i]; int dr = 0; int dc = 0; @@ -255,9 +230,10 @@ void step_snake(CSnake* env, int i) { int tile = env->grid[next_r*env->width + next_c]; if (tile >= WALL) { env->rewards[i] = env->reward_death; - env->logs[i].episode_return += env->reward_death; - env->logs[i].score = env->snake_lengths[i]; - add_log(env->log_buffer, &env->logs[i]); + env->snake_logs[i].episode_return += env->reward_death; + env->snake_logs[i].score = env->snake_lengths[i]; + env->snake_logs[i].perf = env->snake_logs[i].score / env->snake_logs[i].episode_length; + add_log(env, i); spawn_snake(env, i); return; } @@ -274,12 +250,12 @@ void step_snake(CSnake* env, int i) { bool grow; if (tile == FOOD) { env->rewards[i] = env->reward_food; - env->logs[i].episode_return += env->reward_food; + env->snake_logs[i].episode_return += env->reward_food; spawn_food(env); grow = true; } else if (tile == CORPSE) { env->rewards[i] = env->reward_corpse; - env->logs[i].episode_return += env->reward_corpse; + env->snake_logs[i].episode_return += env->reward_corpse; grow = true; } else { env->rewards[i] = 0.0; @@ -304,6 +280,7 @@ void step_snake(CSnake* env, int i) { } void c_step(CSnake* env){ + env->tick++; for (int i = 0; i < env->num_snakes; i++) step_snake(env, i); @@ -324,11 +301,12 @@ Color COLORS[] = { (Color){255, 255, 0, 255}, }; -typedef struct { +typedef struct Client Client; +struct Client { int cell_size; int width; int height; -} Client; +}; Client* make_client(int cell_size, int width, int height) { Client* client= (Client*)malloc(sizeof(Client)); @@ -345,10 +323,17 @@ void close_client(Client* client) { free(client); } -void c_render(Client* client, CSnake* env) { +void c_render(CSnake* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } + + if (env->client == NULL) { + env->client = make_client(env->cell_size, env->width, env->height); + } + + Client* client = env->client; + BeginDrawing(); ClearBackground(COLORS[0]); int sz = client->cell_size; diff --git a/pufferlib/ocean/snake/snake.py b/pufferlib/ocean/snake/snake.py index 00888f9867..5473fd1621 100644 --- a/pufferlib/ocean/snake/snake.py +++ b/pufferlib/ocean/snake/snake.py @@ -5,7 +5,7 @@ import pufferlib from pufferlib.exceptions import APIUsageError -from pufferlib.ocean.snake.cy_snake import CySnake +from pufferlib.ocean.snake import binding class Snake(pufferlib.PufferEnv): def __init__(self, num_envs=16, width=640, height=360, @@ -41,45 +41,51 @@ def __init__(self, num_envs=16, width=640, height=360, self.render_mode = render_mode self.tick = 0 + # Calculate cell_size for rendering self.cell_size = int(np.ceil(1280 / max(max(width), max(height)))) super().__init__(buf) - self.c_envs = CySnake(self.observations, self.actions, - self.rewards, self.terminals, width, height, - num_snakes, num_food, vision, max_snake_length, - leave_corpse_on_death, reward_food, reward_corpse, - reward_death) + self.c_envs = binding.vec_init(self.observations, self.actions, + self.rewards, self.terminals, self.truncations, + num_envs, seed, width=width, height=height, + num_snakes=num_snakes, num_food=num_food, vision=vision, + max_snake_length=max_snake_length, + leave_corpse_on_death=leave_corpse_on_death, + reward_food=reward_food, reward_corpse=reward_corpse, + reward_death=reward_death, cell_size=self.cell_size) def reset(self, seed=None): - self.c_envs.reset() self.tick = 0 + if seed is None: + binding.vec_reset(self.c_envs, 0) + else: + binding.vec_reset(self.c_envs, seed) return self.observations, [] def step(self, actions): self.actions[:] = actions - self.c_envs.step() + self.tick += 1 + binding.vec_step(self.c_envs) info = [] if self.tick % self.report_interval == 0: - log = self.c_envs.log() - if log['episode_length'] > 0: - info.append(log) + info.append(binding.vec_log(self.c_envs)) return (self.observations, self.rewards, self.terminals, self.truncations, info) def render(self): - self.c_envs.render(self.cell_size) + binding.vec_render(self.c_envs, self.cell_size) def close(self): - self.c_envs.close() + binding.vec_close(self.c_envs) def test_performance(timeout=10, atn_cache=1024): env = Snake() env.reset() tick = 0 - total_snakes = sum(env.num_snakes) + total_snakes = env.num_agents actions = np.random.randint(0, 4, (atn_cache, total_snakes)) import time diff --git a/setup.py b/setup.py index d7db6d52a6..db2d44603a 100644 --- a/setup.py +++ b/setup.py @@ -269,7 +269,7 @@ 'pufferlib/ocean/moba/cy_moba', # 'pufferlib/ocean/tactical/c_tactical', #'pufferlib/ocean/squared/cy_squared', - 'pufferlib/ocean/snake/cy_snake', + #'pufferlib/ocean/snake/cy_snake', 'pufferlib/ocean/gpudrive/cy_gpudrive', #'pufferlib/ocean/pong/cy_pong', # 'pufferlib/ocean/breakout/cy_breakout', @@ -315,7 +315,7 @@ #c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split() -pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole'] +pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'snake'] extensions += [ Extension( From 570b3c2da778e444511f123bb395f82b925ddd8f Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Tue, 29 Apr 2025 23:33:28 +0000 Subject: [PATCH 2/8] tripletriad binding refactor. note: local only (box6 is still down), so training may not match pufferbox. indeed, baseline did seem pretty shaky. baseline: https://wandb.ai/xinpw8/pufferlib/runs/m3frlbjf refactor: https://wandb.ai/xinpw8/pufferlib/runs/58ro6521 also, some offsets weren't adjusted when NOOP was removed (commit e39332b); i updated those as well. interestingly, it doesn't seem to really have improved training, however. --- pufferlib/ocean/tripletriad/binding.c | 22 +++ .../ocean/tripletriad/cy_tripletriad.pyx | 112 ---------------- pufferlib/ocean/tripletriad/tripletriad.c | 45 ++++++- pufferlib/ocean/tripletriad/tripletriad.h | 126 +++++++----------- pufferlib/ocean/tripletriad/tripletriad.py | 22 ++- setup.py | 4 +- 6 files changed, 120 insertions(+), 211 deletions(-) create mode 100644 pufferlib/ocean/tripletriad/binding.c delete mode 100644 pufferlib/ocean/tripletriad/cy_tripletriad.pyx diff --git a/pufferlib/ocean/tripletriad/binding.c b/pufferlib/ocean/tripletriad/binding.c new file mode 100644 index 0000000000..4c725d3130 --- /dev/null +++ b/pufferlib/ocean/tripletriad/binding.c @@ -0,0 +1,22 @@ +#include "tripletriad.h" + +#define Env CTripleTriad +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->width = unpack(kwargs, "width"); + env->height = unpack(kwargs, "height"); + env->card_width = unpack(kwargs, "card_width"); + env->card_height = unpack(kwargs, "card_height"); + init_ctripletriad(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "n", log->n); + return 0; +} diff --git a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx deleted file mode 100644 index 2ee110e861..0000000000 --- a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx +++ /dev/null @@ -1,112 +0,0 @@ -from libc.stdlib cimport calloc, free - -cdef extern from "tripletriad.h": - int LOG_BUFFER_SIZE - - ctypedef struct Log: - float perf; - float score; - float episode_return; - float episode_length; - - ctypedef struct LogBuffer - LogBuffer* allocate_logbuffer(int) - void free_logbuffer(LogBuffer*) - Log aggregate_and_clear(LogBuffer*) - - ctypedef struct CTripleTriad: - float* observations - int* actions - float* rewards - unsigned char* dones - LogBuffer* log_buffer - Log log - int card_width; - int card_height; - float* board_x; - float* board_y; - int** board_states; - int width; - int height; - int game_over; - int num_cards; - int*** cards_in_hand; - int* card_selected; - int** card_locations; - int* action_masks; - int*** board_card_values; - int* score; - - ctypedef struct Client - - CTripleTriad* init_ctripletriad(CTripleTriad* env) - void free_ctripletriad(CTripleTriad* env) - - Client* make_client(float width, float height) - void close_client(Client* client) - void c_render(Client* client, CTripleTriad* env) - void c_reset(CTripleTriad* env) - void c_step(CTripleTriad* env) - -cdef class CyTripleTriad: - cdef: - CTripleTriad* envs - Client* client - LogBuffer* logs - int num_envs - - def __init__(self, float[:, :] observations, int[:] actions, - float[:] rewards, unsigned char[:] terminals, int num_envs, - int width, int height, int card_width, int card_height): - - self.num_envs = num_envs - self.client = NULL - self.envs = calloc(num_envs, sizeof(CTripleTriad)) - self.logs = allocate_logbuffer(LOG_BUFFER_SIZE) - - cdef int i - for i in range(num_envs): - self.envs[i] = CTripleTriad( - observations=&observations[i, 0], - actions=&actions[i], - rewards=&rewards[i], - dones=&terminals[i], - log_buffer=self.logs, - width=width, - height=height, - card_width=card_width, - card_height=card_height, - num_cards=10, - ) - init_ctripletriad(&self.envs[i]) - - def reset(self): - cdef int i - for i in range(self.num_envs): - c_reset(&self.envs[i]) - - def step(self): - cdef int i - for i in range(self.num_envs): - c_step(&self.envs[i]) - - def render(self): - cdef CTripleTriad* env = &self.envs[0] - if self.client == NULL: - self.client = make_client(env.width, env.height) - - c_render(self.client, env) - - def close(self): - if self.client != NULL: - close_client(self.client) - self.client = NULL - - # Todo: clean - for i in range(self.num_envs): - free_ctripletriad(&self.envs[i]) - free(self.envs) - - def log(self): - cdef Log log = aggregate_and_clear(self.logs) - return log diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 6d6b5a2a20..61c2477802 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -1,7 +1,10 @@ #include "tripletriad.h" #include "puffernet.h" +#include -int main() { +#define NOOP -1 + +void interactive() { Weights* weights = load_weights("resources/tripletriad_weights.bin", 148880); LinearLSTM* net = make_linearlstm(weights, 1, 114, 15); @@ -15,12 +18,12 @@ int main() { }; allocate_ctripletriad(&env); c_reset(&env); - Client* client = make_client(env.width, env.height); + env.client = make_client(env.width, env.height); int tick = 0; int action; while (!WindowShouldClose()) { - action = -1; + action = NOOP; // User can take control of the player if (IsKeyDown(KEY_LEFT_SHIFT)) { @@ -61,15 +64,45 @@ int main() { } tick = (tick + 1) % 45; - if (env.actions[0] != NOOP) { + if (action != NOOP) { + env.actions[0] = action; c_step(&env); } - c_render(client, &env); + c_render(&env); } free_linearlstm(net); free(weights); - close_client(client); + close_client(env.client); free_allocated_ctripletriad(&env); +} + +void performance_test() { + long test_time = 10; + CTripleTriad env = { + .width = 990, + .height = 690, + .card_width = 576 / 3, + .card_height = 672 / 3, + .game_over = 0, + .num_cards = 10, + }; + allocate_ctripletriad(&env); + c_reset(&env); + + long start = time(NULL); + int i = 0; + while (time(NULL) - start < test_time) { + c_step(&env); + i++; + } + long end = time(NULL); + printf("SPS: %ld\n", i / (end - start)); + free_allocated_ctripletriad(&env); +} + +int main() { + //performance_test(); + interactive(); return 0; } diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index 300104aece..03e80f0c44 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ b/pufferlib/ocean/tripletriad/tripletriad.h @@ -27,71 +27,22 @@ const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; // how to start game compile - LD_LIBRARY_PATH=raylib-5.0_linux_amd64/lib ./tripletriadgame -#define LOG_BUFFER_SIZE 1024 - typedef struct Log Log; struct Log { float perf; float score; float episode_return; float episode_length; + float n; }; -typedef struct LogBuffer LogBuffer; -struct LogBuffer { - Log* logs; - int length; - int idx; -}; - -LogBuffer* allocate_logbuffer(int size) { - LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); - logs->logs = (Log*)calloc(size, sizeof(Log)); - logs->length = size; - logs->idx = 0; - return logs; -} - -void free_logbuffer(LogBuffer* buffer) { - free(buffer->logs); - free(buffer); -} - -void add_log(LogBuffer* logs, Log* log) { - if (logs->idx == logs->length) { - return; - } - logs->logs[logs->idx] = *log; - logs->idx += 1; - //printf("Log: %f, %f, %f\n", log->episode_return, log->episode_length, log->score); -} - -Log aggregate_and_clear(LogBuffer* logs) { - Log log = {0}; - if (logs->idx == 0) { - return log; - } - for (int i = 0; i < logs->idx; i++) { - log.episode_return += logs->logs[i].episode_return; - log.episode_length += logs->logs[i].episode_length; - log.perf += logs->logs[i].perf; - log.score += logs->logs[i].score; - } - log.episode_return /= logs->idx; - log.episode_length /= logs->idx; - log.perf /= logs->idx; - log.score /= logs->idx; - logs->idx = 0; - return log; -} - +typedef struct Client Client; typedef struct CTripleTriad CTripleTriad; struct CTripleTriad { float* observations; int* actions; float* rewards; - unsigned char* dones; - LogBuffer* log_buffer; + unsigned char* terminals; Log log; int card_width; int card_height; @@ -108,8 +59,21 @@ struct CTripleTriad { int* action_masks; int*** board_card_values; int* score; + int tick; + float perf; + float episode_return; + float episode_length; + Client* client; }; +void add_log(CTripleTriad* env) { + env->log.perf += env->perf; + env->log.score += env->score[0]; + env->log.episode_return += env->episode_return; + env->log.episode_length += env->episode_length; + env->log.n += 1; +} + void generate_board_positions(CTripleTriad* env) { for (int row = 0; row < 3; row++) { for (int col = 0; col < 3; col++) { @@ -209,9 +173,8 @@ void init_ctripletriad(CTripleTriad* env) { void allocate_ctripletriad(CTripleTriad* env) { env->actions = (int*)calloc(1, sizeof(int)); env->observations = (float*)calloc(env->width*env->height, sizeof(float)); - env->dones = (unsigned char*)calloc(1, sizeof(unsigned char)); + env->terminals = (unsigned char*)calloc(1, sizeof(unsigned char)); env->rewards = (float*)calloc(1, sizeof(float)); - env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE); init_ctripletriad(env); } @@ -246,9 +209,8 @@ void free_ctripletriad(CTripleTriad* env) { void free_allocated_ctripletriad(CTripleTriad* env) { free(env->actions); free(env->observations); - free(env->dones); + free(env->terminals); free(env->rewards); - free_logbuffer(env->log_buffer); free_ctripletriad(env); } @@ -298,7 +260,6 @@ void compute_observations(CTripleTriad* env) { } void c_reset(CTripleTriad* env) { - env->log = (Log){0}; env->game_over = 0; for(int i=0; i< 2; i++) { for(int j=0; j< 5; j++) { @@ -333,8 +294,11 @@ void c_reset(CTripleTriad* env) { for(int i=0; i< 2; i++) { env->score[i] = 5; } - env->dones[0] = 0; + env->terminals[0] = 0; compute_observations(env); + env->tick = 0; + env->episode_length = 0; + env->episode_return = 0; } void select_card(CTripleTriad* env, int card_selected, int player) { @@ -365,8 +329,8 @@ void update_action_masks(CTripleTriad* env) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 5; j++) { if (env->card_locations[i][j] != 0) { - int action_idx = env->card_locations[i][j] + 5; - if (action_idx >= 6 && action_idx < 15) { + int action_idx = env->card_locations[i][j] + 4; + if (action_idx >= 5 && action_idx < 14) { env->action_masks[action_idx] = 1; // Mark as unavailable } } @@ -386,14 +350,14 @@ void check_win_condition(CTripleTriad* env, int player) { if (count == 9) { // add a draw condition and winner value is 0 if (env->score[0] == env->score[1]) { - env->dones[0] = 1; + env->terminals[0] = 1; env->rewards[0] = 0.0; env->game_over = 1; } else { int winner = env->score[0] > env->score[1] ? 1 : -1; - env->dones[0] = 1; + env->terminals[0] = 1; env->rewards[0] = winner; // 1 for player win, -1 for opponent win - env->log.episode_return += winner; + env->episode_return += winner; env->game_over = 1; } } @@ -405,9 +369,9 @@ int get_bot_card_placement(CTripleTriad* env) { int num_valid_placements = 0; // Find valid placements - for (int i = 6; i < 15; i++) { + for (int i = 5; i < 14; i++) { if (env->action_masks[i] == 0) { - valid_placements[num_valid_placements++] = i - 5; + valid_placements[num_valid_placements++] = i - 4; if (num_valid_placements == 9) break; // Safety check } } @@ -494,32 +458,30 @@ void check_card_conversions(CTripleTriad* env, int card_placement, int player) { } void c_step(CTripleTriad* env) { - env->log.episode_length += 1; + env->episode_length += 1; env->rewards[0] = 0.0; int action = env->actions[0]; - if (env->log.episode_length >= MAX_EPISODE_LENGTH) { + if (env->episode_length >= MAX_EPISODE_LENGTH) { env->game_over = 1; - env->log.episode_return -= 1.0; + env->episode_return -= 1.0; env->rewards[0] -= 1.0; } // reset the game if game over if (env->game_over == 1) { - env->log.score = env->score[0]; - env->log.perf = (env->score[0] > env->score[1]) ? 1.0 : 0.0; - add_log(env->log_buffer, &env->log); - //printf("Log: %f, %f, %f\n", env->log.episode_return, env->log.episode_length, env->log.score); + env->perf = (env->score[0] > env->score[1]) ? 1.0 : 0.0; + add_log(env); c_reset(env); return; } // select a card if the card is in the range of 1-5 and the card is not placed if (action >= SELECT_CARD_1 && action <= SELECT_CARD_5 ) { // Prevent model from just swapping between selected cards to avoid playing - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; - int card_selected = action + 1; + int card_selected = action; if(env->card_locations[0][card_selected-1] == 0) { select_card(env,card_selected, 1); } @@ -537,16 +499,16 @@ void c_step(CTripleTriad* env) { env->card_selected[0] = -1; card_placed = true; } else { - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; } } else { - env->log.episode_return -= 0.1; + env->episode_return -= 0.1; env->rewards[0] -= 0.1; } // opponent turn - if (env->dones[0] == 0 && card_placed == true ) { + if (env->terminals[0] == 0 && card_placed == true ) { int bot_card_selected = get_bot_card_selection(env); if(bot_card_selected > 0) { select_card(env,bot_card_selected, -1); @@ -560,7 +522,7 @@ void c_step(CTripleTriad* env) { } } - if (env->dones[0] == 1) { + if (env->terminals[0] == 1) { env->game_over=1; } compute_observations(env); @@ -583,11 +545,17 @@ Client* make_client(int width, int height) { return client; } -void c_render(Client* client, CTripleTriad* env) { +void c_render(CTripleTriad* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } + if (env->client == NULL) { + env->client = make_client(env->width, env->height); + } + + Client* client = env->client; + BeginDrawing(); ClearBackground(PUFF_BACKGROUND); diff --git a/pufferlib/ocean/tripletriad/tripletriad.py b/pufferlib/ocean/tripletriad/tripletriad.py index 42a8befdea..0d7a26ebc8 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.py +++ b/pufferlib/ocean/tripletriad/tripletriad.py @@ -2,11 +2,11 @@ import gymnasium import pufferlib -from pufferlib.ocean.tripletriad.cy_tripletriad import CyTripleTriad +from pufferlib.ocean.tripletriad import binding class TripleTriad(pufferlib.PufferEnv): def __init__(self, num_envs=1, render_mode=None, report_interval=1, - width=990, height=690, piece_width=192, piece_height=224, buf=None, seed=0): + width=990, height=690, card_width=192, card_height=224, buf=None, seed=0): self.single_observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(114,), dtype=np.float32) self.single_action_space = gymnasium.spaces.Discrete(14) @@ -15,34 +15,32 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, self.num_agents = num_envs super().__init__(buf=buf) - self.c_envs = CyTripleTriad(self.observations, self.actions, - self.rewards, self.terminals, num_envs, width, height, - piece_width, piece_height) + self.c_envs = binding.vec_init(self.observations, self.actions, + self.rewards, self.terminals, self.truncations, num_envs, seed, width=width, height=height, + card_width=card_width, card_height=card_height) def reset(self, seed=None): - self.c_envs.reset() + binding.vec_reset(self.c_envs, seed) self.tick = 0 return self.observations, [] def step(self, actions): self.actions[:] = actions - self.c_envs.step() + binding.vec_step(self.c_envs) self.tick += 1 info = [] if self.tick % self.report_interval == 0: - log = self.c_envs.log() - if log['episode_length'] > 0: - info.append(log) + info.append(binding.vec_log(self.c_envs)) return (self.observations, self.rewards, self.terminals, self.truncations, info) def render(self): - self.c_envs.render() + binding.vec_render(self.c_envs, 0) def close(self): - self.c_envs.close() + binding.vec_close(self.c_envs) def test_performance(timeout=10, atn_cache=1024): env = TripleTriad(num_envs=1000) diff --git a/setup.py b/setup.py index db2d44603a..842424ebd1 100644 --- a/setup.py +++ b/setup.py @@ -276,7 +276,7 @@ # 'pufferlib/ocean/cartpole/cy_cartpole', 'pufferlib/ocean/connect4/cy_connect4', #'pufferlib/ocean/grid/cy_grid', - 'pufferlib/ocean/tripletriad/cy_tripletriad', + # 'pufferlib/ocean/tripletriad/cy_tripletriad', # 'pufferlib/ocean/go/cy_go', 'pufferlib/ocean/rware/cy_rware', 'pufferlib/ocean/trash_pickup/cy_trash_pickup', @@ -315,7 +315,7 @@ #c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split() -pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'snake'] +pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'snake', 'tripletriad'] extensions += [ Extension( From d78a5a77cf2f83cb3c075129954763f94d27336d Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Tue, 29 Apr 2025 23:48:32 +0000 Subject: [PATCH 3/8] python perf test updated; int card_selected = action + 1 in tripletriad.h was off-by-one; c perf test added + local eval updated --- pufferlib/ocean/tripletriad/tripletriad.c | 2 +- pufferlib/ocean/tripletriad/tripletriad.h | 5 +++-- pufferlib/ocean/tripletriad/tripletriad.py | 9 ++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 61c2477802..7917070f4d 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -102,7 +102,7 @@ void performance_test() { } int main() { - //performance_test(); + // performance_test(); interactive(); return 0; } diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index 03e80f0c44..d02b480a95 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ b/pufferlib/ocean/tripletriad/tripletriad.h @@ -481,9 +481,10 @@ void c_step(CTripleTriad* env) { env->episode_return -= 0.1; env->rewards[0] -= 0.1; - int card_selected = action; + int card_selected = action + 1; + if(env->card_locations[0][card_selected-1] == 0) { - select_card(env,card_selected, 1); + select_card(env, card_selected, 1); } } // place a card if the card is in the range of 1-9 and the card is selected diff --git a/pufferlib/ocean/tripletriad/tripletriad.py b/pufferlib/ocean/tripletriad/tripletriad.py index 0d7a26ebc8..fce49c4ca5 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.py +++ b/pufferlib/ocean/tripletriad/tripletriad.py @@ -20,8 +20,11 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1, card_width=card_width, card_height=card_height) def reset(self, seed=None): - binding.vec_reset(self.c_envs, seed) self.tick = 0 + if seed is None: + binding.vec_reset(self.c_envs, 0) + else: + binding.vec_reset(self.c_envs, seed) return self.observations, [] def step(self, actions): @@ -47,7 +50,7 @@ def test_performance(timeout=10, atn_cache=1024): env.reset() tick = 0 - actions = np.random.randint(0, 2, (atn_cache, env.num_envs)) + actions = np.random.randint(0, 2, (atn_cache, env.num_agents)) import time start = time.time() @@ -56,7 +59,7 @@ def test_performance(timeout=10, atn_cache=1024): env.step(atn) tick += 1 - print(f'SPS: %f', env.num_envs * tick / (time.time() - start)) + print(f'SPS: {env.num_agents * tick / (time.time() - start):,}') if __name__ == '__main__': test_performance() From 462b7fe69b229e6d4c2a248059346a7250707a7f Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Fri, 2 May 2025 19:39:46 +0000 Subject: [PATCH 4/8] removed snake changes from tripletriad commit. binding refactor as-is. --- pufferlib/ocean/env_binding.h | 90 ++++++++--------------- pufferlib/ocean/tripletriad/tripletriad.c | 7 +- pufferlib/ocean/tripletriad/tripletriad.h | 2 - 3 files changed, 35 insertions(+), 64 deletions(-) diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index a067c8b9d9..882d547da4 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -215,52 +215,6 @@ static VecEnv* unpack_vecenv(PyObject* args) { return vec; } -static double unpack_with_index(PyObject* kwargs, char* key, int env_index) { - PyObject* val = PyDict_GetItemString(kwargs, key); - if (val == NULL) { - // If the key doesn't exist, don't set an error - this allows optional parameters - // Just return a default value that the caller can check for - return 0.0; - } - - // If val is a list, extract the element at env_index - if (PyList_Check(val)) { - if (env_index >= 0 && env_index < PyList_Size(val)) { - val = PyList_GetItem(val, env_index); - } else { - // If index is out of bounds, use the first element - val = PyList_GetItem(val, 0); - } - } - - if (PyLong_Check(val)) { - long out = PyLong_AsLong(val); - if (out > INT_MAX || out < INT_MIN) { - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Value %ld of integer argument %s is out of range", out, key); - PyErr_SetString(PyExc_TypeError, error_msg); - return 1; - } - // Cast on return. Safe because double can represent all 32-bit ints exactly - return out; - } - if (PyFloat_Check(val)) { - return PyFloat_AsDouble(val); - } - if (PyBool_Check(val)) { - return PyObject_IsTrue(val); - } - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Failed to unpack keyword %s as int", key); - PyErr_SetString(PyExc_TypeError, error_msg); - return 1; -} - -// Original unpack function for backward compatibility -static double unpack(PyObject* kwargs, char* key) { - return unpack_with_index(kwargs, key, 0); -} - static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { if (PyTuple_Size(args) != 7) { PyErr_SetString(PyExc_TypeError, "vec_init requires 6 arguments"); @@ -374,7 +328,6 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { Py_INCREF(kwargs); // We need to increment the reference since we'll be modifying it } - // Add an env_index to kwargs for use by my_init for (int i = 0; i < num_envs; i++) { Env* env = (Env*)calloc(1, sizeof(Env)); if (!env) { @@ -407,16 +360,6 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { } Py_DECREF(py_seed); - // Add the environment index to kwargs - PyObject* py_env_index = PyLong_FromLong(i); - if (PyDict_SetItemString(kwargs, "env_index", py_env_index) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to set env_index in kwargs"); - Py_DECREF(py_env_index); - Py_DECREF(kwargs); - return NULL; - } - Py_DECREF(py_env_index); - PyObject* empty_args = PyTuple_New(0); if (my_init(env, empty_args, kwargs)) { PyErr_SetString(PyExc_TypeError, "env_init failed"); @@ -584,6 +527,33 @@ static PyObject* vec_close(PyObject* self, PyObject* args) { Py_RETURN_NONE; } +static double unpack(PyObject* kwargs, char* key) { + PyObject* val = PyDict_GetItemString(kwargs, key); + if (val == NULL) { + // If the key doesn't exist, don't set an error - this allows optional parameters + // Just return a default value that the caller can check for + return 0.0; + } + if (PyLong_Check(val)) { + long out = PyLong_AsLong(val); + if (out > INT_MAX || out < INT_MIN) { + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Value %ld of integer argument %s is out of range", out, key); + PyErr_SetString(PyExc_TypeError, error_msg); + return 1; + } + // Cast on return. Safe because double can represent all 32-bit ints exactly + return out; + } + if (PyFloat_Check(val)) { + return PyFloat_AsDouble(val); + } + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Failed to unpack keyword %s as int", key); + PyErr_SetString(PyExc_TypeError, error_msg); + return 1; +} + // Method table static PyMethodDef methods[] = { {"env_init", (PyCFunction)env_init, METH_VARARGS | METH_KEYWORDS, "Init environment with observation, action, reward, terminal, truncation arrays"}, @@ -611,7 +581,7 @@ static PyModuleDef module = { methods }; -PyMODINIT_FUNC PyInit_binding(void) { \ - import_array(); \ - return PyModule_Create(&module); \ +PyMODINIT_FUNC PyInit_binding(void) { + import_array(); + return PyModule_Create(&module); } diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 7917070f4d..13d3f6d422 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -64,11 +64,14 @@ void interactive() { } tick = (tick + 1) % 45; - if (action != NOOP) { - env.actions[0] = action; + + if (env.actions[0] != NOOP) { c_step(&env); } + check_win_condition(&env, 0); + check_win_condition(&env, 1); + c_render(&env); } free_linearlstm(net); diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index d02b480a95..e4db2e753c 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ b/pufferlib/ocean/tripletriad/tripletriad.h @@ -555,8 +555,6 @@ void c_render(CTripleTriad* env) { env->client = make_client(env->width, env->height); } - Client* client = env->client; - BeginDrawing(); ClearBackground(PUFF_BACKGROUND); From 7d026d96196b35113a5e43ae3b91d3462c713aeb Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Fri, 2 May 2025 19:48:56 +0000 Subject: [PATCH 5/8] remove debugging --- pufferlib/ocean/tripletriad/tripletriad.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 13d3f6d422..de2aedc1e4 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -69,9 +69,6 @@ void interactive() { c_step(&env); } - check_win_condition(&env, 0); - check_win_condition(&env, 1); - c_render(&env); } free_linearlstm(net); From 5cbc73fae13fb4b1af2ad23f6b5a30bba31ac157 Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Sat, 3 May 2025 03:14:40 +0000 Subject: [PATCH 6/8] revert all snake changes --- pufferlib/ocean/snake/binding.c | 74 ----------------- pufferlib/ocean/snake/cy_snake.pyx | 128 +++++++++++++++++++++++++++++ pufferlib/ocean/snake/snake.c | 11 ++- pufferlib/ocean/snake/snake.h | 121 +++++++++++++++------------ pufferlib/ocean/snake/snake.py | 34 ++++---- 5 files changed, 215 insertions(+), 153 deletions(-) delete mode 100644 pufferlib/ocean/snake/binding.c create mode 100644 pufferlib/ocean/snake/cy_snake.pyx diff --git a/pufferlib/ocean/snake/binding.c b/pufferlib/ocean/snake/binding.c deleted file mode 100644 index ccb7d5c61b..0000000000 --- a/pufferlib/ocean/snake/binding.c +++ /dev/null @@ -1,74 +0,0 @@ -#include "snake.h" - -#define Env CSnake -#include "../env_binding.h" - -// Helper function to extract an int from a Python object, handling lists -static int extract_int(PyObject* kwargs, const char* key, int default_value) { - PyObject* obj = PyDict_GetItemString(kwargs, key); - if (obj != NULL) { - if (PyList_Check(obj)) { - obj = PyList_GetItem(obj, 0); - } - return PyLong_AsLong(obj); - } - return default_value; -} - -// Helper function to extract a float from a Python object, handling lists -static float extract_float(PyObject* kwargs, const char* key, float default_value) { - PyObject* obj = PyDict_GetItemString(kwargs, key); - if (obj != NULL) { - if (PyList_Check(obj)) { - obj = PyList_GetItem(obj, 0); - } - return PyFloat_AsDouble(obj); - } - return default_value; -} - -// Helper function to extract a bool from a Python object, handling lists -static int extract_bool(PyObject* kwargs, const char* key, int default_value) { - PyObject* obj = PyDict_GetItemString(kwargs, key); - if (obj != NULL) { - if (PyList_Check(obj)) { - obj = PyList_GetItem(obj, 0); - } - return PyObject_IsTrue(obj); - } - return default_value; -} - -static int my_init(Env* env, PyObject* args, PyObject* kwargs) { - // Get the environment index from kwargs - int env_index = 0; - PyObject* env_index_obj = PyDict_GetItemString(kwargs, "env_index"); - if (env_index_obj != NULL) { - env_index = PyLong_AsLong(env_index_obj); - } - - // Use unpack_with_index to properly handle lists - env->width = unpack_with_index(kwargs, "width", env_index); - env->height = unpack_with_index(kwargs, "height", env_index); - env->num_snakes = unpack_with_index(kwargs, "num_snakes", env_index); - env->vision = unpack_with_index(kwargs, "vision", env_index); - env->leave_corpse_on_death = unpack_with_index(kwargs, "leave_corpse_on_death", env_index); - env->food = unpack_with_index(kwargs, "num_food", env_index); - env->reward_food = unpack_with_index(kwargs, "reward_food", env_index); - env->reward_corpse = unpack_with_index(kwargs, "reward_corpse", env_index); - env->reward_death = unpack_with_index(kwargs, "reward_death", env_index); - env->max_snake_length = unpack_with_index(kwargs, "max_snake_length", env_index); - env->cell_size = unpack_with_index(kwargs, "cell_size", env_index); - - init_csnake(env); - return 0; -} - -static int my_log(PyObject* dict, Log* log) { - assign_to_dict(dict, "perf", log->perf); - assign_to_dict(dict, "score", log->score); - assign_to_dict(dict, "episode_return", log->episode_return); - assign_to_dict(dict, "episode_length", log->episode_length); - assign_to_dict(dict, "n", log->n); - return 0; -} diff --git a/pufferlib/ocean/snake/cy_snake.pyx b/pufferlib/ocean/snake/cy_snake.pyx new file mode 100644 index 0000000000..b0d880a02b --- /dev/null +++ b/pufferlib/ocean/snake/cy_snake.pyx @@ -0,0 +1,128 @@ +cimport numpy as cnp +from libc.stdlib cimport calloc, free + +cdef extern from "snake.h": + cdef: + int LOG_BUFFER_SIZE + + ctypedef struct Log: + float perf + float score + float episode_return + float episode_length + + ctypedef struct LogBuffer + LogBuffer* allocate_logbuffer(int) + void free_logbuffer(LogBuffer*) + Log aggregate_and_clear(LogBuffer*) + + ctypedef struct CSnake: + char* observations + int* actions + float* rewards + unsigned char* terminals + LogBuffer* log_buffer + Log* logs + char* grid + int* snake + int* snake_lengths + int* snake_ptr + int* snake_lifetimes + int* snake_colors + int num_snakes + int width + int height + int max_snake_length + int food + int vision + int window + int obs_size + unsigned char leave_corpse_on_death + float reward_food + float reward_corpse + float reward_death + + ctypedef struct CSnake + void init_csnake(CSnake* env) + void step_all(CSnake* env) + void compute_observations(CSnake* env) + void spawn_snake(CSnake* env, int snake_id) + void spawn_food(CSnake* env) + void c_reset(CSnake* env) + void step_snake(CSnake* env, int i) + void c_step(CSnake* env) + ctypedef struct Client + Client* make_client(int cell_size, int width, int height) + void c_render(Client* client, CSnake* env) + void close_client(Client* client) + +cdef class CySnake: + cdef: + CSnake *envs + Client* client + LogBuffer* logs + int num_envs + + def __init__(self, char[:, :, :] observations, int[:] actions, + float[:] rewards, unsigned char[:] terminals, + list widths, list heights, list num_snakes, + list num_food, int vision, int max_snake_length, + bint leave_corpse_on_death, float reward_food, + float reward_corpse, float reward_death): + + self.num_envs = len(num_snakes) + self.envs = calloc(self.num_envs, sizeof(CSnake)) + self.logs = allocate_logbuffer(LOG_BUFFER_SIZE) + self.client = NULL + + cdef int i + cdef int n = 0 + for i in range(self.num_envs): + self.envs[i] = CSnake( + observations=&observations[n, 0, 0], + actions=&actions[n], + rewards=&rewards[n], + terminals=&terminals[n], + log_buffer=self.logs, + width=widths[i], + height=heights[i], + num_snakes=num_snakes[i], + food=num_food[i], + vision=vision, + max_snake_length=max_snake_length, + leave_corpse_on_death=leave_corpse_on_death, + reward_food=reward_food, + reward_corpse=reward_corpse, + reward_death=reward_death, + ) + init_csnake(&self.envs[i]) + n += num_snakes[i] + + def reset(self): + cdef int i + for i in range(self.num_envs): + c_reset(&self.envs[i]) + + def step(self): + cdef int i + for i in range(self.num_envs): + c_step(&self.envs[i]) + + def render(self, cell_size=8): + cdef CSnake* env = &self.envs[0] + if self.client == NULL: + self.client = make_client(cell_size, env.width, env.height) + + c_render(self.client, env) + + def close(self): + if self.client != NULL: + close_client(self.client) + self.client = NULL + + # TODO: free + #free(self.envs) + + def log(self): + cdef Log log = aggregate_and_clear(self.logs) + return log diff --git a/pufferlib/ocean/snake/snake.c b/pufferlib/ocean/snake/snake.c index d685915dc9..76b791dcba 100644 --- a/pufferlib/ocean/snake/snake.c +++ b/pufferlib/ocean/snake/snake.c @@ -20,7 +20,7 @@ int demo() { Weights* weights = load_weights("resources/snake_weights.bin", 148357); LinearLSTM* net = make_linearlstm(weights, env.num_snakes, env.obs_size, 4); - env.client = make_client(2, env.width, env.height); + Client* client = make_client(2, env.width, env.height); while (!WindowShouldClose()) { // User can take control of the first snake @@ -36,11 +36,11 @@ int demo() { forward_linearlstm(net, net->obs, env.actions); } c_step(&env); - c_render(&env); + c_render(client, &env); } free_linearlstm(net); free(weights); - close_client(env.client); + close_client(client); free_csnake(&env); return 0; } @@ -71,12 +71,11 @@ void test_performance(float test_time) { i++; } int end = time(NULL); - free_csnake(&env); printf("SPS: %f\n", (float)env.num_snakes*i / (end - start)); } int main() { - // demo(); - test_performance(30); + demo(); + //test_performance(30); return 0; } diff --git a/pufferlib/ocean/snake/snake.h b/pufferlib/ocean/snake/snake.h index 30bfe7062e..74defca3f5 100644 --- a/pufferlib/ocean/snake/snake.h +++ b/pufferlib/ocean/snake/snake.h @@ -9,24 +9,70 @@ #define CORPSE 2 #define WALL 3 +#define LOG_BUFFER_SIZE 8192 + typedef struct Log Log; struct Log { float perf; float score; float episode_return; float episode_length; - float n; }; -typedef struct Client Client; -typedef struct CSnake CSnake; -struct CSnake { +typedef struct LogBuffer LogBuffer; +struct LogBuffer { + Log* logs; + int length; + int idx; +}; + +LogBuffer* allocate_logbuffer(int size) { + LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); + logs->logs = (Log*)calloc(size, sizeof(Log)); + logs->length = size; + logs->idx = 0; + return logs; +} + +void free_logbuffer(LogBuffer* buffer) { + free(buffer->logs); + free(buffer); +} + +void add_log(LogBuffer* logs, Log* log) { + if (logs->idx == logs->length) { + return; + } + logs->logs[logs->idx] = *log; + logs->idx += 1; +} + +Log aggregate_and_clear(LogBuffer* logs) { + Log log = {0}; + if (logs->idx == 0) { + return log; + } + for (int i = 0; i < logs->idx; i++) { + log.episode_return += logs->logs[i].episode_return; + log.episode_length += logs->logs[i].episode_length; + log.score += logs->logs[i].score; + log.perf += logs->logs[i].score / logs->logs[i].episode_length; + } + log.episode_return /= logs->idx; + log.episode_length /= logs->idx; + log.score /= logs->idx; + log.perf /= logs->idx; + logs->idx = 0; + return log; +} + +typedef struct { char* observations; int* actions; float* rewards; unsigned char* terminals; - Log log; - Log* snake_logs; + LogBuffer* log_buffer; + Log* logs; char* grid; int* snake; int* snake_lengths; @@ -45,23 +91,7 @@ struct CSnake { float reward_food; float reward_corpse; float reward_death; - int tick; - int cell_size; - Client* client; -}; - -/** - * Add a snake's log to the main log when the snake's episode ends (dies or hits a wall). - * This should only be called during termination/truncation conditions for a specific snake. - * Accumulates the snake's stats into the main log and resets the snake's individual log. - */ -void add_log(CSnake* env, int snake_id) { - env->log.perf += env->snake_logs[snake_id].perf; - env->log.score += env->snake_logs[snake_id].score; - env->log.episode_return += env->snake_logs[snake_id].episode_return; - env->log.episode_length += env->snake_logs[snake_id].episode_length; - env->log.n += 1; -} +} CSnake; void init_csnake(CSnake* env) { env->grid = (char*)calloc(env->width*env->height, sizeof(char)); @@ -70,9 +100,7 @@ void init_csnake(CSnake* env) { env->snake_ptr = (int*)calloc(env->num_snakes, sizeof(int)); env->snake_lifetimes = (int*)calloc(env->num_snakes, sizeof(int)); env->snake_colors = (int*)calloc(env->num_snakes, sizeof(int)); - env->snake_logs = (Log*)calloc(env->num_snakes, sizeof(Log)); - env->tick = 0; - env->client = NULL; + env->logs = calloc(env->num_snakes, sizeof(Log)); env->snake_colors[0] = 7; for (int i = 1; inum_snakes; i++) env->snake_colors[i] = i%4 + 4; // Randomize snake colors @@ -82,7 +110,8 @@ void allocate_csnake(CSnake* env) { int obs_size = (2*env->vision + 1) * (2*env->vision + 1); env->observations = (char*)calloc(env->num_snakes*obs_size, sizeof(char)); env->actions = (int*)calloc(env->num_snakes, sizeof(int)); - env->rewards = (float*)calloc(env->num_snakes, sizeof(float)); + env->rewards = (float*)calloc(env->num_snakes, sizeof(float)); + env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE); init_csnake(env); } @@ -99,7 +128,8 @@ void free_csnake(CSnake* env) { free(env->snake_colors); free(env->actions); free(env->rewards); - free(env->snake_logs); + free_logbuffer(env->log_buffer); + free(env->logs); } void compute_observations(CSnake* env) { @@ -138,6 +168,7 @@ void delete_snake(CSnake* env, int snake_id) { } void spawn_snake(CSnake* env, int snake_id) { + env->logs[snake_id] = (Log){0}; int head_r, head_c, tile, grid_idx; delete_snake(env, snake_id); do { @@ -153,7 +184,6 @@ void spawn_snake(CSnake* env, int snake_id) { env->snake_ptr[snake_id] = 0; env->snake_lifetimes[snake_id] = 0; env->grid[grid_idx] = env->snake_colors[snake_id]; - env->snake_logs[snake_id] = (Log){0}; } void spawn_food(CSnake* env) { @@ -170,11 +200,6 @@ void spawn_food(CSnake* env) { void c_reset(CSnake* env) { env->window = 2*env->vision+1; env->obs_size = env->window*env->window; - env->tick = 0; - env->log = (Log){0}; - - for (int i = 0; i < env->num_snakes; i++) - env->snake_logs[i] = (Log){0}; for (int r = 0; r < env->vision; r++) { for (int c = 0; c < env->width; c++) @@ -199,7 +224,7 @@ void c_reset(CSnake* env) { } void step_snake(CSnake* env, int i) { - env->snake_logs[i].episode_length += 1; + env->logs[i].episode_length += 1; int atn = env->actions[i]; int dr = 0; int dc = 0; @@ -230,10 +255,9 @@ void step_snake(CSnake* env, int i) { int tile = env->grid[next_r*env->width + next_c]; if (tile >= WALL) { env->rewards[i] = env->reward_death; - env->snake_logs[i].episode_return += env->reward_death; - env->snake_logs[i].score = env->snake_lengths[i]; - env->snake_logs[i].perf = env->snake_logs[i].score / env->snake_logs[i].episode_length; - add_log(env, i); + env->logs[i].episode_return += env->reward_death; + env->logs[i].score = env->snake_lengths[i]; + add_log(env->log_buffer, &env->logs[i]); spawn_snake(env, i); return; } @@ -250,12 +274,12 @@ void step_snake(CSnake* env, int i) { bool grow; if (tile == FOOD) { env->rewards[i] = env->reward_food; - env->snake_logs[i].episode_return += env->reward_food; + env->logs[i].episode_return += env->reward_food; spawn_food(env); grow = true; } else if (tile == CORPSE) { env->rewards[i] = env->reward_corpse; - env->snake_logs[i].episode_return += env->reward_corpse; + env->logs[i].episode_return += env->reward_corpse; grow = true; } else { env->rewards[i] = 0.0; @@ -280,7 +304,6 @@ void step_snake(CSnake* env, int i) { } void c_step(CSnake* env){ - env->tick++; for (int i = 0; i < env->num_snakes; i++) step_snake(env, i); @@ -301,12 +324,11 @@ Color COLORS[] = { (Color){255, 255, 0, 255}, }; -typedef struct Client Client; -struct Client { +typedef struct { int cell_size; int width; int height; -}; +} Client; Client* make_client(int cell_size, int width, int height) { Client* client= (Client*)malloc(sizeof(Client)); @@ -323,17 +345,10 @@ void close_client(Client* client) { free(client); } -void c_render(CSnake* env) { +void c_render(Client* client, CSnake* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } - - if (env->client == NULL) { - env->client = make_client(env->cell_size, env->width, env->height); - } - - Client* client = env->client; - BeginDrawing(); ClearBackground(COLORS[0]); int sz = client->cell_size; diff --git a/pufferlib/ocean/snake/snake.py b/pufferlib/ocean/snake/snake.py index 5473fd1621..00888f9867 100644 --- a/pufferlib/ocean/snake/snake.py +++ b/pufferlib/ocean/snake/snake.py @@ -5,7 +5,7 @@ import pufferlib from pufferlib.exceptions import APIUsageError -from pufferlib.ocean.snake import binding +from pufferlib.ocean.snake.cy_snake import CySnake class Snake(pufferlib.PufferEnv): def __init__(self, num_envs=16, width=640, height=360, @@ -41,51 +41,45 @@ def __init__(self, num_envs=16, width=640, height=360, self.render_mode = render_mode self.tick = 0 - # Calculate cell_size for rendering self.cell_size = int(np.ceil(1280 / max(max(width), max(height)))) super().__init__(buf) - self.c_envs = binding.vec_init(self.observations, self.actions, - self.rewards, self.terminals, self.truncations, - num_envs, seed, width=width, height=height, - num_snakes=num_snakes, num_food=num_food, vision=vision, - max_snake_length=max_snake_length, - leave_corpse_on_death=leave_corpse_on_death, - reward_food=reward_food, reward_corpse=reward_corpse, - reward_death=reward_death, cell_size=self.cell_size) + self.c_envs = CySnake(self.observations, self.actions, + self.rewards, self.terminals, width, height, + num_snakes, num_food, vision, max_snake_length, + leave_corpse_on_death, reward_food, reward_corpse, + reward_death) def reset(self, seed=None): + self.c_envs.reset() self.tick = 0 - if seed is None: - binding.vec_reset(self.c_envs, 0) - else: - binding.vec_reset(self.c_envs, seed) return self.observations, [] def step(self, actions): self.actions[:] = actions - self.tick += 1 - binding.vec_step(self.c_envs) + self.c_envs.step() info = [] if self.tick % self.report_interval == 0: - info.append(binding.vec_log(self.c_envs)) + log = self.c_envs.log() + if log['episode_length'] > 0: + info.append(log) return (self.observations, self.rewards, self.terminals, self.truncations, info) def render(self): - binding.vec_render(self.c_envs, self.cell_size) + self.c_envs.render(self.cell_size) def close(self): - binding.vec_close(self.c_envs) + self.c_envs.close() def test_performance(timeout=10, atn_cache=1024): env = Snake() env.reset() tick = 0 - total_snakes = env.num_agents + total_snakes = sum(env.num_snakes) actions = np.random.randint(0, 4, (atn_cache, total_snakes)) import time From 8c85ed73f16e4e8b067e93ac1de9ebdb0f30f77f Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Sat, 3 May 2025 03:37:05 +0000 Subject: [PATCH 7/8] snake change to setup.py reverted --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 842424ebd1..436b7ef44a 100644 --- a/setup.py +++ b/setup.py @@ -269,7 +269,7 @@ 'pufferlib/ocean/moba/cy_moba', # 'pufferlib/ocean/tactical/c_tactical', #'pufferlib/ocean/squared/cy_squared', - #'pufferlib/ocean/snake/cy_snake', + 'pufferlib/ocean/snake/cy_snake', 'pufferlib/ocean/gpudrive/cy_gpudrive', #'pufferlib/ocean/pong/cy_pong', # 'pufferlib/ocean/breakout/cy_breakout', From 86f950866342760413410f1fb0848c31233c3edc Mon Sep 17 00:00:00 2001 From: xinpw8 Date: Sat, 3 May 2025 19:22:32 +0000 Subject: [PATCH 8/8] remove snake from setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 436b7ef44a..717a9660e1 100644 --- a/setup.py +++ b/setup.py @@ -315,7 +315,7 @@ #c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split() -pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'snake', 'tripletriad'] +pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'tripletriad'] extensions += [ Extension(