diff --git a/pufferlib/ocean/cartpole/binding.c b/pufferlib/ocean/cartpole/binding.c new file mode 100644 index 0000000000..8bda2cfd1b --- /dev/null +++ b/pufferlib/ocean/cartpole/binding.c @@ -0,0 +1,20 @@ +#include "cartpole.h" +#define Env Cartpole +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->continuous = unpack(kwargs, "continuous"); + init(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "x_threshold_termination", log->x_threshold_termination); + assign_to_dict(dict, "pole_angle_termination", log->pole_angle_termination); + assign_to_dict(dict, "max_steps_termination", log->max_steps_termination); + assign_to_dict(dict, "n", log->n); + return 0; +} diff --git a/pufferlib/ocean/cartpole/cartpole.c b/pufferlib/ocean/cartpole/cartpole.c index c5f1960b80..43cef0de2c 100644 --- a/pufferlib/ocean/cartpole/cartpole.c +++ b/pufferlib/ocean/cartpole/cartpole.c @@ -1,4 +1,4 @@ -// local compile/eval not implemented +// local compile/eval implemented for discrete actions only // eval with python demo.py --mode eval --env puffer_cartpole --eval-mode-path #include @@ -11,8 +11,8 @@ #define NUM_WEIGHTS 133123 #define OBSERVATIONS_SIZE 4 #define ACTIONS_SIZE 2 -#define CONTINUOUS 1 -const char* WEIGHTS_PATH = "/puffertank/pufferlib/pufferlib/resources/cartpole/cartpole_weights.bin"; +#define CONTINUOUS 0 +const char* WEIGHTS_PATH = "/puffertank/test_newbind/pufferlib/pufferlib/resources/cartpole/cartpole_weights.bin"; float movement(int discrete_action, int userControlMode) { if (userControlMode) { @@ -26,14 +26,15 @@ void demo() { Weights* weights = load_weights(WEIGHTS_PATH, NUM_WEIGHTS); LinearLSTM* net; - if (CONTINUOUS) { - net = make_linearlstm_float(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE, ACTION_TYPE_FLOAT); - } 
else { - net = make_linearlstm(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE, ACTION_TYPE_INT); - } + // if (CONTINUOUS) { + // net = make_linearlstm_float(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE); + // } else { + // net = make_linearlstm_int(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE); + // } - CartPole env = {0}; - env.continuous = CONTINUOUS; + net = make_linearlstm(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE); + Cartpole env = {0}; + env.continuous = CONTINUOUS; // field name as declared in struct Cartpole (cartpole.h) allocate(&env); Client* client = make_client(&env); c_reset(&env); @@ -46,14 +47,17 @@ void demo() { int userControlMode = IsKeyDown(KEY_LEFT_SHIFT); if (!userControlMode) { - if (CONTINUOUS) { - forward_linearlstm_float(net, env.observations, env.actions); - env.actions[0] = tanhf(env.actions[0]); - } else { - int action_value; - forward_linearlstm_int(net, env.observations, &action_value); - env.actions[0] = movement(action_value, 0); - } + // if (CONTINUOUS) { + // forward_linearlstm_float(net, env.observations, env.actions); + // env.actions[0] = tanhf(env.actions[0]); + // } else { + // int action_value; + // forward_linearlstm_int(net, env.observations, &action_value); + // env.actions[0] = movement(action_value, 0); + // } + int action_value; + forward_linearlstm(net, env.observations, &action_value); + env.actions[0] = movement(action_value, 0); } else { env.actions[0] = movement(env.actions[0], userControlMode); } @@ -64,11 +68,11 @@ void demo() { BeginDrawing(); ClearBackground(RAYWHITE); - c_render(client, &env); + c_render(&env); DrawText("Evaluating policy...", 10, 160, 20, DARKGRAY); EndDrawing(); - if (env.dones[0]) { + if (env.terminals[0]) { printf("Episode done. 
Steps: %d, Return: %.2f\n\n", episode_steps, episode_return); episode_steps = 0; episode_return = 0.0f; diff --git a/pufferlib/ocean/cartpole/cartpole.h b/pufferlib/ocean/cartpole/cartpole.h index cb3f6cf9c3..bbcc56879e 100644 --- a/pufferlib/ocean/cartpole/cartpole.h +++ b/pufferlib/ocean/cartpole/cartpole.h @@ -19,127 +19,74 @@ #define THETA_THRESHOLD_RADIANS (12 * 2 * M_PI / 360) #define MAX_STEPS 200 #define WIDTH 600 -#define HEIGHT 800 -#define SCALE 100 // scaling for rendering +#define HEIGHT 200 +#define SCALE 100 -typedef struct Log { +typedef struct Log Log; +struct Log { float perf; - float score; - float episode_return; float episode_length; - int x_threshold_termination; - int pole_angle_termination; - int max_steps_termination; -} Log; - -typedef struct LogBuffer { - Log* logs; - int length; - int idx; -} LogBuffer; - -LogBuffer* allocate_logbuffer(int size) { - LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer)); - logs->logs = (Log*)calloc(size, sizeof(Log)); - logs->length = size; - logs->idx = 0; - return logs; -} - -void free_logbuffer(LogBuffer* buffer) { - if (buffer) { - free(buffer->logs); - free(buffer); - } -} - -void add_log(LogBuffer* logs, Log* log) { - if (logs->idx == logs->length) { - return; - } - logs->logs[logs->idx] = *log; - logs->idx++; -} + float x_threshold_termination; + float pole_angle_termination; + float max_steps_termination; + float n; + float score; +}; -Log aggregate_and_clear(LogBuffer* logs) { - Log log = {0}; - if (logs->idx == 0) { - return log; - } - for (int i = 0; i < logs->idx; i++) { - log.episode_return += logs->logs[i].episode_return; - log.episode_length += logs->logs[i].episode_length; - log.x_threshold_termination += logs->logs[i].x_threshold_termination; - log.pole_angle_termination += logs->logs[i].pole_angle_termination; - log.max_steps_termination += logs->logs[i].max_steps_termination; - log.score += logs->logs[i].episode_length; - log.perf += logs->logs[i].episode_length / (float)MAX_STEPS; 
- } - log.episode_return /= logs->idx; - log.episode_length /= logs->idx; - log.x_threshold_termination /= logs->idx; - log.pole_angle_termination /= logs->idx; - log.max_steps_termination /= logs->idx; - log.score /= logs->idx; - log.perf /= logs->idx; - logs->idx = 0; - return log; -} +typedef struct Client Client; +struct Client { +}; -typedef struct CartPole { - float* observations; // [x, x_dot, theta, theta_dot] - float* actions; // float for cont support. action: 0 (L) or 1 (R) +typedef struct Cartpole Cartpole; +struct Cartpole { + float* observations; + float* actions; float* rewards; - unsigned char* dones; - LogBuffer* log_buffer; + unsigned char* terminals; + unsigned char* truncations; Log log; + Client* client; + float x; + float x_dot; + float theta; + float theta_dot; + int tick; + int continuous; + float episode_return; +}; - // Environment state variables - float x; // cart position - float x_dot; // cart velocity - float theta; // pole angle - float theta_dot; // pole angular velocity - - int steps_beyond_done; // -1 means not done yet, 0 means just done, >0 means stepping after done - int steps; // step counter for current episode - - // Control parameters - int continuous; // set in cartpole.py -} CartPole; - -typedef struct Client { -} Client; - -void init(CartPole* env) { - env->steps = 0; - env->steps_beyond_done = -1; - if (!env->log_buffer) - env->log_buffer = allocate_logbuffer(1024); +void add_log(Cartpole* env) { + if (env->episode_return > 0) { + env->log.perf += env->episode_return / MAX_STEPS; // accumulate like the other Log fields; vec_log sums and clears per-env logs + } else { + env->log.perf += 0.0f; + } + env->log.episode_length += env->tick; + env->log.score += env->tick; + env->log.x_threshold_termination += (env->x < -X_THRESHOLD || env->x > X_THRESHOLD); + env->log.pole_angle_termination += (env->theta < -THETA_THRESHOLD_RADIANS || env->theta > THETA_THRESHOLD_RADIANS); + env->log.max_steps_termination += (env->tick >= MAX_STEPS); + env->log.n += 1; } -void free_initialized(CartPole* env) { - if 
(env->log_buffer) { - free_logbuffer(env->log_buffer); - env->log_buffer = NULL; - } +void init(Cartpole* env) { + env->tick = 0; + memset(&env->log, 0, sizeof(Log)); } -void allocate(CartPole* env) { +void allocate(Cartpole* env) { init(env); env->observations = (float*)calloc(4, sizeof(float)); env->actions = (float*)calloc(1, sizeof(float)); env->rewards = (float*)calloc(1, sizeof(float)); - env->dones = (unsigned char*)calloc(1, sizeof(unsigned char)); - if (!env->log_buffer) - env->log_buffer = allocate_logbuffer(1024); + env->terminals = (unsigned char*)calloc(1, sizeof(unsigned char)); } -void free_allocated(CartPole* env) { +void free_allocated(Cartpole* env) { free(env->observations); free(env->actions); free(env->rewards); - free(env->dones); - free_initialized(env); + free(env->terminals); } const Color PUFF_RED = (Color){187, 0, 0, 255}; @@ -147,9 +94,9 @@ const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; -Client* make_client(CartPole* env) { +Client* make_client(Cartpole* env) { Client* client = (Client*)calloc(1, sizeof(Client)); - InitWindow(WIDTH, HEIGHT, "puffer cartpole"); + InitWindow(WIDTH, HEIGHT, "puffer Cartpole"); SetTargetFPS(60); return client; } @@ -159,63 +106,76 @@ void close_client(Client* client) { free(client); } -void c_render(Client* client, CartPole* env) { +void c_render(Cartpole* env) { if (IsKeyDown(KEY_ESCAPE)) exit(0); if (IsKeyPressed(KEY_TAB)) ToggleFullscreen(); + if (env->client == NULL) { + env->client = make_client(env); + } + + Client* client = env->client; BeginDrawing(); ClearBackground(PUFF_BACKGROUND); - - // Draw track: a horizontal line through the middle - DrawLine(0, HEIGHT / 2, WIDTH, HEIGHT / 2, PUFF_CYAN); - - // Calculate cart position in pixels (centered) + DrawLine(0, HEIGHT / 1.5, WIDTH, HEIGHT / 1.5, PUFF_CYAN); float cart_x = WIDTH / 2 + env->x * SCALE; - float cart_y = HEIGHT / 2; - - // Draw 
cart as a rectangle (40x20) + float cart_y = HEIGHT / 1.6; DrawRectangle((int)(cart_x - 20), (int)(cart_y - 10), 40, 20, PUFF_CYAN); - - // Draw pole as a red line. Pole length = 2 * 0.5 scaled. float pole_length = 2.0f * 0.5f * SCALE; float pole_x2 = cart_x + sinf(env->theta) * pole_length; float pole_y2 = cart_y - cosf(env->theta) * pole_length; DrawLineEx((Vector2){cart_x, cart_y}, (Vector2){pole_x2, pole_y2}, 5, PUFF_RED); - - // Draw info text - DrawText(TextFormat("Steps: %i", env->steps), 10, 10, 20, PUFF_WHITE); + DrawText(TextFormat("Steps: %i", env->tick), 10, 10, 20, PUFF_WHITE); DrawText(TextFormat("Cart Position: %.2f", env->x), 10, 40, 20, PUFF_WHITE); DrawText(TextFormat("Pole Angle: %.2f", env->theta * 180.0f / M_PI), 10, 70, 20, PUFF_WHITE); - EndDrawing(); } -void compute_observations(CartPole* env) { +void compute_observations(Cartpole* env) { env->observations[0] = env->x; env->observations[1] = env->x_dot; env->observations[2] = env->theta; env->observations[3] = env->theta_dot; } -void c_reset(CartPole* env) { +void c_reset(Cartpole* env) { + env->episode_return = 0.0f; env->x = ((float)rand() / (float)RAND_MAX) * 0.08f - 0.04f; env->x_dot = ((float)rand() / (float)RAND_MAX) * 0.08f - 0.04f; env->theta = ((float)rand() / (float)RAND_MAX) * 0.08f - 0.04f; env->theta_dot = ((float)rand() / (float)RAND_MAX) * 0.08f - 0.04f; - env->steps = 0; - + env->tick = 0; + compute_observations(env); } -void c_step(CartPole* env) { - float force = 0.0; - if (env->continuous) { - force = env->actions[0] * FORCE_MAG; - } else { - force = (env->actions[0] > 0.5f) ? FORCE_MAG : -FORCE_MAG; +void c_step(Cartpole* env) { + // float force = 0.0; + // if (env->continuous) { + // force = env->actions[0] * FORCE_MAG; + // } else { + // force = (env->actions[0] > 0.5f) ? 
FORCE_MAG : -FORCE_MAG; + // } + + float a = env->actions[0]; + + /* ===== runtime sanity check –– delete after debugging ===== */ + if (!isfinite(a) || a < -1.0001f || a > 1.0001f) { + fprintf(stderr, + "[BAD ACTION] tick=%d raw=%.6f\n", + env->tick, a); + fflush(stderr); } + /* ========================================================== */ + + if (!isfinite(a)) a = 0.0f; + a = fminf(fmaxf(a, -1.0f), 1.0f); + env->actions[0] = a; + + float force = env->continuous ? a * FORCE_MAG + : (a > 0.5f ? FORCE_MAG : -FORCE_MAG); float costheta = cosf(env->theta); float sintheta = sinf(env->theta); @@ -230,25 +190,20 @@ void c_step(CartPole* env) { env->theta += TAU * env->theta_dot; env->theta_dot += TAU * thetaacc; - bool done = env->x < -X_THRESHOLD || env->x > X_THRESHOLD || - env->theta < -THETA_THRESHOLD_RADIANS || env->theta > THETA_THRESHOLD_RADIANS || - env->steps >= MAX_STEPS; + env->tick += 1; + + bool terminated = env->x < -X_THRESHOLD || env->x > X_THRESHOLD || + env->theta < -THETA_THRESHOLD_RADIANS || env->theta > THETA_THRESHOLD_RADIANS; + bool truncated = env->tick >= MAX_STEPS; + bool done = terminated || truncated; env->rewards[0] = done ? 0.0f : 1.0f; - env->dones[0] = done ? 1 : 0; - - env->steps += 1; + env->episode_return += env->rewards[0]; + env->terminals[0] = terminated ? 
1 : 0; if (done) { - env->log.episode_return += env->steps; - env->log.episode_length = env->steps; - env->log.x_threshold_termination += (env->x < -X_THRESHOLD || env->x > X_THRESHOLD); - env->log.pole_angle_termination += (env->theta < -THETA_THRESHOLD_RADIANS || env->theta > THETA_THRESHOLD_RADIANS); - env->log.max_steps_termination += (env->steps >= MAX_STEPS); - - add_log(env->log_buffer, &env->log); + add_log(env); c_reset(env); - memset(&env->log, 0, sizeof(Log)); } compute_observations(env); diff --git a/pufferlib/ocean/cartpole/cartpole.py b/pufferlib/ocean/cartpole/cartpole.py index 354872e066..9b3eecca3c 100644 --- a/pufferlib/ocean/cartpole/cartpole.py +++ b/pufferlib/ocean/cartpole/cartpole.py @@ -1,60 +1,65 @@ import numpy as np import gymnasium import pufferlib -from pufferlib.ocean.cartpole.cy_cartpole import CyCartPole +from pufferlib.ocean.cartpole import binding class Cartpole(pufferlib.PufferEnv): - def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuous=False, buf=None, seed=1): + def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuous=False, buf=None, seed=0): self.render_mode = render_mode self.num_agents = num_envs self.report_interval = report_interval self.tick = 0 - self.is_continuous = continuous + self.continuous = continuous + self.human_action = None self.num_obs = 4 self.single_observation_space = gymnasium.spaces.Box( low=-np.inf, high=np.inf, shape=(self.num_obs,), dtype=np.float32 ) - if self.is_continuous: + if self.continuous: self.single_action_space = gymnasium.spaces.Box( low=-1.0, high=1.0, shape=(1,), dtype=np.float32 ) + else: self.single_action_space = gymnasium.spaces.Discrete(2) - super().__init__(buf=buf) - + super().__init__(buf) + self.actions = np.zeros(self.num_agents, dtype=np.float32) self.terminals = np.zeros(self.num_agents, dtype=np.uint8) self.truncations = np.zeros(self.num_agents, dtype=np.uint8) - self.c_envs = CyCartPole( + self.c_envs = 
binding.vec_init( self.observations, self.actions, self.rewards, self.terminals, + self.truncations, num_envs, - int(self.is_continuous), + seed, continuous=int(self.continuous), # my_init in binding.c reads "continuous" from kwargs ) def reset(self, seed=None): - self.tick = 0 - self.c_envs.reset() + self.tick = 0 + if seed is None: + binding.vec_reset(self.c_envs, 0) + else: + binding.vec_reset(self.c_envs, seed) return self.observations, [] def step(self, actions): - if self.is_continuous: + if self.continuous: self.actions[:] = np.clip(actions.flatten(), -1.0, 1.0) else: self.actions[:] = actions - - self.c_envs.step() + + self.tick += 1 + binding.vec_step(self.c_envs) + info = [] if self.tick % self.report_interval == 0: - log = self.c_envs.log() - if log['episode_length'] > 0: - info.append(log) - self.tick += 1 + info.append(binding.vec_log(self.c_envs)) return ( self.observations, @@ -65,20 +70,19 @@ def step(self, actions): ) def render(self): - if self.render_mode == 'human': - self.c_envs.render() + binding.vec_render(self.c_envs, 0) def close(self): - self.c_envs.close() + binding.vec_close(self.c_envs) -def test_performance(timeout=10, atn_cache=8192, is_continuous=True): +def test_performance(timeout=10, atn_cache=8192, continuous=True): """Benchmark environment performance.""" num_envs = 4096 - env = Cartpole(num_envs=num_envs, continuous=is_continuous) + env = Cartpole(num_envs=num_envs, continuous=continuous) env.reset() tick = 0 - if env.is_continuous: + if env.continuous: actions = np.random.uniform(-1, 1, (atn_cache, num_envs, 1)).astype(np.float32) else: actions = np.random.randint(0, env.single_action_space.n, (atn_cache, num_envs)).astype(np.int8) @@ -94,3 +98,4 @@ def test_performance(timeout=10, atn_cache=8192, is_continuous=True): if __name__ == '__main__': test_performance() + \ No newline at end of file diff --git a/pufferlib/ocean/cartpole/cy_cartpole.pyx b/pufferlib/ocean/cartpole/cy_cartpole.pyx deleted file mode 100644 index 4381e4183d..0000000000 --- a/pufferlib/ocean/cartpole/cy_cartpole.pyx +++ 
/dev/null @@ -1,106 +0,0 @@ -from libc.stdlib cimport calloc, free - -cdef extern from "cartpole.h": - ctypedef struct Log: - float perf - float score - float episode_return - float episode_length - int x_threshold_termination - int pole_angle_termination - int max_steps_termination - - ctypedef struct LogBuffer: - Log* logs - int length - int idx - - LogBuffer* allocate_logbuffer(int) - void free_logbuffer(LogBuffer*) - Log aggregate_and_clear(LogBuffer*) - - ctypedef struct CartPole: - float* observations - float* actions - float* rewards - unsigned char* dones - LogBuffer* log_buffer - Log log - float x - float x_dot - float theta - float theta_dot - int steps_beyond_done - int steps - int continuous - - ctypedef struct Client - - void init(CartPole* env) - void free_initialized(CartPole* env) - void allocate(CartPole* env) - void free_allocated(CartPole* env) - Client* make_client(CartPole* env) - void close_client(Client* client) - void c_render(Client* client, CartPole* env) - void c_reset(CartPole* env) - void c_step(CartPole* env) - -cdef class CyCartPole: - cdef: - Client* client - CartPole* envs - LogBuffer* logs - int num_envs - - def __init__(self, float[:, :] observations, float[:] actions, float[:] rewards, - unsigned char[:] dones, int num_envs, int continuous=0): - self.num_envs = num_envs - self.envs = calloc(num_envs, sizeof(CartPole)) - self.logs = allocate_logbuffer(1024) - self.client = NULL - - cdef int i - for i in range(num_envs): - self.envs[i].observations = &observations[i, 0] - self.envs[i].actions = &actions[i] - self.envs[i].rewards = &rewards[i] - self.envs[i].dones = &dones[i] - self.envs[i].log_buffer = self.logs - self.envs[i].continuous = continuous - init(&self.envs[i]) - - def reset(self): - cdef int i - for i in range(self.num_envs): - c_reset(&self.envs[i]) - - def step(self): - cdef int i - for i in range(self.num_envs): - c_step(&self.envs[i]) - - def render(self): - cdef CartPole* env = &self.envs[0] - if self.client == 
NULL: - import os - cwd = os.getcwd() - os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) - self.client = make_client(env) - os.chdir(cwd) - c_render(self.client, env) - - def close(self): - if self.client != NULL: - close_client(self.client) - self.client = NULL - if self.envs != NULL: - free(self.envs) - self.envs = NULL - if self.logs != NULL: - free_logbuffer(self.logs) - self.logs = NULL - - def log(self): - cdef Log log = aggregate_and_clear(self.logs) - return log diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 059e0774f4..2230c53692 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -111,7 +111,7 @@ static PyObject* env_init(PyObject* self, PyObject* args, PyObject* kwargs) { PyErr_SetString(PyExc_ValueError, "Truncations must be 1D"); return NULL; } - //env->truncations = PyArray_DATA(truncations); + // env->truncations = PyArray_DATA(truncations); PyObject* seed_arg = PyTuple_GetItem(args, 5); @@ -336,11 +336,15 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { return NULL; } vec->envs[i] = env; + + // // Make sure the log is initialized to 0 + // memset(&env->log, 0, sizeof(Log)); + env->observations = (void*)((char*)PyArray_DATA(observations) + i*PyArray_STRIDE(observations, 0)); env->actions = (void*)((char*)PyArray_DATA(actions) + i*PyArray_STRIDE(actions, 0)); env->rewards = (void*)((char*)PyArray_DATA(rewards) + i*PyArray_STRIDE(rewards, 0)); env->terminals = (void*)((char*)PyArray_DATA(terminals) + i*PyArray_STRIDE(terminals, 0)); - //env->truncations = (void*)((char*)PyArray_DATA(truncations) + i*PyArray_STRIDE(truncations, 0)); + // env->truncations = (void*)((char*)PyArray_DATA(truncations) + i*PyArray_STRIDE(truncations, 0)); // Assumes each process has the same number of environments int env_seed = i + seed*vec->num_envs; @@ -484,9 +488,9 @@ static PyObject* vec_log(PyObject* self, PyObject* args) { int num_keys = 
sizeof(Log) / sizeof(float); for (int i = 0; i < vec->num_envs; i++) { Env* env = vec->envs[i]; - for (int i = 0; i < num_keys; i++) { - ((float*)&aggregate)[i] += ((float*)&env->log)[i]; - ((float*)&env->log)[i] = 0; + for (int j = 0; j < num_keys; j++) { + ((float*)&aggregate)[j] += ((float*)&env->log)[j]; + ((float*)&env->log)[j] = 0; } } diff --git a/setup.py b/setup.py index 165775c82a..d4d922c335 100644 --- a/setup.py +++ b/setup.py @@ -272,7 +272,7 @@ 'pufferlib/ocean/snake/cy_snake', #'pufferlib/ocean/pong/cy_pong', # 'pufferlib/ocean/breakout/cy_breakout', - 'pufferlib/ocean/cartpole/cy_cartpole', + # 'pufferlib/ocean/cartpole/cy_cartpole', 'pufferlib/ocean/connect4/cy_connect4', #'pufferlib/ocean/grid/cy_grid', 'pufferlib/ocean/tripletriad/cy_tripletriad', @@ -313,7 +313,7 @@ #c_args = ['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2'] #c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split() -pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical'] +pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'cartpole'] extensions += [ Extension( f'pufferlib.ocean.{name}.binding',