diff --git a/config/ocean/cpr.ini b/config/ocean/cpr.ini
index 349b87d19a..ee33760075 100644
--- a/config/ocean/cpr.ini
+++ b/config/ocean/cpr.ini
@@ -5,16 +5,63 @@ vec = multiprocessing
 rnn_name = Recurrent
 
 [env]
-num_envs = 256
+num_envs = 512
 vision = 3
-widths = [32]
-heights = [32]
-num_agents = [8]
+num_agents = [12]
 report_interval=1
-reward_food = 1.0
-interactive_food_reward = 5.0
-reward_move = -0.01
+reward_food = 0.1
+interactive_food_reward = 0.2
+reward_move = +0.00
 food_base_spawn_rate = 2e-3
 
 [train]
-total_timesteps = 100_000_000
+num_envs = 1
+num_workers = 1
+total_timesteps = 60_000_000
+device = cpu
+batch_size = 32768
+minibatch_size = 8192
+bptt_horizon = 16
+checkpoint_interval = 200
+learning_rate = 0.0008524
+gamma = 0.9989
+gae_lambda = 0.99
+vf_coef = 1
+ent_coef = 0.01
+adam_beta1 = 0.9
+adam_beta2 = 0.999
+adam_eps = 1e-12
+max_grad_norm = 0.5
+vf_clip_coef = 0.1
+update_epochs = 1
+
+[workspace]
+name = boxingbytes
+project = pufferai
+
+[sweep.metric]
+goal = maximize
+name = score 
+min = -10 
+max = 10 
+
+[sweep.env.reward_food]
+distribution = log_normal
+min = 0.0001
+max = 0.01
+mean = 0.001
+scale = auto 
+
+[sweep.env.interactive_food_reward]
+distribution = log_normal
+min = 0.0001
+max = 0.02
+mean = 0.002
+scale = auto
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 50e6
+max = 75e6
+mean = 60e6
+scale = time
\ No newline at end of file
diff --git a/pufferlib/ocean/cpr/cpr.c b/pufferlib/ocean/cpr/cpr.c
index 1c4f684452..f7896a9b60 100644
--- a/pufferlib/ocean/cpr/cpr.c
+++ b/pufferlib/ocean/cpr/cpr.c
@@ -3,16 +3,16 @@
 #include <unistd.h>
 
 int main() {
-  int width = 24;
-  int height = 24;
+  int width = 32;
+  int height = 32;
 
   int render_cell_size = 32;
 
   CCpr env = {
-      .num_agents = 8,
+      .num_agents = 1,
       .width = width,
       .height = height,
-      .vision = 2,
+      .vision = 3,
       .reward_food = 1.0f,
       .interactive_food_reward = 5.0f,
       .food_base_spawn_rate = 2e-3,
@@ -21,9 +21,9 @@ int main() {
   c_reset(&env);
 
   Renderer *renderer = init_renderer(render_cell_size, width, height);
-
   while (!WindowShouldClose()) {
 
+    c_render(renderer, &env);
     int st = 0;
     // User can take control of the first puffer
     if (IsKeyDown(KEY_LEFT_SHIFT)) {
@@ -41,12 +41,10 @@ int main() {
       sleep(2);
     }
     for (int i = st; i < env.num_agents; i++) {
-      env.actions[i] = rand() % 4;
+      env.actions[i] = rand() % 5;
       // printf("Agent %d gets actions %d\n", i, env->actions[i]);
     }
     c_step(&env);
-
-    c_render(renderer, &env);
   }
   close_renderer(renderer);
   free_CCpr(&env);
diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 5db37c4c9f..9509027374 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -9,6 +9,8 @@
 
 #include "raylib.h"
 
+#include "grid.h"
+
 #define EMPTY 0
 #define NORMAL_FOOD 1
 #define INTERACTIVE_FOOD 2
@@ -23,6 +25,21 @@
 #define CHECK_BIT(arr, i) (arr[(i) / 8] & (1 << ((i) % 8)))
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
+#define REWARD_20_HP -0
+#define REWARD_80_HP 0
+#define REWARD_DEATH -1.0f
+
+
+#define LOG_SCORE_REWARD_SMALL 1 
+#define LOG_SCORE_REWARD_MEDIUM 5
+#define LOG_SCORE_REWARD_MOVE - 0.0
+#define LOG_SCORE_REWARD_DEATH -1
+
+#define HP_REWARD_FOOD_MEDIUM 50
+#define HP_REWARD_FOOD_SMALL 20
+#define HP_LOSS_PER_STEP 1
+#define MAX_HP 100 
+
 typedef struct Log Log;
 struct Log {
   float perf;
@@ -30,6 +47,9 @@ struct Log {
   float episode_return;
   float episode_length;
   float moves;
+  float food_nb;
+  float agents_alive;
+  float alive_steps;
   float n;
 };
 
@@ -74,6 +94,9 @@ Log aggregate_and_clear(LogBuffer *logs) {
     log.episode_return += logs->logs[i].episode_return;
     log.episode_length += logs->logs[i].episode_length;
     log.moves += logs->logs[i].moves;
+    log.food_nb += logs->logs[i].food_nb;
+    log.agents_alive += logs->logs[i].agents_alive;
+    log.alive_steps += logs->logs[i].alive_steps;
     log.n += 1;
   }
   log.score /= logs->idx;
@@ -81,6 +104,9 @@ Log aggregate_and_clear(LogBuffer *logs) {
   log.episode_return /= logs->idx;
   log.episode_length /= logs->idx;
   log.moves /= logs->idx;
+  log.food_nb /= logs->idx;
+  log.agents_alive /= logs->idx;
+  log.alive_steps /= logs->idx;
   log.n /= logs->idx;
   logs->idx = 0;
   return log;
@@ -91,6 +117,8 @@ struct Agent {
   int r;
   int c;
   int id;
+  float hp;
+  int direction;
 };
 
 typedef struct FoodList FoodList;
@@ -132,11 +160,13 @@ struct CCpr {
   int *actions;
   float *rewards;
   unsigned char *terminals;
+  unsigned char *truncations;
+  unsigned char *masks;
 
   Agent *agents;
 
   LogBuffer *log_buffer;
-  Log *logs;
+  Log *log;
 
   uint8_t *interactive_food_agent_count;
 
@@ -149,8 +179,8 @@ void init_ccpr(CCpr *env) {
       (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
   env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
   env->vision_window = 2 * env->vision + 1;
-  env->obs_size = env->vision_window * env->vision_window;
-  env->logs = (Log *)calloc(env->num_agents, sizeof(Log));
+  env->obs_size = env->vision_window * env->vision_window;// + 1;
+  env->log = (Log *)calloc(1, sizeof(Log));
   env->interactive_food_agent_count =
       (uint8_t *)calloc((env->width * env->height + 7) / 8, sizeof(uint8_t));
   env->foods = allocate_foodlist(env->width * env->height);
@@ -158,13 +188,15 @@ void init_ccpr(CCpr *env) {
 
 void allocate_ccpr(CCpr *env) {
   // Called by C stuff
-  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1);
+  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1); //+ 1;
   env->observations = (unsigned char *)calloc(env->num_agents * obs_size,
                                               sizeof(unsigned char));
   env->actions = (int *)calloc(env->num_agents, sizeof(unsigned int));
   env->rewards = (float *)calloc(env->num_agents, sizeof(float));
   env->terminals =
       (unsigned char *)calloc(env->num_agents, sizeof(unsigned char));
+  env->truncations = (unsigned char*)calloc(env->num_agents, sizeof(unsigned char));
+  env->masks = (unsigned char *)calloc(env->num_agents, sizeof(unsigned char));
   env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE);
   init_ccpr(env);
 }
@@ -180,8 +212,10 @@ void free_CCpr(CCpr *env) {
   free(env->actions);
   free(env->rewards);
   free(env->terminals);
+  free(env->truncations);
+  free(env->masks);
   free_logbuffer(env->log_buffer);
-  free(env->logs);
+  free(env->log);
   free(env->interactive_food_agent_count);
   free_foodlist(env->foods);
 }
@@ -193,11 +227,21 @@ int get_agent_id_from_tile(int tile) { return tile - AGENTS; }
 
 void add_food(CCpr *env, int grid_idx, int food_type) {
   // Add food to the grid and the food_list at grid_idx
+  assert(env->grid[grid_idx] == EMPTY);
   env->grid[grid_idx] = food_type;
   FoodList *foods = env->foods;
   foods->indexes[foods->size++] = grid_idx;
 }
 
+void reward_agent(CCpr *env, int agent_id, float reward) {
+  // We don't reward if agent is full life
+  // Agent *agent = &env->agents[agent_id];
+  // if (agent->hp >= MAX_HP) {
+  //   return;
+  // }
+  env->rewards[agent_id] += reward;
+}
+
 void spawn_food(CCpr *env, int food_type) {
   // Randomly spawns such food in the grid
   int idx, tile;
@@ -232,24 +276,10 @@ void init_foods(CCpr *env) {
   int normal = available_tiles / (20 * normalizer);
   int interactive = available_tiles / (50 * normalizer);
   for (int i = 0; i < normal; i++) {
-    int idx, tile;
-    do {
-      int r = rand() % (env->height - 1);
-      int c = rand() % (env->width - 1);
-      idx = r * env->width + c;
-      tile = env->grid[idx];
-    } while (tile != EMPTY);
-    add_food(env, idx, NORMAL_FOOD);
+    spawn_food(env, NORMAL_FOOD);
   }
   for (int i = 0; i < interactive; i++) {
-    int idx, tile;
-    do {
-      int r = rand() % (env->height - 1);
-      int c = rand() % (env->width - 1);
-      idx = r * env->width + c;
-      tile = env->grid[idx];
-    } while (tile != EMPTY);
-    add_food(env, idx, INTERACTIVE_FOOD);
+    spawn_food(env, INTERACTIVE_FOOD);
   }
 }
 
@@ -267,42 +297,53 @@ void spawn_foods(CCpr *env) {
     for (int ri = 0; ri < 3; ri++) {
       for (int ci = 0; ci < 3; ci++) {
         int grid_idx = grid_index(env, (r + ri), (c + ci));
-        if (env->grid[grid_idx] == EMPTY) {
-          switch (env->grid[idx]) {
-          // %Chance spawning new food
-          case NORMAL_FOOD:
-            if ((rand() / (double)RAND_MAX) < env->food_base_spawn_rate) {
-              add_food(env, grid_idx, env->grid[idx]);
-            }
-            break;
-          case INTERACTIVE_FOOD:
-            if ((rand() / (double)RAND_MAX) <
-                (env->food_base_spawn_rate / 10.0)) {
-              add_food(env, grid_idx, env->grid[idx]);
-            }
-            break;
+        if (env->grid[grid_idx] != EMPTY) {
+          continue;
+        }
+        switch (env->grid[idx]) {
+        // %Chance spawning new food
+        case NORMAL_FOOD:
+          if ((rand() / (double)RAND_MAX) < env->food_base_spawn_rate) {
+            add_food(env, grid_idx, env->grid[idx]);
+          }
+          break;
+        case INTERACTIVE_FOOD:
+          if ((rand() / (double)RAND_MAX) <
+              (env->food_base_spawn_rate / 10.0)) {
+            add_food(env, grid_idx, env->grid[idx]);
           }
+          break;
         }
       }
     }
   }
 
-  // Each turn there is random probability for a food to spawn at a random
-  // location To cope with resource depletion
-  int normalizer = (env->width * env->height) / 576;
-  if ((rand() / (double)RAND_MAX) <
-      min((env->food_base_spawn_rate * 2 * normalizer), 1e-2)) {
-    spawn_food(env, NORMAL_FOOD);
-  }
-  if ((rand() / (double)RAND_MAX) <
-      min((env->food_base_spawn_rate / 5.0 * normalizer), 5e-3)) {
-    spawn_food(env, INTERACTIVE_FOOD);
-  }
+  // // Each turn there is random probability for a food to spawn at a random
+  // // location To cope with resource depletion
+  // int normalizer = (env->width * env->height) / 576;
+  // if ((rand() / (double)RAND_MAX) <
+  //     min((env->food_base_spawn_rate * 2 * normalizer), 1e-2)) {
+  //   spawn_food(env, NORMAL_FOOD);
+  // }
+  // if ((rand() / (double)RAND_MAX) <
+  //     min((env->food_base_spawn_rate / 5.0 * normalizer), 5e-3)) {
+  //   spawn_food(env, INTERACTIVE_FOOD);
+  // }
 }
 
 void compute_observations(CCpr *env) {
+  // For full obs
+  // memcpy(env->observations, env->grid,
+  //        env->width * env->height * sizeof(unsigned char));
+  // return;
+
+  // For partial obs
   for (int i = 0; i < env->num_agents; i++) {
     Agent *agent = &env->agents[i];
+    // env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
+    if (agent->hp == 0) {
+      continue;
+    }
     int obs_offset = i * env->obs_size;
     int r_offset = agent->r - env->vision;
     int c_offset = agent->c - env->vision;
@@ -313,75 +354,111 @@ void compute_observations(CCpr *env) {
         env->observations[obs_idx] = env->grid[grid_idx];
       }
     }
+
+    
   }
 }
 
-void c_reset(CCpr *env) {
-  env->tick = 0;
+void add_hp(CCpr *env, int agent_id, float hp) {
+  Agent *agent = &env->agents[agent_id];
+  agent->hp += hp;
+  if (agent->hp > MAX_HP) {
+    agent->hp = MAX_HP;
+  } else if (agent->hp <= 0) {
+    agent->hp = 0;
+    env->log->score += LOG_SCORE_REWARD_DEATH;
+    env->rewards[agent->id] += REWARD_DEATH;
+    env->terminals[agent->id] = 1;
+  }
+}
+void remove_hp(CCpr *env, int agent_id, float hp) { add_hp(env, agent_id, -hp); }
 
-  for (int r = 0; r < env->height; r++) {
-    for (int c = 0; c < env->width; c++) {
-      int adr = grid_index(env, r, c);
-      env->grid[adr] = EMPTY;
+void save_grid_to_file(CCpr *env, const char *filename) {
+    FILE *file = fopen(filename, "w");
+    if (!file) {
+        perror("Failed to open file");
+        return;
     }
-  }
+    fprintf(file, "#ifndef GRID_H\n#define GRID_H\n\n");
+    fprintf(file, "#define GRID_HEIGHT %d\n", env->height);
+    fprintf(file, "#define GRID_WIDTH %d\n\n", env->width);
+    fprintf(file, "static const unsigned char grid[GRID_HEIGHT][GRID_WIDTH] = {\n");
+
+    for (int r = 0; r < env->height; r++) {
+        fprintf(file, "    {");
+        for (int c = 0; c < env->width; c++) {
+            unsigned char val = env->grid[r * env->width + c];
+            fprintf(file, "0x%02X%s", val, (c == env->width - 1) ? "" : ", ");
+        }
+        fprintf(file, "}%s\n", (r == env->height - 1) ? "" : ",");
+    }
+    fprintf(file, "};\n\n#endif // GRID_H\n");
+    fclose(file);
+}
 
-  // Walls need to cover vision radius around the grid
+void make_grid_from_scratch(CCpr *env){
+  memset(env->grid, EMPTY, (env->height * env->width) * sizeof(env->grid[0]));
+  // top walling
   for (int r = 0; r < env->vision; r++) {
-    for (int c = 0; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->width * sizeof(env->grid[0]));
   }
+  // left side walling
   for (int r = 0; r < env->height; r++) {
-    for (int c = 0; c < env->vision; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->vision * sizeof(env->grid[0]));
   }
+  // bottom walling
   for (int r = env->height - env->vision; r < env->height; r++) {
-    for (int c = 0; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->width * sizeof(env->grid[0]));
   }
+
+  // right side walling
   for (int r = 0; r < env->height; r++) {
-    for (int c = env->width - env->vision; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width) + (env->width - env->vision), WALL,
+           env->vision * sizeof(env->grid[0]));
   }
+  save_grid_to_file(env, "grid.h");
+}
 
-  // Agents
-  srand(time(NULL));
-  for (int i = 0; i < env->num_agents; i++) {
-    env->logs[i] = (Log){0};
-
-    Agent *agent = &env->agents[i];
-
-    agent->id = i;
-
-    int adr = 0;
-    bool allocated = false;
-
-    // Random allocation
-    while (!allocated) {
-
-      adr = rand() % (env->height * env->width);
-
-      if (env->grid[adr] == EMPTY) {
-        int r = adr / env->width;
-        int c = adr % env->width;
-        agent->r = r;
-        agent->c = c;
-        allocated = true;
-      }
+void spawn_agent(CCpr *env, int i){
+  Agent *agent = &env->agents[i];
+  agent->id = i;
+  agent->hp = 80;
+  int adr = 0;
+
+  bool allocated = false;
+  while (!allocated) {
+    adr = rand() % (env->height * env->width);
+    if (env->grid[adr] == EMPTY) {
+      int r = adr / env->width;
+      int c = adr % env->width;
+      agent->r = r;
+      agent->c = c;
+      allocated = true;
     }
+  }
+  assert(env->grid[adr] == EMPTY);
+  env->grid[adr] = get_agent_tile_from_id(agent->id);
+}
+void c_reset(CCpr *env) {
+  env->tick = 0;
+  memset(env->log, 0, sizeof(Log));
+  env->foods->size = 0;
+  memset(env->foods->indexes, 0, env->width * env->height * sizeof(int));
+  // make_grid_from_scratch(env);
+  memcpy(env->grid, grid_32_32_3v, env->width * env->height * sizeof(unsigned char));
 
-    assert(env->grid[adr] == EMPTY);
-
-    env->grid[adr] = get_agent_tile_from_id(agent->id);
+  for (int i = 0; i < env->num_agents; i++) {
+    spawn_agent(env, i);
   }
 
   init_foods(env);
-
+  memset(env->observations, 0, env->num_agents * env->obs_size * sizeof(unsigned char));
+  memset(env->truncations, 0, env->num_agents * sizeof(unsigned char));
   memset(env->terminals, 0, env->num_agents * sizeof(unsigned char));
+  memset(env->masks, 1, env->num_agents * sizeof(unsigned char));
   compute_observations(env);
 }
 
@@ -397,23 +474,20 @@ void reward_agents_near(CCpr *env, int food_index) {
 
     if ((ac == food_c && (ar == food_r - 1 || ar == food_r + 1)) ||
         (ar == food_r && (ac == food_c - 1 || ac == food_c + 1))) {
-      env->rewards[i] += env->interactive_food_reward;
-      env->logs[i].score += 1;
-      add_log(env->log_buffer, &env->logs[i]);
-      env->logs[i] = (Log){0};
+      reward_agent(env, i, env->interactive_food_reward);
+      env->log->score += LOG_SCORE_REWARD_MEDIUM;
+      add_hp(env, i, HP_REWARD_FOOD_MEDIUM);
+      // add_log(env->log_buffer, env->log);
+      // memset(env->log, 0, sizeof(Log));
     }
   }
-
-  // Empty grid cell
-  // env->grid[food_index] = EMPTY;
   remove_food(env, food_index);
-
-  // Spawn new interactive food
-  // spawn_interactive_food(env);
 }
 
 void step_agent(CCpr *env, int i) {
 
+  Agent *agent = &env->agents[i];
+
   int action = env->actions[i];
   env->logs[i].episode_length += 1;
 
@@ -423,23 +497,26 @@ void step_agent(CCpr *env, int i) {
   switch (action) {
   case 0:
     dr = -1;
+    agent->direction = 3;
     break; // UP
   case 1:
     dr = 1;
+    agent->direction = 1;
     break; // DOWN
   case 2:
     dc = -1;
+    agent->direction = 2;
     break; // LEFT
   case 3:
     dc = 1;
+    agent->direction = 0;
     break; // RIGHT
+  case 4:
+    return; // No moves
   }
-
-  if (action != 4)
-    env->logs[i].moves += 1;
+  env->log->moves += 1;
 
   // Get next row and column
-  Agent *agent = &env->agents[i];
 
   int next_r = agent->r + dr;
   int next_c = agent->c + dc;
@@ -450,7 +527,7 @@ void step_agent(CCpr *env, int i) {
 
   // Anything above should be obstacle
   if (tile >= INTERACTIVE_FOOD) {
-    env->logs[i].score += env->reward_move;
+    env->log->score += LOG_SCORE_REWARD_MOVE;
     env->rewards[i] += env->reward_move;
     next_r = agent->r;
     next_c = agent->c;
@@ -482,15 +559,15 @@ void step_agent(CCpr *env, int i) {
 
   switch (tile) {
   case NORMAL_FOOD:
-    env->logs[i].score += 1.0;
-    env->rewards[i] = env->reward_food;
-    // spawn_food(env);
+    env->log->score += LOG_SCORE_REWARD_SMALL;
+    reward_agent(env, i, env->reward_food);
+    add_hp(env, i, HP_REWARD_FOOD_SMALL);
     remove_food(env, next_grid_idx);
-    add_log(env->log_buffer, &env->logs[i]);
-    env->logs[i] = (Log){0};
+    // add_log(env->log_buffer, env->log);
+    // memset(env->log, 0, sizeof(Log));
     break;
   case EMPTY:
-    env->logs[i].score += env->reward_move;
+    env->log->score += LOG_SCORE_REWARD_MOVE;
     env->rewards[i] = env->reward_move;
     break;
   }
@@ -501,21 +578,81 @@ void step_agent(CCpr *env, int i) {
   env->grid[next_grid_idx] = agent_tile;
   agent->r = next_r;
   agent->c = next_c;
+
+  return;
+}
+
+void clear_agent(CCpr *env, int agent_id) {
+  Agent *agent = &env->agents[agent_id];
+  if (agent->r < 0 || agent->c < 0) {
+    return;
+  }
+  int grid_idx = grid_index(env, agent->r, agent->c);
+  env->grid[grid_idx] = EMPTY;
+  agent->r = -1;
+  agent->c = -1;
 }
 
 void c_step(CCpr *env) {
+  env->tick++;
 
+  // memset(env->truncations, 0, env->num_agents * sizeof(unsigned char));
   memset(env->rewards, 0, env->num_agents * sizeof(float));
+  // memset(env->terminals, 0, env->num_agents * sizeof(unsigned char));
   memset(env->interactive_food_agent_count, 0,
          (env->width * env->height + 7) / 8);
 
   for (int i = 0; i < env->num_agents; i++) {
+    if (env->agents[i].hp == 0) {
+      env->masks[i] = 0;
+      clear_agent(env, i);
+      continue;
+    }
     step_agent(env, i);
+    remove_hp(env, i, HP_LOSS_PER_STEP);
   }
 
   spawn_foods(env);
 
+  //We loop again here because in the future an entity might have attacked an agent in the process
+  int alive_agents = 0;
+  for (int i = 0; i < env->num_agents; i++) {
+    if (env->agents[i].hp > 0) {
+      alive_agents += 1;
+      if (env->agents[i].hp < 20) {
+        env->rewards[i] += REWARD_20_HP;
+        env->log->score += REWARD_20_HP;
+      } else if (env->agents[i].hp > 80) {
+        env->rewards[i] += REWARD_80_HP;
+        env->log->score += REWARD_80_HP;
+      }
+    } 
+    // else {
+      // int grid_idx = grid_index(env, env->agents[i].r, env->agents[i].c);
+      // env->grid[grid_idx] = EMPTY;
+      // spawn_agent(env, i);
+    // }
+  }
+  if (alive_agents == 0) {
+    env->log->moves = 0;
+  }else{
+    env->log->moves /= alive_agents;
+  }
+  env->log->food_nb = env->foods->size;
+  env->log->agents_alive = alive_agents;
+  env->log->alive_steps = env->tick;
   compute_observations(env);
+  
+  if (alive_agents == 0|| env->tick > 1000) {
+    c_reset(env);
+    if (alive_agents == 0) {
+      memset(env->terminals, 1, env->num_agents * sizeof(unsigned char)); 
+    } else {
+      memset(env->truncations, 1, env->num_agents * sizeof(unsigned char));
+    }
+  }
+  add_log(env->log_buffer, env->log);
+  memset(env->log, 0, sizeof(Log));
 }
 
 // Raylib client
@@ -536,12 +673,13 @@ Rectangle UV_COORDS[7] = {
     (Rectangle){384, 0, 128, 128},
 };
 
-typedef struct {
+typedef struct Renderer Renderer;
+struct Renderer {
   int cell_size;
   int width;
   int height;
   Texture2D puffer;
-} Renderer;
+};
 
 Renderer *init_renderer(int cell_size, int width, int height) {
   Renderer *renderer = (Renderer *)calloc(1, sizeof(Renderer));
@@ -581,15 +719,20 @@ void c_render(Renderer *renderer, CCpr *env) {
       } else if (tile == NORMAL_FOOD || tile == INTERACTIVE_FOOD) {
         DrawRectangle(c * ts, r * ts, ts, ts, COLORS[tile]);
       } else {
-        int u = 128 * (tile % 8);
-        int v = 128 * (tile / 8);
-        Rectangle source_rect = (Rectangle){u, v, 128, 128};
-        Rectangle dest_rect = (Rectangle){c * ts, r * ts, ts, ts};
+
         int agent_id = get_agent_id_from_tile(tile);
         int col_id = agent_id % (sizeof(COLORS) / sizeof(COLORS[0]));
         Color color = COLORS[col_id];
+        int starting_sprite_x = 0;
+        float rotation = env->agents[agent_id].direction * 90.0f;
+        if (rotation == 180) {
+          starting_sprite_x = 128;
+          rotation = 0;
+        }
+        Rectangle source_rect = (Rectangle){starting_sprite_x, 0, 128, 128};
+        Rectangle dest_rect = (Rectangle){c * ts + ts/2, r * ts + ts/2, ts, ts};        
         DrawTexturePro(renderer->puffer, source_rect, dest_rect,
-                       (Vector2){0, 0}, 0, color);
+                       (Vector2){ts/2, ts/2}, rotation, color);
       }
     }
   }
diff --git a/pufferlib/ocean/cpr/cpr.py b/pufferlib/ocean/cpr/cpr.py
index 0d15d1eb43..a651b85953 100644
--- a/pufferlib/ocean/cpr/cpr.py
+++ b/pufferlib/ocean/cpr/cpr.py
@@ -6,16 +6,16 @@
 
 class PyCPR(pufferlib.PufferEnv):
     def __init__(self, 
-                num_envs = 1,
+                num_envs=1,
                 widths=[32],
                 heights=[32], 
                 num_agents=[8],  
-                vision=2, 
+                vision=3, 
                 reward_food=1.0, 
                 interactive_food_reward=5.0,
                 reward_move=-0.01,
                 food_base_spawn_rate=2e-3,
-                report_interval=250,
+                report_interval=1,
                 render_mode=None, 
                 buf=None,
                 seed=0,
@@ -24,7 +24,7 @@ def __init__(self,
         heights = num_envs*heights 
         num_agents = num_envs*num_agents 
 
-        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1),(2*vision+1)), dtype=np.uint8)
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1),), dtype=np.uint8)
         self.single_action_space = gymnasium.spaces.Discrete(5)
         self.render_mode = render_mode
         self.num_agents = sum(num_agents)
@@ -39,6 +39,8 @@ def __init__(self,
             self.actions, 
             self.rewards, 
             self.terminals,
+            self.truncations,
+            self.masks,
             widths,
             heights,
             num_agents,
@@ -79,13 +81,16 @@ def close(self):
     timeout=30
 
     tot_agents = env.num_agents
-    actions = np.random.randint(0,4,(1024,tot_agents))
+    actions = np.random.randint(0,5,(1024,tot_agents))
 
     import time 
     start = time.time()
-    while time.time() - start < timeout:
+    # while time.time() - start < timeout:
+    while tick < 500:
         atns = actions[tick % 1024]
         env.step(atns)
+        if -1 in env.rewards:
+            breakpoint()
         # env.render()
         tick += 1
 
diff --git a/pufferlib/ocean/cpr/cy_cpr.pyx b/pufferlib/ocean/cpr/cy_cpr.pyx
index ce457e1893..4aa9bf0e04 100644
--- a/pufferlib/ocean/cpr/cy_cpr.pyx
+++ b/pufferlib/ocean/cpr/cy_cpr.pyx
@@ -18,6 +18,9 @@ cdef extern from "cpr.h":
         float episode_return
         float episode_length
         float moves
+        float food_nb
+        float agents_alive
+        float alive_steps
         float n
 
     ctypedef struct LogBuffer: 
@@ -33,6 +36,8 @@ cdef extern from "cpr.h":
         int r 
         int c 
         int id 
+        int hp 
+        int direction 
 
     ctypedef struct FoodList:
         int *indexes 
@@ -57,11 +62,13 @@ cdef extern from "cpr.h":
         int *actions
         float *rewards
         unsigned char *terminals
+        unsigned char *masks
+        unsigned char *truncations
 
         Agent *agents
 
         LogBuffer *log_buffer
-        Log *logs
+        Log *log
 
         uint8_t *interactive_food_agent_count
         float interactive_food_reward
@@ -90,9 +97,12 @@ cdef class CyEnv:
         LogBuffer *logs 
         int num_envs 
 
-    def __init__(self, unsigned char[:,:,:] observations, int[:] actions, float[:] rewards, unsigned char[:] terminals,
-     list widths, list heights, list num_agents,int vision, 
-     float reward_food,float interactive_food_reward,float reward_move, float food_base_spawn_rate) -> None:
+    def __init__(
+        self, unsigned char[:,:] observations, int[:] actions, float[:] rewards,
+        unsigned char[:] terminals, unsigned char[:] truncations, unsigned char[:] masks,
+        list widths, list heights, list num_agents,int vision, 
+        float reward_food,float interactive_food_reward,float reward_move, float food_base_spawn_rate
+    ) -> None:
         self.num_envs = len(num_agents)
         self.envs = <CCpr*>calloc(self.num_envs, sizeof(CCpr))
         self.logs = allocate_logbuffer(LOG_BUFFER_SIZE)
@@ -101,10 +111,12 @@ cdef class CyEnv:
         cdef int n = 0 
         for i in range(self.num_envs):
             self.envs[i] = CCpr(
-                observations = &observations[n,0,0],
+                observations = &observations[n,0],
                 actions=&actions[n],
                 rewards=&rewards[n],
                 terminals=&terminals[n],
+                truncations=&truncations[n],
+                masks=&masks[n],
                 log_buffer=self.logs,
                 width=widths[i],
                 height=heights[i],
diff --git a/pufferlib/ocean/cpr/grid.h b/pufferlib/ocean/cpr/grid.h
new file mode 100644
index 0000000000..7682999664
--- /dev/null
+++ b/pufferlib/ocean/cpr/grid.h
@@ -0,0 +1,42 @@
+#ifndef GRID_H
+#define GRID_H
+
+#define GRID_HEIGHT 32
+#define GRID_WIDTH 32
+
+static const unsigned char grid_32_32_3v[GRID_HEIGHT][GRID_WIDTH] = {
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03}
+};
+
+#endif // GRID_H
diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index cac9b0ef61..49c2494a8d 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -740,4 +740,4 @@ def observe(self, hypers, score, cost, is_failure=False):
                 cost=cost,
                 is_failure=is_failure,
             )
-        )
+        )
\ No newline at end of file