From 8b4a98b5aba8f6aee59c0e8850db5cc393602f52 Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Thu, 27 Mar 2025 05:00:53 +0100
Subject: [PATCH 1/8] comma fix on setup.py

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 9ce4537428..6aeef4cdbc 100644
--- a/setup.py
+++ b/setup.py
@@ -267,8 +267,7 @@
     'pufferlib/ocean/go/cy_go',
     'pufferlib/ocean/rware/cy_rware',
     'pufferlib/ocean/trash_pickup/cy_trash_pickup',
-    'pufferlib/ocean/cpr/cy_cpr'
-    'pufferlib/ocean/tower_climb/cy_tower_climb',
+    'pufferlib/ocean/cpr/cy_cpr',
 ]
 
 system = platform.system()

From 9c7cd87ad3742061dae1d54ea9b8ae5aba79fc58 Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Thu, 27 Mar 2025 07:08:41 +0100
Subject: [PATCH 2/8] Improve cpr from PR comments

---
 pufferlib/ocean/cpr/cpr.h | 119 +++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 67 deletions(-)

diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 47a2d81d49..cfc5e01887 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -179,6 +179,7 @@ int get_agent_id_from_tile(int tile) { return tile - AGENTS; }
 
 void add_food(CCpr *env, int grid_idx, int food_type) {
   // Add food to the grid and the food_list at grid_idx
+  assert(env->grid[grid_idx] == EMPTY);
   env->grid[grid_idx] = food_type;
   FoodList *foods = env->foods;
   foods->indexes[foods->size++] = grid_idx;
@@ -218,24 +219,10 @@ void init_foods(CCpr *env) {
   int normal = available_tiles / (20 * normalizer);
   int interactive = available_tiles / (50 * normalizer);
   for (int i = 0; i < normal; i++) {
-    int idx, tile;
-    do {
-      int r = rand() % (env->height - 1);
-      int c = rand() % (env->width - 1);
-      idx = r * env->width + c;
-      tile = env->grid[idx];
-    } while (tile != EMPTY);
-    add_food(env, idx, NORMAL_FOOD);
+    spawn_food(env, NORMAL_FOOD);
   }
   for (int i = 0; i < interactive; i++) {
-    int idx, tile;
-    do {
-      int r = rand() % (env->height - 1);
-      int c = rand() % (env->width - 1);
-      idx = r * env->width + c;
-      tile = env->grid[idx];
-    } while (tile != EMPTY);
-    add_food(env, idx, INTERACTIVE_FOOD);
+    spawn_food(env, INTERACTIVE_FOOD);
   }
 }
 
@@ -253,21 +240,22 @@ void spawn_foods(CCpr *env) {
     for (int ri = 0; ri < 3; ri++) {
       for (int ci = 0; ci < 3; ci++) {
         int grid_idx = grid_index(env, (r + ri), (c + ci));
-        if (env->grid[grid_idx] == EMPTY) {
-          switch (env->grid[idx]) {
-          // %Chance spawning new food
-          case NORMAL_FOOD:
-            if ((rand() / (double)RAND_MAX) < env->food_base_spawn_rate) {
-              add_food(env, grid_idx, env->grid[idx]);
-            }
-            break;
-          case INTERACTIVE_FOOD:
-            if ((rand() / (double)RAND_MAX) <
-                (env->food_base_spawn_rate / 10.0)) {
-              add_food(env, grid_idx, env->grid[idx]);
-            }
-            break;
+        if (env->grid[grid_idx] != EMPTY) {
+          continue;
+        }
+        switch (env->grid[idx]) {
+        // %Chance spawning new food
+        case NORMAL_FOOD:
+          if ((rand() / (double)RAND_MAX) < env->food_base_spawn_rate) {
+            add_food(env, grid_idx, env->grid[idx]);
           }
+          break;
+        case INTERACTIVE_FOOD:
+          if ((rand() / (double)RAND_MAX) <
+              (env->food_base_spawn_rate / 10.0)) {
+            add_food(env, grid_idx, env->grid[idx]);
+          }
+          break;
         }
       }
     }
@@ -305,33 +293,28 @@ void compute_observations(CCpr *env) {
 void c_reset(CCpr *env) {
   env->tick = 0;
 
-  for (int r = 0; r < env->height; r++) {
-    for (int c = 0; c < env->width; c++) {
-      int adr = grid_index(env, r, c);
-      env->grid[adr] = EMPTY;
-    }
-  }
+  memset(env->grid, EMPTY, (env->height * env->width) * sizeof(env->grid[0]));
 
-  // Walls need to cover vision radius around the grid
+  // top walling
   for (int r = 0; r < env->vision; r++) {
-    for (int c = 0; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->width * sizeof(env->grid[0]));
   }
+  // left side walling
   for (int r = 0; r < env->height; r++) {
-    for (int c = 0; c < env->vision; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->vision * sizeof(env->grid[0]));
   }
+  // bottom walling
   for (int r = env->height - env->vision; r < env->height; r++) {
-    for (int c = 0; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width), WALL,
+           env->width * sizeof(env->grid[0]));
   }
+
+  // right side walling
   for (int r = 0; r < env->height; r++) {
-    for (int c = env->width - env->vision; c < env->width; c++) {
-      env->grid[r * env->width + c] = WALL;
-    }
+    memset(env->grid + (r * env->width) + (env->width - env->vision), WALL,
+           env->vision * sizeof(env->grid[0]));
   }
 
   // Agents
@@ -384,21 +367,15 @@ void reward_agents_near(CCpr *env, int food_index) {
     if ((ac == food_c && (ar == food_r - 1 || ar == food_r + 1)) ||
         (ar == food_r && (ac == food_c - 1 || ac == food_c + 1))) {
       env->rewards[i] += env->interactive_food_reward;
-      env->logs[i].score += env->interactive_food_reward;
+      env->logs[i].score += 5;
       add_log(env->log_buffer, &env->logs[i]);
       env->logs[i] = (Log){0};
     }
   }
-
-  // Empty grid cell
-  // env->grid[food_index] = EMPTY;
   remove_food(env, food_index);
-
-  // Spawn new interactive food
-  // spawn_interactive_food(env);
 }
 
-void step_agent(CCpr *env, int i) {
+bool step_agent(CCpr *env, int i) {
 
   int action = env->actions[i];
 
@@ -418,10 +395,10 @@ void step_agent(CCpr *env, int i) {
   case 3:
     dc = 1;
     break; // RIGHT
+  case 4:
+    return false; // No moves
   }
-
-  if (action != 4)
-    env->logs[i].moves += 1;
+  env->logs[i].moves += 1;
 
   // Get next row and column
   Agent *agent = &env->agents[i];
@@ -435,7 +412,7 @@ void step_agent(CCpr *env, int i) {
 
   // Anything above should be obstacle
   if (tile >= INTERACTIVE_FOOD) {
-    env->logs[i].score += env->reward_move;
+    env->logs[i].score += -0.01;
     env->rewards[i] += env->reward_move;
     next_r = agent->r;
     next_c = agent->c;
@@ -467,15 +444,14 @@ void step_agent(CCpr *env, int i) {
 
   switch (tile) {
   case NORMAL_FOOD:
-    env->logs[i].score += env->reward_food;
+    env->logs[i].score += 1;
     env->rewards[i] = env->reward_food;
-    // spawn_food(env);
     remove_food(env, next_grid_idx);
     add_log(env->log_buffer, &env->logs[i]);
     env->logs[i] = (Log){0};
     break;
   case EMPTY:
-    env->logs[i].score += env->reward_move;
+    env->logs[i].score += -0.01;
     env->rewards[i] = env->reward_move;
     break;
   }
@@ -486,6 +462,8 @@ void step_agent(CCpr *env, int i) {
   env->grid[next_grid_idx] = agent_tile;
   agent->r = next_r;
   agent->c = next_c;
+
+  return true;
 }
 
 void c_step(CCpr *env) {
@@ -494,8 +472,14 @@ void c_step(CCpr *env) {
   memset(env->interactive_food_agent_count, 0,
          (env->width * env->height + 7) / 8);
 
+  bool logged = false;
   for (int i = 0; i < env->num_agents; i++) {
-    step_agent(env, i);
+    logged = step_agent(env, i);
+  }
+  // To cope with sweeps waiting for logs, in case nothing moves
+  if (!logged) {
+    env->logs[0].score += 0;
+    add_log(env->log_buffer, &env->logs[0]);
   }
 
   spawn_foods(env);
@@ -521,12 +505,13 @@ Rectangle UV_COORDS[7] = {
     (Rectangle){384, 0, 128, 128},
 };
 
-typedef struct {
+typedef struct Renderer Renderer;
+struct Renderer {
   int cell_size;
   int width;
   int height;
   Texture2D puffer;
-} Renderer;
+};
 
 Renderer *init_renderer(int cell_size, int width, int height) {
   Renderer *renderer = (Renderer *)calloc(1, sizeof(Renderer));

From 27be9d2ae6a5c801b33af92472d8c3349d090114 Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Tue, 1 Apr 2025 19:32:06 +0200
Subject: [PATCH 3/8] make 1 log per env logging every step

---
 pufferlib/ocean/cpr/cpr.h | 52 ++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index cfc5e01887..4f7d2e21bb 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -122,7 +122,7 @@ struct CCpr {
   Agent *agents;
 
   LogBuffer *log_buffer;
-  Log *logs;
+  Log *log;
 
   uint8_t *interactive_food_agent_count;
 
@@ -136,7 +136,7 @@ void init_ccpr(CCpr *env) {
   env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
   env->vision_window = 2 * env->vision + 1;
   env->obs_size = env->vision_window * env->vision_window;
-  env->logs = (Log *)calloc(env->num_agents, sizeof(Log));
+  env->log = (Log *)calloc(1, sizeof(Log));
   env->interactive_food_agent_count =
       (uint8_t *)calloc((env->width * env->height + 7) / 8, sizeof(uint8_t));
   env->foods = allocate_foodlist(env->width * env->height);
@@ -167,7 +167,7 @@ void free_CCpr(CCpr *env) {
   free(env->rewards);
   free(env->terminals);
   free_logbuffer(env->log_buffer);
-  free(env->logs);
+  free(env->log);
   free(env->interactive_food_agent_count);
   free_foodlist(env->foods);
 }
@@ -294,6 +294,7 @@ void c_reset(CCpr *env) {
   env->tick = 0;
 
   memset(env->grid, EMPTY, (env->height * env->width) * sizeof(env->grid[0]));
+  memset(env->log, 0, sizeof(Log));
 
   // top walling
   for (int r = 0; r < env->vision; r++) {
@@ -320,7 +321,7 @@ void c_reset(CCpr *env) {
   // Agents
   srand(time(NULL));
   for (int i = 0; i < env->num_agents; i++) {
-    env->logs[i] = (Log){0};
+    // env->log[0] = (Log){0};
 
     Agent *agent = &env->agents[i];
 
@@ -367,15 +368,15 @@ void reward_agents_near(CCpr *env, int food_index) {
     if ((ac == food_c && (ar == food_r - 1 || ar == food_r + 1)) ||
         (ar == food_r && (ac == food_c - 1 || ac == food_c + 1))) {
       env->rewards[i] += env->interactive_food_reward;
-      env->logs[i].score += 5;
-      add_log(env->log_buffer, &env->logs[i]);
-      env->logs[i] = (Log){0};
+      env->log->score += 5;
+      // add_log(env->log_buffer, env->log);
+      // memset(env->log, 0, sizeof(Log));
     }
   }
   remove_food(env, food_index);
 }
 
-bool step_agent(CCpr *env, int i) {
+void step_agent(CCpr *env, int i) {
 
   int action = env->actions[i];
 
@@ -396,9 +397,9 @@ bool step_agent(CCpr *env, int i) {
     dc = 1;
     break; // RIGHT
   case 4:
-    return false; // No moves
+    return; // No moves
   }
-  env->logs[i].moves += 1;
+  env->log->moves += 1;
 
   // Get next row and column
   Agent *agent = &env->agents[i];
@@ -412,7 +413,7 @@ bool step_agent(CCpr *env, int i) {
 
   // Anything above should be obstacle
   if (tile >= INTERACTIVE_FOOD) {
-    env->logs[i].score += -0.01;
+    env->log->score += -0.01;
     env->rewards[i] += env->reward_move;
     next_r = agent->r;
     next_c = agent->c;
@@ -444,14 +445,14 @@ bool step_agent(CCpr *env, int i) {
 
   switch (tile) {
   case NORMAL_FOOD:
-    env->logs[i].score += 1;
+    env->log->score += 1;
     env->rewards[i] = env->reward_food;
     remove_food(env, next_grid_idx);
-    add_log(env->log_buffer, &env->logs[i]);
-    env->logs[i] = (Log){0};
+    // add_log(env->log_buffer, env->log);
+    // memset(env->log, 0, sizeof(Log));
     break;
   case EMPTY:
-    env->logs[i].score += -0.01;
+    env->log->score += -0.01;
     env->rewards[i] = env->reward_move;
     break;
   }
@@ -463,7 +464,7 @@ bool step_agent(CCpr *env, int i) {
   agent->r = next_r;
   agent->c = next_c;
 
-  return true;
+  return;
 }
 
 void c_step(CCpr *env) {
@@ -472,19 +473,24 @@ void c_step(CCpr *env) {
   memset(env->interactive_food_agent_count, 0,
          (env->width * env->height + 7) / 8);
 
-  bool logged = false;
+  // bool logged = false;
   for (int i = 0; i < env->num_agents; i++) {
-    logged = step_agent(env, i);
-  }
-  // To cope with sweeps waiting for logs, in case nothing moves
-  if (!logged) {
-    env->logs[0].score += 0;
-    add_log(env->log_buffer, &env->logs[0]);
+    step_agent(env, i);
   }
 
+  add_log(env->log_buffer, env->log);
+  memset(env->log, 0, sizeof(Log));
+  // // To cope with sweeps waiting for logs, in case nothing moves
+  // if (!logged) {
+  //   add_log(env->log_buffer, env->log);
+  //   memset(env->log, 0, sizeof(Log));
+  // }
+
   spawn_foods(env);
 
   compute_observations(env);
+
+  env->tick++;
 }
 
 // Raylib client

From e5f94ec16e6dc949099e4e3e572c98faf7b6c284 Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Wed, 9 Apr 2025 07:33:26 +0200
Subject: [PATCH 4/8] working cpr baseline

---
 config/ocean/cpr.ini           |  73 ++++++---
 pufferlib/ocean/cpr/cpr.c      |  14 +-
 pufferlib/ocean/cpr/cpr.h      | 288 +++++++++++++++++++++++++--------
 pufferlib/ocean/cpr/cpr.py     |  17 +-
 pufferlib/ocean/cpr/cy_cpr.pyx |  22 ++-
 pufferlib/ocean/cpr/grid.h     |  42 +++++
 pufferlib/sweep.py             | 120 +++++++++-----
 7 files changed, 427 insertions(+), 149 deletions(-)
 create mode 100644 pufferlib/ocean/cpr/grid.h

diff --git a/config/ocean/cpr.ini b/config/ocean/cpr.ini
index 6a04582c81..f2e94ec6aa 100644
--- a/config/ocean/cpr.ini
+++ b/config/ocean/cpr.ini
@@ -1,46 +1,67 @@
 [base]
 package = ocean
-env_name = cpr puffer_cpr
+env_name = cpr
 vec = multiprocessing
 rnn_name = Recurrent
 
 [env]
-num_envs = 1024
+num_envs = 512
 vision = 3
-widths = [32]
-heights = [32]
 num_agents = [12]
 report_interval=1
-reward_food = 1.0
-interactive_food_reward = 5.0
-reward_move = -0.01
+reward_food = 0.001
+interactive_food_reward = 0.002
+reward_move = +0.00
 food_base_spawn_rate = 2e-3
 
 [train]
-total_timesteps = 2_000_000
 num_envs = 1
 num_workers = 1
-env_batch_size = 1
-batch_size = 131072
-update_epochs = 1
-minibatch_size = 16384
-bptt_horizon = 8
-anneal_lr = False
-learning_rate=0.001
-gamma = 0.95
-gae_lambda = 0.85
-vf_ceof = 0.4
-clip_coef = 0.1
-vf_clip_coef = 0.1
-ent_coef = 0.01
-max_grad_norm = 0.86
-checkpoint_interval = 1000
+total_timesteps = 1_000_000_000
 device = cpu
+batch_size = 32768
+minibatch_size = 8192
+bptt_horizon = 16
+checkpoint_interval = 200
+learning_rate = 0.0008524
+gamma = 0.9989
+gae_lambda = 0.99
+vf_coef = 1
+ent_coef = 0.01
+adam_beta1 = 0.9
+adam_beta2 = 0.999
+adam_eps = 1e-12
+max_grad_norm = 0.5
+vf_clip_coef = 0.1
+update_epochs = 1
 
 [workspace]
 name = boxingbytes
 project = pufferai
-; [sweep.metric]
-; goal = maximize
-; name = environment/episode_return
 
+[sweep.metric]
+goal = maximize
+name = score 
+min = -10 
+max = 10 
+
+[sweep.env.reward_food]
+distribution = log_normal
+min = 0.0001
+max = 0.01
+mean = 0.001
+scale = auto 
+
+[sweep.env.interactive_food_reward]
+distribution = log_normal
+min = 0.0001
+max = 0.02
+mean = 0.002
+scale = auto
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 50e6
+max = 75e6
+mean = 60e6
+scale = time
\ No newline at end of file
diff --git a/pufferlib/ocean/cpr/cpr.c b/pufferlib/ocean/cpr/cpr.c
index 1c4f684452..f7896a9b60 100644
--- a/pufferlib/ocean/cpr/cpr.c
+++ b/pufferlib/ocean/cpr/cpr.c
@@ -3,16 +3,16 @@
 #include <unistd.h>
 
 int main() {
-  int width = 24;
-  int height = 24;
+  int width = 32;
+  int height = 32;
 
   int render_cell_size = 32;
 
   CCpr env = {
-      .num_agents = 8,
+      .num_agents = 1,
       .width = width,
       .height = height,
-      .vision = 2,
+      .vision = 3,
       .reward_food = 1.0f,
       .interactive_food_reward = 5.0f,
       .food_base_spawn_rate = 2e-3,
@@ -21,9 +21,9 @@ int main() {
   c_reset(&env);
 
   Renderer *renderer = init_renderer(render_cell_size, width, height);
-
   while (!WindowShouldClose()) {
 
+    c_render(renderer, &env);
     int st = 0;
     // User can take control of the first puffer
     if (IsKeyDown(KEY_LEFT_SHIFT)) {
@@ -41,12 +41,10 @@ int main() {
       sleep(2);
     }
     for (int i = st; i < env.num_agents; i++) {
-      env.actions[i] = rand() % 4;
+      env.actions[i] = rand() % 5;
       // printf("Agent %d gets actions %d\n", i, env->actions[i]);
     }
     c_step(&env);
-
-    c_render(renderer, &env);
   }
   close_renderer(renderer);
   free_CCpr(&env);
diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 4f7d2e21bb..8331954df1 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -8,6 +8,8 @@
 
 #include "raylib.h"
 
+#include "grid.h"
+
 #define EMPTY 0
 #define NORMAL_FOOD 1
 #define INTERACTIVE_FOOD 2
@@ -22,10 +24,28 @@
 #define CHECK_BIT(arr, i) (arr[(i) / 8] & (1 << ((i) % 8)))
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
+#define REWARD_20_HP -0
+#define REWARD_80_HP 0
+#define REWARD_DEATH -0.0f
+
+
+#define LOG_SCORE_REWARD_SMALL 1 
+#define LOG_SCORE_REWARD_MEDIUM 5
+#define LOG_SCORE_REWARD_MOVE - 0.0
+#define LOG_SCORE_REWARD_DEATH -1
+
+#define HP_REWARD_FOOD_MEDIUM 50
+#define HP_REWARD_FOOD_SMALL 20
+#define HP_LOSS_PER_STEP 1
+#define MAX_HP 100 
+
 typedef struct Log Log;
 struct Log {
   float score;
   float moves;
+  float food_nb;
+  float agents_alive;
+  float alive_steps;
 };
 
 typedef struct LogBuffer LogBuffer;
@@ -65,9 +85,15 @@ Log aggregate_and_clear(LogBuffer *logs) {
   for (int i = 0; i < logs->idx; i++) {
     log.score += logs->logs[i].score;
     log.moves += logs->logs[i].moves;
+    log.food_nb += logs->logs[i].food_nb;
+    log.agents_alive += logs->logs[i].agents_alive;
+    log.alive_steps += logs->logs[i].alive_steps;
   }
   log.score /= logs->idx;
   log.moves /= logs->idx;
+  log.food_nb /= logs->idx;
+  log.agents_alive /= logs->idx;
+  log.alive_steps /= logs->idx;
   logs->idx = 0;
   return log;
 }
@@ -77,6 +103,8 @@ struct Agent {
   int r;
   int c;
   int id;
+  float hp;
+  int direction;
 };
 
 typedef struct FoodList FoodList;
@@ -118,6 +146,8 @@ struct CCpr {
   int *actions;
   float *rewards;
   unsigned char *terminals;
+  unsigned char *truncations;
+  unsigned char *masks;
 
   Agent *agents;
 
@@ -135,7 +165,7 @@ void init_ccpr(CCpr *env) {
       (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
   env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
   env->vision_window = 2 * env->vision + 1;
-  env->obs_size = env->vision_window * env->vision_window;
+  env->obs_size = env->vision_window * env->vision_window;// + 1;
   env->log = (Log *)calloc(1, sizeof(Log));
   env->interactive_food_agent_count =
       (uint8_t *)calloc((env->width * env->height + 7) / 8, sizeof(uint8_t));
@@ -144,13 +174,15 @@ void init_ccpr(CCpr *env) {
 
 void allocate_ccpr(CCpr *env) {
   // Called by C stuff
-  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1);
+  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1); //+ 1;
   env->observations = (unsigned char *)calloc(env->num_agents * obs_size,
                                               sizeof(unsigned char));
   env->actions = (int *)calloc(env->num_agents, sizeof(unsigned int));
   env->rewards = (float *)calloc(env->num_agents, sizeof(float));
   env->terminals =
       (unsigned char *)calloc(env->num_agents, sizeof(unsigned char));
+  env->truncations = (unsigned char*)calloc(env->num_agents, sizeof(unsigned char));
+  env->masks = (unsigned char *)calloc(env->num_agents, sizeof(unsigned char));
   env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE);
   init_ccpr(env);
 }
@@ -166,6 +198,8 @@ void free_CCpr(CCpr *env) {
   free(env->actions);
   free(env->rewards);
   free(env->terminals);
+  free(env->truncations);
+  free(env->masks);
   free_logbuffer(env->log_buffer);
   free(env->log);
   free(env->interactive_food_agent_count);
@@ -185,6 +219,15 @@ void add_food(CCpr *env, int grid_idx, int food_type) {
   foods->indexes[foods->size++] = grid_idx;
 }
 
+void reward_agent(CCpr *env, int agent_id, float reward) {
+  // We don't reward if agent is full life
+  // Agent *agent = &env->agents[agent_id];
+  // if (agent->hp >= MAX_HP) {
+  //   return;
+  // }
+  env->rewards[agent_id] += reward;
+}
+
 void spawn_food(CCpr *env, int food_type) {
   // Randomly spawns such food in the grid
   int idx, tile;
@@ -261,22 +304,32 @@ void spawn_foods(CCpr *env) {
     }
   }
 
-  // Each turn there is random probability for a food to spawn at a random
-  // location To cope with resource depletion
-  int normalizer = (env->width * env->height) / 576;
-  if ((rand() / (double)RAND_MAX) <
-      min((env->food_base_spawn_rate * 2 * normalizer), 1e-2)) {
-    spawn_food(env, NORMAL_FOOD);
-  }
-  if ((rand() / (double)RAND_MAX) <
-      min((env->food_base_spawn_rate / 5.0 * normalizer), 5e-3)) {
-    spawn_food(env, INTERACTIVE_FOOD);
-  }
+  // // Each turn there is random probability for a food to spawn at a random
+  // // location To cope with resource depletion
+  // int normalizer = (env->width * env->height) / 576;
+  // if ((rand() / (double)RAND_MAX) <
+  //     min((env->food_base_spawn_rate * 2 * normalizer), 1e-2)) {
+  //   spawn_food(env, NORMAL_FOOD);
+  // }
+  // if ((rand() / (double)RAND_MAX) <
+  //     min((env->food_base_spawn_rate / 5.0 * normalizer), 5e-3)) {
+  //   spawn_food(env, INTERACTIVE_FOOD);
+  // }
 }
 
 void compute_observations(CCpr *env) {
+  // For full obs
+  // memcpy(env->observations, env->grid,
+  //        env->width * env->height * sizeof(unsigned char));
+  // return;
+
+  // For partial obs
   for (int i = 0; i < env->num_agents; i++) {
     Agent *agent = &env->agents[i];
+    // env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
+    if (agent->hp == 0) {
+      continue;
+    }
     int obs_offset = i * env->obs_size;
     int r_offset = agent->r - env->vision;
     int c_offset = agent->c - env->vision;
@@ -287,15 +340,50 @@ void compute_observations(CCpr *env) {
         env->observations[obs_idx] = env->grid[grid_idx];
       }
     }
+
+    
   }
 }
 
-void c_reset(CCpr *env) {
-  env->tick = 0;
+void add_hp(CCpr *env, int agent_id, float hp) {
+  Agent *agent = &env->agents[agent_id];
+  agent->hp += hp;
+  if (agent->hp > MAX_HP) {
+    agent->hp = MAX_HP;
+  } else if (agent->hp <= 0) {
+    agent->hp = 0;
+    env->log->score += LOG_SCORE_REWARD_DEATH;
+    env->rewards[agent->id] += REWARD_DEATH;
+    env->terminals[agent->id] = 1;
+  }
+}
+void remove_hp(CCpr *env, int agent_id, float hp) { add_hp(env, agent_id, -hp); }
 
-  memset(env->grid, EMPTY, (env->height * env->width) * sizeof(env->grid[0]));
-  memset(env->log, 0, sizeof(Log));
+void save_grid_to_file(CCpr *env, const char *filename) {
+    FILE *file = fopen(filename, "w");
+    if (!file) {
+        perror("Failed to open file");
+        return;
+    }
+    fprintf(file, "#ifndef GRID_H\n#define GRID_H\n\n");
+    fprintf(file, "#define GRID_HEIGHT %d\n", env->height);
+    fprintf(file, "#define GRID_WIDTH %d\n\n", env->width);
+    fprintf(file, "static const unsigned char grid[GRID_HEIGHT][GRID_WIDTH] = {\n");
+
+    for (int r = 0; r < env->height; r++) {
+        fprintf(file, "    {");
+        for (int c = 0; c < env->width; c++) {
+            unsigned char val = env->grid[r * env->width + c];
+            fprintf(file, "0x%02X%s", val, (c == env->width - 1) ? "" : ", ");
+        }
+        fprintf(file, "}%s\n", (r == env->height - 1) ? "" : ",");
+    }
+    fprintf(file, "};\n\n#endif // GRID_H\n");
+    fclose(file);
+}
 
+void make_grid_from_scratch(CCpr *env){
+  memset(env->grid, EMPTY, (env->height * env->width) * sizeof(env->grid[0]));
   // top walling
   for (int r = 0; r < env->vision; r++) {
     memset(env->grid + (r * env->width), WALL,
@@ -317,41 +405,46 @@ void c_reset(CCpr *env) {
     memset(env->grid + (r * env->width) + (env->width - env->vision), WALL,
            env->vision * sizeof(env->grid[0]));
   }
+  save_grid_to_file(env, "grid.h");
+}
 
-  // Agents
-  srand(time(NULL));
-  for (int i = 0; i < env->num_agents; i++) {
-    // env->log[0] = (Log){0};
-
-    Agent *agent = &env->agents[i];
-
-    agent->id = i;
-
-    int adr = 0;
-    bool allocated = false;
-
-    // Random allocation
-    while (!allocated) {
-
-      adr = rand() % (env->height * env->width);
-
-      if (env->grid[adr] == EMPTY) {
-        int r = adr / env->width;
-        int c = adr % env->width;
-        agent->r = r;
-        agent->c = c;
-        allocated = true;
-      }
+void spawn_agent(CCpr *env, int i){
+  Agent *agent = &env->agents[i];
+  agent->id = i;
+  agent->hp = 80;
+  int adr = 0;
+
+  bool allocated = false;
+  while (!allocated) {
+    adr = rand() % (env->height * env->width);
+    if (env->grid[adr] == EMPTY) {
+      int r = adr / env->width;
+      int c = adr % env->width;
+      agent->r = r;
+      agent->c = c;
+      allocated = true;
     }
+  }
+  assert(env->grid[adr] == EMPTY);
+  env->grid[adr] = get_agent_tile_from_id(agent->id);
+}
+void c_reset(CCpr *env) {
+  env->tick = 0;
+  memset(env->log, 0, sizeof(Log));
+  env->foods->size = 0;
+  memset(env->foods->indexes, 0, env->width * env->height * sizeof(int));
+  // make_grid_from_scratch(env);
+  memcpy(env->grid, grid_32_32_3v, env->width * env->height * sizeof(unsigned char));
 
-    assert(env->grid[adr] == EMPTY);
-
-    env->grid[adr] = get_agent_tile_from_id(agent->id);
+  for (int i = 0; i < env->num_agents; i++) {
+    spawn_agent(env, i);
   }
 
   init_foods(env);
-
+  memset(env->observations, 0, env->num_agents * env->obs_size * sizeof(unsigned char));
+  memset(env->truncations, 0, env->num_agents * sizeof(unsigned char));
   memset(env->terminals, 0, env->num_agents * sizeof(unsigned char));
+  memset(env->masks, 1, env->num_agents * sizeof(unsigned char));
   compute_observations(env);
 }
 
@@ -367,8 +460,9 @@ void reward_agents_near(CCpr *env, int food_index) {
 
     if ((ac == food_c && (ar == food_r - 1 || ar == food_r + 1)) ||
         (ar == food_r && (ac == food_c - 1 || ac == food_c + 1))) {
-      env->rewards[i] += env->interactive_food_reward;
-      env->log->score += 5;
+      reward_agent(env, i, env->interactive_food_reward);
+      env->log->score += LOG_SCORE_REWARD_MEDIUM;
+      add_hp(env, i, HP_REWARD_FOOD_MEDIUM);
       // add_log(env->log_buffer, env->log);
       // memset(env->log, 0, sizeof(Log));
     }
@@ -378,6 +472,8 @@ void reward_agents_near(CCpr *env, int food_index) {
 
 void step_agent(CCpr *env, int i) {
 
+  Agent *agent = &env->agents[i];
+
   int action = env->actions[i];
 
   int dr = 0;
@@ -386,15 +482,19 @@ void step_agent(CCpr *env, int i) {
   switch (action) {
   case 0:
     dr = -1;
+    agent->direction = 3;
     break; // UP
   case 1:
     dr = 1;
+    agent->direction = 1;
     break; // DOWN
   case 2:
     dc = -1;
+    agent->direction = 2;
     break; // LEFT
   case 3:
     dc = 1;
+    agent->direction = 0;
     break; // RIGHT
   case 4:
     return; // No moves
@@ -402,7 +502,6 @@ void step_agent(CCpr *env, int i) {
   env->log->moves += 1;
 
   // Get next row and column
-  Agent *agent = &env->agents[i];
 
   int next_r = agent->r + dr;
   int next_c = agent->c + dc;
@@ -413,7 +512,7 @@ void step_agent(CCpr *env, int i) {
 
   // Anything above should be obstacle
   if (tile >= INTERACTIVE_FOOD) {
-    env->log->score += -0.01;
+    env->log->score += LOG_SCORE_REWARD_MOVE;
     env->rewards[i] += env->reward_move;
     next_r = agent->r;
     next_c = agent->c;
@@ -445,14 +544,15 @@ void step_agent(CCpr *env, int i) {
 
   switch (tile) {
   case NORMAL_FOOD:
-    env->log->score += 1;
-    env->rewards[i] = env->reward_food;
+    env->log->score += LOG_SCORE_REWARD_SMALL;
+    reward_agent(env, i, env->reward_food);
+    add_hp(env, i, HP_REWARD_FOOD_SMALL);
     remove_food(env, next_grid_idx);
     // add_log(env->log_buffer, env->log);
     // memset(env->log, 0, sizeof(Log));
     break;
   case EMPTY:
-    env->log->score += -0.01;
+    env->log->score += LOG_SCORE_REWARD_MOVE;
     env->rewards[i] = env->reward_move;
     break;
   }
@@ -467,30 +567,77 @@ void step_agent(CCpr *env, int i) {
   return;
 }
 
+void clear_agent(CCpr *env, int agent_id) {
+  Agent *agent = &env->agents[agent_id];
+  if (agent->r < 0 || agent->c < 0) {
+    return;
+  }
+  int grid_idx = grid_index(env, agent->r, agent->c);
+  env->grid[grid_idx] = EMPTY;
+  agent->r = -1;
+  agent->c = -1;
+}
+
 void c_step(CCpr *env) {
+  env->tick++;
 
+  // memset(env->truncations, 0, env->num_agents * sizeof(unsigned char));
   memset(env->rewards, 0, env->num_agents * sizeof(float));
+  // memset(env->terminals, 0, env->num_agents * sizeof(unsigned char));
   memset(env->interactive_food_agent_count, 0,
          (env->width * env->height + 7) / 8);
 
-  // bool logged = false;
   for (int i = 0; i < env->num_agents; i++) {
+    if (env->agents[i].hp == 0) {
+      env->masks[i] = 0;
+      clear_agent(env, i);
+      continue;
+    }
     step_agent(env, i);
+    remove_hp(env, i, HP_LOSS_PER_STEP);
   }
 
-  add_log(env->log_buffer, env->log);
-  memset(env->log, 0, sizeof(Log));
-  // // To cope with sweeps waiting for logs, in case nothing moves
-  // if (!logged) {
-  //   add_log(env->log_buffer, env->log);
-  //   memset(env->log, 0, sizeof(Log));
-  // }
-
   spawn_foods(env);
 
+  //We loop again here because in the future an entity might have attacked an agent in the process
+  int alive_agents = 0;
+  for (int i = 0; i < env->num_agents; i++) {
+    if (env->agents[i].hp > 0) {
+      alive_agents += 1;
+      if (env->agents[i].hp < 20) {
+        env->rewards[i] += REWARD_20_HP;
+        env->log->score += REWARD_20_HP;
+      } else if (env->agents[i].hp > 80) {
+        env->rewards[i] += REWARD_80_HP;
+        env->log->score += REWARD_80_HP;
+      }
+    } 
+    // else {
+      // int grid_idx = grid_index(env, env->agents[i].r, env->agents[i].c);
+      // env->grid[grid_idx] = EMPTY;
+      // spawn_agent(env, i);
+    // }
+  }
+  if (alive_agents == 0) {
+    env->log->moves = 0;
+  }else{
+    env->log->moves /= alive_agents;
+  }
+  env->log->food_nb = env->foods->size;
+  env->log->agents_alive = alive_agents;
+  env->log->alive_steps = env->tick;
   compute_observations(env);
-
-  env->tick++;
+  
+  if (alive_agents < env->num_agents || env->tick > 2000) {
+    c_reset(env);
+    if (alive_agents < env->num_agents) {
+      memset(env->terminals, 1, env->num_agents * sizeof(unsigned char)); 
+    } else {
+      memset(env->truncations, 1, env->num_agents * sizeof(unsigned char));
+    }
+  }
+  add_log(env->log_buffer, env->log);
+  memset(env->log, 0, sizeof(Log));
 }
 
 // Raylib client
@@ -557,15 +704,20 @@ void c_render(Renderer *renderer, CCpr *env) {
       } else if (tile == NORMAL_FOOD || tile == INTERACTIVE_FOOD) {
         DrawRectangle(c * ts, r * ts, ts, ts, COLORS[tile]);
       } else {
-        int u = 128 * (tile % 8);
-        int v = 128 * (tile / 8);
-        Rectangle source_rect = (Rectangle){u, v, 128, 128};
-        Rectangle dest_rect = (Rectangle){c * ts, r * ts, ts, ts};
+
         int agent_id = get_agent_id_from_tile(tile);
         int col_id = agent_id % (sizeof(COLORS) / sizeof(COLORS[0]));
         Color color = COLORS[col_id];
+        int starting_sprite_x = 0;
+        float rotation = env->agents[agent_id].direction * 90.0f;
+        if (rotation == 180) {
+          starting_sprite_x = 128;
+          rotation = 0;
+        }
+        Rectangle source_rect = (Rectangle){starting_sprite_x, 0, 128, 128};
+        Rectangle dest_rect = (Rectangle){c * ts + ts/2, r * ts + ts/2, ts, ts};        
         DrawTexturePro(renderer->puffer, source_rect, dest_rect,
-                       (Vector2){0, 0}, 0, color);
+                       (Vector2){ts/2, ts/2}, rotation, color);
       }
     }
   }
diff --git a/pufferlib/ocean/cpr/cpr.py b/pufferlib/ocean/cpr/cpr.py
index aedfae6447..382936be13 100644
--- a/pufferlib/ocean/cpr/cpr.py
+++ b/pufferlib/ocean/cpr/cpr.py
@@ -6,16 +6,16 @@
 
 class PyCPR(pufferlib.PufferEnv):
     def __init__(self, 
-                num_envs = 1,
+                num_envs=1,
                 widths=[32],
                 heights=[32], 
                 num_agents=[8],  
-                vision=2, 
+                vision=3, 
                 reward_food=1.0, 
                 interactive_food_reward=5.0,
                 reward_move=-0.01,
                 food_base_spawn_rate=2e-3,
-                report_interval=250,
+                report_interval=1,
                 render_mode=None, 
                 buf=None
             ):
@@ -23,7 +23,7 @@ def __init__(self,
         heights = num_envs*heights 
         num_agents = num_envs*num_agents 
 
-        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1),(2*vision+1)), dtype=np.uint8)
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1),), dtype=np.uint8)
         self.single_action_space = gymnasium.spaces.Discrete(5)
         self.render_mode = render_mode
         self.num_agents = sum(num_agents)
@@ -38,6 +38,8 @@ def __init__(self,
             self.actions, 
             self.rewards, 
             self.terminals,
+            self.truncations,
+            self.masks,
             widths,
             heights,
             num_agents,
@@ -78,13 +80,16 @@ def close(self):
     timeout=30
 
     tot_agents = env.num_agents
-    actions = np.random.randint(0,4,(1024,tot_agents))
+    actions = np.random.randint(0,5,(1024,tot_agents))
 
     import time 
     start = time.time()
-    while time.time() - start < timeout:
+    # while time.time() - start < timeout:
+    while tick < 500:
         atns = actions[tick % 1024]
         env.step(atns)
+        if -1 in env.rewards:
+            breakpoint()
         # env.render()
         tick += 1
 
diff --git a/pufferlib/ocean/cpr/cy_cpr.pyx b/pufferlib/ocean/cpr/cy_cpr.pyx
index a8f92d70a3..f766713ab4 100644
--- a/pufferlib/ocean/cpr/cy_cpr.pyx
+++ b/pufferlib/ocean/cpr/cy_cpr.pyx
@@ -15,6 +15,9 @@ cdef extern from "cpr.h":
     ctypedef struct Log: 
         float score
         float moves
+        float food_nb
+        float agents_alive
+        float alive_steps
 
     ctypedef struct LogBuffer: 
         Log logs 
@@ -29,6 +32,8 @@ cdef extern from "cpr.h":
         int r 
         int c 
         int id 
+        int hp 
+        int direction 
 
     ctypedef struct FoodList:
         int *indexes 
@@ -53,11 +58,13 @@ cdef extern from "cpr.h":
         int *actions
         float *rewards
         unsigned char *terminals
+        unsigned char *masks
+        unsigned char *truncations
 
         Agent *agents
 
         LogBuffer *log_buffer
-        Log *logs
+        Log *log
 
         uint8_t *interactive_food_agent_count
         float interactive_food_reward
@@ -86,9 +93,12 @@ cdef class CyEnv:
         LogBuffer *logs 
         int num_envs 
 
-    def __init__(self, unsigned char[:,:,:] observations, int[:] actions, float[:] rewards, unsigned char[:] terminals,
-     list widths, list heights, list num_agents,int vision, 
-     float reward_food,float interactive_food_reward,float reward_move, float food_base_spawn_rate) -> None:
+    def __init__(
+        self, unsigned char[:,:] observations, int[:] actions, float[:] rewards,
+        unsigned char[:] terminals, unsigned char[:] truncations, unsigned char[:] masks,
+        list widths, list heights, list num_agents,int vision, 
+        float reward_food,float interactive_food_reward,float reward_move, float food_base_spawn_rate
+    ) -> None:
         self.num_envs = len(num_agents)
         self.envs = <CCpr*>calloc(self.num_envs, sizeof(CCpr))
         self.logs = allocate_logbuffer(LOG_BUFFER_SIZE)
@@ -97,10 +107,12 @@ cdef class CyEnv:
         cdef int n = 0 
         for i in range(self.num_envs):
             self.envs[i] = CCpr(
-                observations = &observations[n,0,0],
+                observations = &observations[n,0],
                 actions=&actions[n],
                 rewards=&rewards[n],
                 terminals=&terminals[n],
+                truncations=&truncations[n],
+                masks=&masks[n],
                 log_buffer=self.logs,
                 width=widths[i],
                 height=heights[i],
diff --git a/pufferlib/ocean/cpr/grid.h b/pufferlib/ocean/cpr/grid.h
new file mode 100644
index 0000000000..7682999664
--- /dev/null
+++ b/pufferlib/ocean/cpr/grid.h
@@ -0,0 +1,42 @@
+#ifndef GRID_H
+#define GRID_H
+
+#define GRID_HEIGHT 32
+#define GRID_WIDTH 32
+
+static const unsigned char grid_32_32_3v[GRID_HEIGHT][GRID_WIDTH] = {
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03},
+    {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03}
+};
+
+#endif // GRID_H
diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index 2105cde210..49c2494a8d 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -27,6 +27,12 @@ def __init__(self, min, max, scale, mean, is_integer=False):
         self.is_integer = is_integer
 
 class Linear(Space):
+    def __init__(self, min, max, scale, mean, is_integer=False):
+        if scale == 'auto':
+            scale = 0.5
+
+        super().__init__(min, max, scale, mean, is_integer)
+
     def normalize(self, value):
         #assert isinstance(value, (int, float))
         zero_one = (value - self.min)/(self.max - self.min)
@@ -40,6 +46,13 @@ def unnormalize(self, value):
         return value
 
 class Pow2(Space):
+    def __init__(self, min, max, scale, mean, is_integer=False):
+        if scale == 'auto':
+            scale = 0.5
+            #scale = 2 / (np.log2(max) - np.log2(min))
+
+        super().__init__(min, max, scale, mean, is_integer)
+
     def normalize(self, value):
         #assert isinstance(value, (int, float))
         #assert value != 0.0
@@ -56,9 +69,11 @@ class Log(Space):
     base: int = 10
 
     def __init__(self, min, max, scale, mean, is_integer=False):
-        if scale == 'auto':
+        if scale == 'time':
             # TODO: Set scaling param intuitively based on number of jumps from min to max
             scale = 1 / (np.log2(max) - np.log2(min))
+        elif scale == 'auto':
+            scale = 0.5
 
         super().__init__(min, max, scale, mean, is_integer)
 
@@ -79,6 +94,12 @@ def unnormalize(self, value):
 class Logit(Space):
     base: int = 10
 
+    def __init__(self, min, max, scale, mean, is_integer=False):
+        if scale == 'auto':
+            scale = 0.5
+
+        super().__init__(min, max, scale, mean, is_integer)
+
     def normalize(self, value):
         #assert isinstance(value, (int, float))
         #assert value != 0.0
@@ -91,7 +112,7 @@ def unnormalize(self, value):
         log_spaced = zero_one*(math.log(1-self.max, self.base) - math.log(1-self.min, self.base)) + math.log(1-self.min, self.base)
         return 1 - self.base**log_spaced
 
-def _carbs_params_from_puffer_sweep(sweep_config):
+def _params_from_puffer_sweep(sweep_config):
     param_spaces = {}
     for name, param in sweep_config.items():
         if name in ('method', 'name', 'metric', 'max_score'):
@@ -99,7 +120,7 @@ def _carbs_params_from_puffer_sweep(sweep_config):
 
         assert isinstance(param, dict)
         if any(isinstance(param[k], dict) for k in param):
-            param_spaces[name] = _carbs_params_from_puffer_sweep(param)
+            param_spaces[name] = _params_from_puffer_sweep(param)
             continue
  
         assert 'distribution' in param
@@ -130,7 +151,7 @@ def _carbs_params_from_puffer_sweep(sweep_config):
 
 class Hyperparameters:
     def __init__(self, config, verbose=True):
-        self.spaces = _carbs_params_from_puffer_sweep(config)
+        self.spaces = _params_from_puffer_sweep(config)
         self.flat_spaces = dict(pufferlib.utils.unroll_nested_dict(self.spaces))
         self.num = len(self.flat_spaces)
 
@@ -156,7 +177,6 @@ def __init__(self, config, verbose=True):
             for name, space in self.flat_spaces.items():
                 print(f'\t{name}: {space.unnormalize(min(space.norm_mean + space.scale, space.norm_max))}')
 
-
     def sample(self, n, mu=None, scale=1):
         if mu is None:
             mu = self.search_centers
@@ -372,14 +392,14 @@ def suggest(self, fill):
         # Transformed scores
         min_score = self.min_score
         if min_score is None:
-            min_score = np.min(y) - abs(np.min(y))
+            min_score = np.min(y) - np.min(np.abs(y))
 
         if np.min(y) < min_score - 1e-6:
             raise ValueError(f'Min score {min_score} is less than min score in data {np.min(y)}')
 
         max_score = self.max_score
         if max_score is None:
-            max_score = np.max(y) + abs(np.max(y))
+            max_score = np.max(y) + np.max(np.abs(y))
 
         if np.max(y) > max_score + 1e-6:
             raise ValueError(f'Max score {max_score} is greater than max score in data {np.max(y)}')
@@ -632,55 +652,87 @@ def observe(self, hypers, score, cost, is_failure=False):
         else:
             self.success_observations.append(new_observation)
 
-'''
-from carbs import (
-    CARBS,
-    CARBSParams,
-    ObservationInParam,
-    Param,
-    LinearSpace,
-    Pow2Space,
-    LogSpace,
-    LogitSpace,
-)
-
-class PufferCarbs:
+def _carbs_params_from_puffer_sweep(sweep_config):
+    from carbs import (
+        Param,
+        LinearSpace,
+        LogSpace,
+        LogitSpace,
+    )
+
+    param_spaces = {}
+    for name, param in sweep_config.items():
+        if name in ('method', 'name', 'metric', 'max_score'):
+            continue
+
+        assert isinstance(param, dict)
+        if any(isinstance(param[k], dict) for k in param):
+            param_spaces[name] = _carbs_params_from_puffer_sweep(param)
+            continue
+ 
+        assert 'distribution' in param
+        distribution = param['distribution']
+        search_center = param['mean']
+        kwargs = dict(
+            min=param['min'],
+            max=param['max'],
+        )
+        if distribution == 'uniform':
+            space = LinearSpace(**kwargs)
+        elif distribution in ('int_uniform', 'uniform_pow2'):
+            space = LinearSpace(**kwargs, is_integer=True)
+        elif distribution == 'log_normal':
+            space = LogSpace(**kwargs)
+        elif distribution == 'logit_normal':
+            space = LogitSpace(**kwargs)
+        else:
+            raise ValueError(f'Invalid distribution: {distribution}')
+
+        param_spaces[name] = Param(
+            name=name,
+            space=space,
+            search_center=search_center
+        )
+
+    return param_spaces
+
+class Carbs:
     def __init__(self,
             sweep_config: dict,
             max_suggestion_cost: float = None,
             resample_frequency: int = 5,
             num_random_samples: int = 10,
         ):
+
         param_spaces = _carbs_params_from_puffer_sweep(sweep_config)
         flat_spaces = [e[1] for e in pufferlib.utils.unroll_nested_dict(param_spaces)]
         for e in flat_spaces:
             print(e.name, e.space)
 
-        metric = sweep_config['metric']
-        goal = metric['goal']
-        assert goal in ['maximize', 'minimize'], f"Invalid goal {goal}"
-        self.carbs_params = CARBSParams(
-            better_direction_sign=1 if goal == 'maximize' else -1,
+        from carbs import (
+            CARBSParams,
+            CARBS,
+        )
+
+        carbs_params = CARBSParams(
+            better_direction_sign=1,
             is_wandb_logging_enabled=False,
             resample_frequency=resample_frequency,
             num_random_samples=num_random_samples,
             max_suggestion_cost=max_suggestion_cost,
             is_saved_on_every_observation=False,
-            #num_candidates_for_suggestion_per_dim=10
         )
-        self.carbs = CARBS(self.carbs_params, flat_spaces)
+        self.carbs = CARBS(carbs_params, flat_spaces)
 
     def suggest(self, args):
-        #start = time.time()
         self.suggestion = self.carbs.suggest().suggestion
-        #print(f'Suggestion took {time.time() - start} seconds')
         for k in ('train', 'env'):
             for name, param in args['sweep'][k].items():
                 if name in self.suggestion:
                     args[k][name] = self.suggestion[name]
 
-    def observe(self, score, cost, is_failure=False):
-        #start = time.time()
+    def observe(self, hypers, score, cost, is_failure=False):
+        from carbs import ObservationInParam
         self.carbs.observe(
             ObservationInParam(
                 input=self.suggestion,
@@ -688,8 +740,4 @@ def observe(self, score, cost, is_failure=False):
                 cost=cost,
                 is_failure=is_failure,
             )
-        )
-        #print(f'Observation took {time.time() - start} seconds')
-
-
-'''
+        )
\ No newline at end of file

From 81aabd2761b3c8eb3da158094388046ca034c3de Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Fri, 11 Apr 2025 07:24:27 +0200
Subject: [PATCH 5/8] CPR: reset only when all agents are dead or the episode is truncated

---
 config/ocean/cpr.ini      | 6 +++---
 pufferlib/ocean/cpr/cpr.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/config/ocean/cpr.ini b/config/ocean/cpr.ini
index f2e94ec6aa..abd21ea736 100644
--- a/config/ocean/cpr.ini
+++ b/config/ocean/cpr.ini
@@ -9,15 +9,15 @@ num_envs = 512
 vision = 3
 num_agents = [12]
 report_interval=1
-reward_food = 0.001
-interactive_food_reward = 0.002
+reward_food = 0.1
+interactive_food_reward = 0.2
 reward_move = +0.00
 food_base_spawn_rate = 2e-3
 
 [train]
 num_envs = 1
 num_workers = 1
-total_timesteps = 1_000_000_000
+total_timesteps = 60_000_000
 device = cpu
 batch_size = 32768
 minibatch_size = 8192
diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 8331954df1..1600260101 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -26,7 +26,7 @@
 
 #define REWARD_20_HP -0
 #define REWARD_80_HP 0
-#define REWARD_DEATH -0.0f
+#define REWARD_DEATH -1.0f
 
 
 #define LOG_SCORE_REWARD_SMALL 1 
@@ -628,9 +628,9 @@ void c_step(CCpr *env) {
   env->log->alive_steps = env->tick;
   compute_observations(env);
   
-  if (alive_agents < env->num_agents || env->tick > 2000) {
+  if (alive_agents == 0|| env->tick > 1000) {
     c_reset(env);
-    if (alive_agents < env->num_agents) {
+    if (alive_agents == 0) {
       memset(env->terminals, 1, env->num_agents * sizeof(unsigned char)); 
     } else {
       memset(env->truncations, 1, env->num_agents * sizeof(unsigned char));

From 28316815cfe0153141e88386be93774abed30eac Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Wed, 30 Apr 2025 06:30:45 +0200
Subject: [PATCH 6/8] Improve common pool resource env

---
 pufferlib/ocean/cpr/cpr.h  | 22 ++++++++++++++--------
 pufferlib/ocean/cpr/cpr.py |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 1600260101..89380f6cef 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -165,7 +165,7 @@ void init_ccpr(CCpr *env) {
       (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
   env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
   env->vision_window = 2 * env->vision + 1;
-  env->obs_size = env->vision_window * env->vision_window;// + 1;
+  env->obs_size = env->vision_window * env->vision_window + 1;
   env->log = (Log *)calloc(1, sizeof(Log));
   env->interactive_food_agent_count =
       (uint8_t *)calloc((env->width * env->height + 7) / 8, sizeof(uint8_t));
@@ -174,7 +174,7 @@ void init_ccpr(CCpr *env) {
 
 void allocate_ccpr(CCpr *env) {
   // Called by C stuff
-  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1); //+ 1;
+  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1) + 1;
   env->observations = (unsigned char *)calloc(env->num_agents * obs_size,
                                               sizeof(unsigned char));
   env->actions = (int *)calloc(env->num_agents, sizeof(unsigned int));
@@ -221,10 +221,10 @@ void add_food(CCpr *env, int grid_idx, int food_type) {
 
 void reward_agent(CCpr *env, int agent_id, float reward) {
   // We don't reward if agent is full life
-  // Agent *agent = &env->agents[agent_id];
-  // if (agent->hp >= MAX_HP) {
-  //   return;
-  // }
+  Agent *agent = &env->agents[agent_id];
+  if (agent->hp >= MAX_HP) {
+    return;
+  }
   env->rewards[agent_id] += reward;
 }
 
@@ -326,7 +326,7 @@ void compute_observations(CCpr *env) {
   // For partial obs
   for (int i = 0; i < env->num_agents; i++) {
     Agent *agent = &env->agents[i];
-    // env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
+    env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
     if (agent->hp == 0) {
       continue;
     }
@@ -553,7 +553,7 @@ void step_agent(CCpr *env, int i) {
     break;
   case EMPTY:
     env->log->score += LOG_SCORE_REWARD_MOVE;
-    env->rewards[i] = env->reward_move;
+    env->rewards[i] += env->reward_move;
     break;
   }
 
@@ -633,6 +633,12 @@ void c_step(CCpr *env) {
     if (alive_agents == 0) {
       memset(env->terminals, 1, env->num_agents * sizeof(unsigned char)); 
     } else {
+      //Agents get rewarded for going all the way to the end
+      for (int i = 0; i < env->num_agents; i++) {
+        if (env->agents[i].hp > 0) {
+          env->rewards[i] += 1;
+        }
+      }
       memset(env->truncations, 1, env->num_agents * sizeof(unsigned char));
     }
   }
diff --git a/pufferlib/ocean/cpr/cpr.py b/pufferlib/ocean/cpr/cpr.py
index 382936be13..02a8ae01df 100644
--- a/pufferlib/ocean/cpr/cpr.py
+++ b/pufferlib/ocean/cpr/cpr.py
@@ -23,7 +23,7 @@ def __init__(self,
         heights = num_envs*heights 
         num_agents = num_envs*num_agents 
 
-        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1),), dtype=np.uint8)
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1)+1,), dtype=np.uint8)
         self.single_action_space = gymnasium.spaces.Discrete(5)
         self.render_mode = render_mode
         self.num_agents = sum(num_agents)

From 842a5dc120d67857e9b9561ad5bdf98e3cb0e22d Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Sat, 10 May 2025 18:17:49 +0200
Subject: [PATCH 7/8] Revert the previous commit (CPR HP observation and reward changes)

---
 pufferlib/ocean/cpr/cpr.h  | 22 ++++++++--------------
 pufferlib/ocean/cpr/cpr.py |  2 +-
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/pufferlib/ocean/cpr/cpr.h b/pufferlib/ocean/cpr/cpr.h
index 89380f6cef..1600260101 100644
--- a/pufferlib/ocean/cpr/cpr.h
+++ b/pufferlib/ocean/cpr/cpr.h
@@ -165,7 +165,7 @@ void init_ccpr(CCpr *env) {
       (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
   env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
   env->vision_window = 2 * env->vision + 1;
-  env->obs_size = env->vision_window * env->vision_window + 1;
+  env->obs_size = env->vision_window * env->vision_window;// + 1;
   env->log = (Log *)calloc(1, sizeof(Log));
   env->interactive_food_agent_count =
       (uint8_t *)calloc((env->width * env->height + 7) / 8, sizeof(uint8_t));
@@ -174,7 +174,7 @@ void init_ccpr(CCpr *env) {
 
 void allocate_ccpr(CCpr *env) {
   // Called by C stuff
-  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1) + 1;
+  int obs_size = (2 * env->vision + 1) * (2 * env->vision + 1); //+ 1;
   env->observations = (unsigned char *)calloc(env->num_agents * obs_size,
                                               sizeof(unsigned char));
   env->actions = (int *)calloc(env->num_agents, sizeof(unsigned int));
@@ -221,10 +221,10 @@ void add_food(CCpr *env, int grid_idx, int food_type) {
 
 void reward_agent(CCpr *env, int agent_id, float reward) {
   // We don't reward if agent is full life
-  Agent *agent = &env->agents[agent_id];
-  if (agent->hp >= MAX_HP) {
-    return;
-  }
+  // Agent *agent = &env->agents[agent_id];
+  // if (agent->hp >= MAX_HP) {
+  //   return;
+  // }
   env->rewards[agent_id] += reward;
 }
 
@@ -326,7 +326,7 @@ void compute_observations(CCpr *env) {
   // For partial obs
   for (int i = 0; i < env->num_agents; i++) {
     Agent *agent = &env->agents[i];
-    env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
+    // env->observations[env->vision_window*env->vision_window + i*env->obs_size] = agent->hp;
     if (agent->hp == 0) {
       continue;
     }
@@ -553,7 +553,7 @@ void step_agent(CCpr *env, int i) {
     break;
   case EMPTY:
     env->log->score += LOG_SCORE_REWARD_MOVE;
-    env->rewards[i] += env->reward_move;
+    env->rewards[i] = env->reward_move;
     break;
   }
 
@@ -633,12 +633,6 @@ void c_step(CCpr *env) {
     if (alive_agents == 0) {
       memset(env->terminals, 1, env->num_agents * sizeof(unsigned char)); 
     } else {
-      //Agents get rewarded for going all the way to the end
-      for (int i = 0; i < env->num_agents; i++) {
-        if (env->agents[i].hp > 0) {
-          env->rewards[i] += 1;
-        }
-      }
       memset(env->truncations, 1, env->num_agents * sizeof(unsigned char));
     }
   }
diff --git a/pufferlib/ocean/cpr/cpr.py b/pufferlib/ocean/cpr/cpr.py
index 02a8ae01df..382936be13 100644
--- a/pufferlib/ocean/cpr/cpr.py
+++ b/pufferlib/ocean/cpr/cpr.py
@@ -23,7 +23,7 @@ def __init__(self,
         heights = num_envs*heights 
         num_agents = num_envs*num_agents 
 
-        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1)+1,), dtype=np.uint8)
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=255, shape=((2*vision+1)*(2*vision+1),), dtype=np.uint8)
         self.single_action_space = gymnasium.spaces.Discrete(5)
         self.render_mode = render_mode
         self.num_agents = sum(num_agents)

From 952689c925d6ca55ba3a6d6b04eb0dd345ba7f5b Mon Sep 17 00:00:00 2001
From: mx2000 <max.aguirre2424b@gmail.com>
Date: Sat, 10 May 2025 18:18:49 +0200
Subject: [PATCH 8/8] sweep.py: whitespace-only change at end of file

---
 pufferlib/sweep.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index 49c2494a8d..ce9202603f 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -740,4 +740,5 @@ def observe(self, hypers, score, cost, is_failure=False):
                 cost=cost,
                 is_failure=is_failure,
             )
-        )
\ No newline at end of file
+        )
+        
\ No newline at end of file