From 16e2288f10d1d5a36479bcf87ae02d64282618a6 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Wed, 5 Jul 2023 17:05:32 -0400
Subject: [PATCH 01/15] Began updating repo to use gymnasium

---
 .../environment_runner_batch.py               |  15 +-
 .../environment_runners/parallel_env.py       |  10 +-
 continual_rl/experiments/tasks/image_task.py  |   2 +-
 .../experiments/tasks/make_atari_task.py      |   2 +-
 .../experiments/tasks/make_minihack_task.py   |   2 +-
 .../experiments/tasks/make_procgen_task.py    |   2 +-
 .../experiments/tasks/minigrid_task.py        |   2 +-
 continual_rl/utils/env_wrappers.py            | 162 ++++++++----------
 continual_rl/utils/utils.py                   |   2 +-
 setup.py                                      |  13 +-
 10 files changed, 104 insertions(+), 108 deletions(-)

diff --git a/continual_rl/experiments/environment_runners/environment_runner_batch.py b/continual_rl/experiments/environment_runners/environment_runner_batch.py
index 177445f2..f722c34f 100644
--- a/continual_rl/experiments/environment_runners/environment_runner_batch.py
+++ b/continual_rl/experiments/environment_runners/environment_runner_batch.py
@@ -25,7 +25,8 @@ def __init__(self, policy, num_parallel_envs, timesteps_per_collection, render_c
         self._parallel_env = None
         self._last_observations = None  # To allow returning mid-episode
         self._last_timestep_data = None  # Always stores the last thing seen, even across "dones"
-        self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=np.float)
+        # NOTE: np.float is  deprecated in numpy 1.24.4
+        self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=float)
 
         # Used to determine what to save off to logs and when
         self._observations_to_render = []
@@ -41,7 +42,8 @@ def _initialize_envs(self, env_spec, preprocessor):
             self._parallel_env = ParallelEnv(env_specs, self._output_dir)
 
         # Initialize the observation time-batch with n of the first observation.
-        raw_observations = self._parallel_env.reset()
+        results = self._parallel_env.reset()
+        raw_observations, infos = list(results)
         processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
         return processed_observations
 
@@ -50,11 +52,11 @@ def _reset_env(self, env_id):
         ParallelEnv doesn't readily expose manually resetting an environment, so doing that here.
         """
         if env_id == 0:
-            observation = self._parallel_env.envs[0].reset()
+            observation, _ = self._parallel_env.envs[0].reset()
         else:
             local = self._parallel_env.locals[env_id-1]
             local.send(("reset", None))
-            observation = local.recv()
+            observation, _ = local.recv()
 
         return observation
 
@@ -119,8 +121,9 @@ def collect_data(self, task_spec):
 
             # ParallelEnv automatically resets the env and returns the new observation when a "done" occurs
             result = self._parallel_env.step(actions)
-            raw_observations, rewards, dones, infos = list(result)
-
+            raw_observations, rewards, terminated, truncated, infos = list(result)
+            dones = np.logical_or(terminated, truncated)
+            
             self._total_timesteps += self._num_parallel_envs
             self._last_timestep_data = timestep_data
             processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
diff --git a/continual_rl/experiments/environment_runners/parallel_env.py b/continual_rl/experiments/environment_runners/parallel_env.py
index 03036da7..1a52529d 100644
--- a/continual_rl/experiments/environment_runners/parallel_env.py
+++ b/continual_rl/experiments/environment_runners/parallel_env.py
@@ -90,16 +90,16 @@ def __del__(self):
     def reset(self):
         for local in self.locals:
             local.send(("reset", None))
-        results = [self._local_env.reset()] + [local.recv() for local in self.locals]
+        results = zip(*[self._local_env.reset()] + [local.recv() for local in self.locals])
         return results
 
     def step(self, actions):
         for local, action in zip(self.locals, actions[1:]):
             local.send(("step", action))
-        obs, reward, done, info = self._local_env.step(actions[0])
-        if done:
-            obs = self._local_env.reset()
-        results = zip(*[(obs, reward, done, info)] + [local.recv() for local in self.locals])
+        obs, reward, terminated, truncated, info = self._local_env.step(actions[0])
+        if terminated or truncated:
+            obs, _ = self._local_env.reset()
+        results = zip(*[(obs, reward, terminated, truncated, info)] + [local.recv() for local in self.locals])
         return results
 
     def render(self):
diff --git a/continual_rl/experiments/tasks/image_task.py b/continual_rl/experiments/tasks/image_task.py
index 03217a9f..6fd12a1f 100644
--- a/continual_rl/experiments/tasks/image_task.py
+++ b/continual_rl/experiments/tasks/image_task.py
@@ -1,6 +1,6 @@
 import torch
 import torchvision
-from gym.spaces.box import Box
+from gymnasium.spaces.box import Box
 from continual_rl.experiments.tasks.task_base import TaskBase
 from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
 from continual_rl.utils.utils import Utils
diff --git a/continual_rl/experiments/tasks/make_atari_task.py b/continual_rl/experiments/tasks/make_atari_task.py
index 0b50fe63..9e11cf8d 100644
--- a/continual_rl/experiments/tasks/make_atari_task.py
+++ b/continual_rl/experiments/tasks/make_atari_task.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 
 from continual_rl.utils.env_wrappers import (
     NoopResetEnv,
diff --git a/continual_rl/experiments/tasks/make_minihack_task.py b/continual_rl/experiments/tasks/make_minihack_task.py
index 6a7e1194..884174f0 100644
--- a/continual_rl/experiments/tasks/make_minihack_task.py
+++ b/continual_rl/experiments/tasks/make_minihack_task.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 import numpy as np
 import os
 
diff --git a/continual_rl/experiments/tasks/make_procgen_task.py b/continual_rl/experiments/tasks/make_procgen_task.py
index b4dc70c6..0b24f345 100644
--- a/continual_rl/experiments/tasks/make_procgen_task.py
+++ b/continual_rl/experiments/tasks/make_procgen_task.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 
 from .image_task import ImageTask
 
diff --git a/continual_rl/experiments/tasks/minigrid_task.py b/continual_rl/experiments/tasks/minigrid_task.py
index c7f37fc1..6687363d 100644
--- a/continual_rl/experiments/tasks/minigrid_task.py
+++ b/continual_rl/experiments/tasks/minigrid_task.py
@@ -1,7 +1,7 @@
 import torch
 import numpy as np
 import gym_minigrid  # Needed for Utils.make_env
-import gym
+import gymnasium as gym
 from continual_rl.experiments.tasks.task_base import TaskBase
 from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
 from continual_rl.utils.utils import Utils
diff --git a/continual_rl/utils/env_wrappers.py b/continual_rl/utils/env_wrappers.py
index 8548aabf..e0d59eb5 100644
--- a/continual_rl/utils/env_wrappers.py
+++ b/continual_rl/utils/env_wrappers.py
@@ -1,17 +1,17 @@
 # The MIT License
-#
-# Copyright (c) 2017 OpenAI (http://openai.com)
-#
+
+# Copyright (c) 2019 Antonin Raffin
+
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-#
+
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
-#
+
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -21,71 +21,62 @@
 # THE SOFTWARE.
 
 # Taken from
-#   https://raw.githubusercontent.com/openai/baselines/9b68103b737ac46bc201dfb3121cfa5df2127e53/baselines/common/wrappers.py
-#   https://raw.githubusercontent.com/openai/baselines/7c520852d9cf4eaaad326a3d548efc915dc60c10/baselines/common/atari_wrappers.py
-#   https://github.com/facebookresearch/torchbeast/blob/542c607cfe4adbc1967c213e8c248f29b13b64b6/torchbeast/atari_wrappers.py
+#  https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html
 # and slightly modified.
 
 import numpy as np
 import os
 os.environ.setdefault('PATH', '')
 from collections import deque
-import gym
-from gym import spaces
+import gymnasium as gym
+from gymnasium import spaces
 import torch
 import cv2
 cv2.ocl.setUseOpenCL(False)
 
-
 class NoopResetEnv(gym.Wrapper):
     def __init__(self, env, noop_max=30):
         """Sample initial states by taking random number of no-ops on reset.
         No-op is assumed to be action 0.
         """
-        gym.Wrapper.__init__(self, env)
+        super().__init__(env)
         self.noop_max = noop_max
         self.override_num_noops = None
         self.noop_action = 0
         assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
 
     def reset(self, **kwargs):
-        """ Do no-op action for a number of steps in [1, noop_max]."""
         self.env.reset(**kwargs)
         if self.override_num_noops is not None:
             noops = self.override_num_noops
         else:
             noops = self.unwrapped.np_random.integers(1, self.noop_max + 1)
         assert noops > 0
-        obs = None
+        obs = np.zeros(0)
+        info: Dict = {}
         for _ in range(noops):
-            obs, _, done, _ = self.env.step(self.noop_action)
-            if done:
-                obs = self.env.reset(**kwargs)
-        return obs
-
-    def step(self, ac):
-        return self.env.step(ac)
+            obs, _, terminated, truncated, info = self.env.step(self.noop_action)
+            if terminated or truncated:
+                obs, info = self.env.reset(**kwargs)
+        return obs, info
 
 
 class FireResetEnv(gym.Wrapper):
     def __init__(self, env):
         """Take action on reset for environments that are fixed until firing."""
-        gym.Wrapper.__init__(self, env)
-        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
+        super().__init__(env)
+        assert env.unwrapped.get_action_meanings()[1] == "FIRE"
         assert len(env.unwrapped.get_action_meanings()) >= 3
 
     def reset(self, **kwargs):
         self.env.reset(**kwargs)
-        obs, _, done, _ = self.env.step(1)
-        if done:
+        obs, _, terminated, truncated, _ = self.env.step(1)
+        if terminated or truncated:
             self.env.reset(**kwargs)
-        obs, _, done, _ = self.env.step(2)
-        if done:
+        obs, _, terminated, truncated, _ = self.env.step(2)
+        if terminated or truncated:
             self.env.reset(**kwargs)
-        return obs
-
-    def step(self, ac):
-        return self.env.step(ac)
+        return obs, {}
 
 
 class EpisodicLifeEnv(gym.Wrapper):
@@ -95,38 +86,25 @@ def __init__(self, env):
 
         This wrapper should come before any reward-modifying wrappers, so the score is maintained.
         """
-        gym.Wrapper.__init__(self, env)
+        super().__init__(env)
         self.lives = 0
         self.was_real_done = True
         self.real_episode_return = 0
 
     def step(self, action):
-        obs, reward, done, info = self.env.step(action)
-        self.was_real_done = done
-        self.real_episode_return += reward
-        episode_return_to_report = None
-
+        obs, reward, terminated, truncated, info = self.env.step(action)
+        self.was_real_done = terminated or truncated
         # check current lives, make loss of life terminal,
         # then update lives to handle bonus lives
-        lives = self.env.unwrapped.ale.lives()
-        if lives < self.lives and lives > 0:
+        lives = self.env.unwrapped.ale.lives()  # type: ignore[attr-defined]
+        if 0 < lives < self.lives:
             # for Qbert sometimes we stay in lives == 0 condition for a few frames
-            # so it's important to keep lives > 0, so that we only reset once
+            # so its important to keep lives > 0, so that we only reset once
             # the environment advertises done.
-            done = True
-
-        if self.was_real_done:
-            episode_return_to_report = self.real_episode_return
-            self.real_episode_return = 0
-
-        # Since the consumer of the env has no ability to tell a "real" done from a fake one, put the real return
-        # on the info object (or a dummy placeholder to tell the caller to wait for it), but ensure we're not
-        # overwriting anything.
-        assert "episode_return" not in info, "Attempting to overwrite an existing episode return."
-        info["episode_return"] = episode_return_to_report
-
+            terminated = True
         self.lives = lives
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
+
 
     def reset(self, **kwargs):
         """Reset only when lives are exhausted.
@@ -134,50 +112,58 @@ def reset(self, **kwargs):
         and the learner need not know about any of this behind-the-scenes.
         """
         if self.was_real_done:
-            obs = self.env.reset(**kwargs)
+            obs, info = self.env.reset(**kwargs)
         else:
             # no-op step to advance from terminal/lost life state
-            obs, _, _, _ = self.env.step(0)
+            obs, _, terminated, truncated, info = self.env.step(0)
+
+            # The no-op step can lead to a game over, so we need to check it again
+            # to see if we should reset the environment and avoid the
+            # monitor.py `RuntimeError: Tried to step environment that needs reset`
+            if terminated or truncated:
+                obs, info = self.env.reset(**kwargs)
         self.lives = self.env.unwrapped.ale.lives()
-        return obs
+        return obs, info
 
 
 class MaxAndSkipEnv(gym.Wrapper):
     def __init__(self, env, skip=4):
         """Return only every `skip`-th frame"""
-        gym.Wrapper.__init__(self, env)
+        super().__init__(env)
         # most recent raw observations (for max pooling across time steps)
-        self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8)
-        self._skip       = skip
+        assert env.observation_space.dtype is not None, "No dtype specified for the observation space"
+        assert env.observation_space.shape is not None, "No shape defined for the observation space"
+        self._obs_buffer = np.zeros((2, *env.observation_space.shape), dtype=env.observation_space.dtype)
+        self._skip = skip
 
     def step(self, action):
         """Repeat action, sum reward, and max over last observations."""
         total_reward = 0.0
-        done = None
+        terminated = truncated = False
         for i in range(self._skip):
-            obs, reward, done, info = self.env.step(action)
-            if i == self._skip - 2: self._obs_buffer[0] = obs
-            if i == self._skip - 1: self._obs_buffer[1] = obs
-            total_reward += reward
+            obs, reward, terminated, truncated, info = self.env.step(action)
+            done = terminated or truncated
+            if i == self._skip - 2:
+                self._obs_buffer[0] = obs
+            if i == self._skip - 1:
+                self._obs_buffer[1] = obs
+            total_reward += float(reward)
             if done:
                 break
         # Note that the observation on the done=True frame
         # doesn't matter
         max_frame = self._obs_buffer.max(axis=0)
 
-        return max_frame, total_reward, done, info
-
-    def reset(self, **kwargs):
-        return self.env.reset(**kwargs)
+        return max_frame, total_reward, terminated, truncated, info
 
 
 class ClipRewardEnv(gym.RewardWrapper):
     def __init__(self, env):
-        gym.RewardWrapper.__init__(self, env)
+        super().__init__(env)
 
     def reward(self, reward):
         """Bin reward to {+1, 0, -1} by its sign."""
-        return np.sign(reward)
+        return np.sign(float(reward))
 
 
 class WarpFrame(gym.ObservationWrapper):
@@ -257,32 +243,32 @@ def __init__(self, env, k):
 
         See Also: LazyFrames
         """
-        gym.Wrapper.__init__(self, env)
+        super().__init__(env)
         self.k = k
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
         self.observation_space = spaces.Box(low=env.observation_space.low.min(), high=env.observation_space.high.max(),
                                             shape=(k, *shp), dtype=env.observation_space.dtype)
 
-    def reset(self):
-        ob = self.env.reset()
+    def reset(self, **kwargs):
+        observation, info = self.env.reset(**kwargs)
         for _ in range(self.k):
-            self.frames.append(ob)
-        return self._get_ob()
+            self.frames.append(observation)
+        return self._get_obs(), info
 
     def step(self, action):
-        ob, reward, done, info = self.env.step(action)
-        self.frames.append(ob)
-        return self._get_ob(), reward, done, info
+        observation, reward, terminated, truncated, info = self.env.step(action)
+        self.frames.append(observation)
+        return self._get_obs(), reward, terminated, truncated, info
 
-    def _get_ob(self):
+    def _get_obs(self):
         assert len(self.frames) == self.k
         return LazyFrames(list(self.frames))
 
 
 class ScaledFloatFrame(gym.ObservationWrapper):
     def __init__(self, env):
-        gym.ObservationWrapper.__init__(self, env)
+        super().__init__(env)
         self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
 
     def observation(self, observation):
@@ -340,18 +326,21 @@ def to_tensor(self):
 
 
 class TimeLimit(gym.Wrapper):
+    """
+        Ref: https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/wrappers/time_limit.py
+    """
     def __init__(self, env, max_episode_steps=None):
-        super(TimeLimit, self).__init__(env)
+        super().__init__(env)
         self._max_episode_steps = max_episode_steps
         self._elapsed_steps = 0
 
-    def step(self, ac):
-        observation, reward, done, info = self.env.step(ac)
+    def step(self, action):
+        observation, reward, terminated, truncated, info = self.env.step(action)
         self._elapsed_steps += 1
         if self._elapsed_steps >= self._max_episode_steps:
             done = True
             info['TimeLimit.truncated'] = True
-        return observation, reward, done, info
+        return observation, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
         self._elapsed_steps = 0
@@ -378,7 +367,7 @@ class ImageToPyTorch(gym.ObservationWrapper):
     # For now switching this to return a Tensor and calling it *before* FrameStack...
 
     def __init__(self, env):
-        super(ImageToPyTorch, self).__init__(env)
+        super().__init__(env)
         old_shape = self.observation_space.shape
         self.observation_space = gym.spaces.Box(
             low=0,
@@ -405,5 +394,4 @@ def __init__(self, env, seeds):
 
     def reset(self):
         seed = np.random.choice(self._seeds)
-        self._env.seed(int(seed))
-        return self._env.reset()
+        return self._env.reset(seed=seed)
diff --git a/continual_rl/utils/utils.py b/continual_rl/utils/utils.py
index d2d1618d..d43e67d4 100644
--- a/continual_rl/utils/utils.py
+++ b/continual_rl/utils/utils.py
@@ -1,7 +1,7 @@
 import logging
 import tempfile
 import types
-import gym
+import gymnasium as gym
 import numpy as np
 import random
 import torch
diff --git a/setup.py b/setup.py
index 6ad31824..bc367345 100644
--- a/setup.py
+++ b/setup.py
@@ -8,16 +8,21 @@
     author_email='snpowers@cs.cmu.edu',
     packages=find_packages(),
     py_modules=['continual_rl.available_policies', 'continual_rl.experiment_specs'],
-    install_requires=['setuptools==59.5.0',
+    install_requires=['setuptools',
                       'uuid',
                       'numpy',
                       'tensorboard',
                       'torch-ac',
-                      'gym[atari]<=0.25.2',
-                      'atari-py==0.2.5',
-                      'moviepy',
+                      'gymnasium[atari]',
+                      'gymnasium[accept-rom-license]',
                       'dotmap',
                       'psutil',
                       'opencv-python'
+                      # NOTE: More recent versions can't seem to save single color channel 
+                      #       images. This means when tensorboard goes to save images for 
+                      #       videos, imageio 'ValueError: Can't write images with one 
+                      #       color channel' is thrown.
+                      'moviepy',
+                      'imageio<=2.24.0'
                     ]
 )

From 4b6e32177c81fb934b0745f96a6822a0492b3c7c Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Wed, 5 Jul 2023 17:45:22 -0400
Subject: [PATCH 02/15] Attempting to update tests/ and continue updating repo
 as bugs arise.

---
 .../environment_runners/environment_runner_batch.py    |  2 ++
 .../experiments/environment_runners/parallel_env.py    | 10 +++++-----
 continual_rl/policies/play/play_policy.py              |  2 +-
 tests/common_mocks/mock_preprocessor.py                |  2 +-
 .../test_environment_runner_batch.py                   |  6 +++---
 .../test_environment_runner_sync.py                    |  6 +++---
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/continual_rl/experiments/environment_runners/environment_runner_batch.py b/continual_rl/experiments/environment_runners/environment_runner_batch.py
index f722c34f..99379eb4 100644
--- a/continual_rl/experiments/environment_runners/environment_runner_batch.py
+++ b/continual_rl/experiments/environment_runners/environment_runner_batch.py
@@ -44,6 +44,8 @@ def _initialize_envs(self, env_spec, preprocessor):
         # Initialize the observation time-batch with n of the first observation.
         results = self._parallel_env.reset()
         raw_observations, infos = list(results)
+        from pdb import set_trace
+        set_trace()
         processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
         return processed_observations
 
diff --git a/continual_rl/experiments/environment_runners/parallel_env.py b/continual_rl/experiments/environment_runners/parallel_env.py
index 1a52529d..7157b6a0 100644
--- a/continual_rl/experiments/environment_runners/parallel_env.py
+++ b/continual_rl/experiments/environment_runners/parallel_env.py
@@ -25,7 +25,7 @@
 
 
 from multiprocessing import Process, Pipe
-import gym
+import gymnasium as gym
 import cloudpickle
 from continual_rl.utils.utils import Utils
 
@@ -41,12 +41,12 @@ def worker(conn, env_spec, output_dir):
     while True:
         cmd, data = conn.recv()
         if cmd == "step":
-            obs, reward, done, info = env.step(data)
+            obs, reward, terminated, truncated, info = env.step(data)
             if done:
-                obs = env.reset()
-            conn.send((obs, reward, done, info))
+                obs, info = env.reset()
+            conn.send((obs, reward, terminated, truncated, info))
         elif cmd == "reset":
-            obs = env.reset()
+            obs, info = env.reset()
             conn.send(obs)
         elif cmd == "kill":
             env.close()
diff --git a/continual_rl/policies/play/play_policy.py b/continual_rl/policies/play/play_policy.py
index b1a7e6e6..d7b2d280 100644
--- a/continual_rl/policies/play/play_policy.py
+++ b/continual_rl/policies/play/play_policy.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 import time
 from continual_rl.policies.policy_base import PolicyBase
 from continual_rl.policies.play.play_policy_config import PlayPolicyConfig
diff --git a/tests/common_mocks/mock_preprocessor.py b/tests/common_mocks/mock_preprocessor.py
index 1e78e687..b9b0371a 100644
--- a/tests/common_mocks/mock_preprocessor.py
+++ b/tests/common_mocks/mock_preprocessor.py
@@ -1,5 +1,5 @@
 from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
-from gym.spaces.box import Box
+from gymnasium.spaces.box import Box
 import torch
 
 
diff --git a/tests/experiments/environment_runners/test_environment_runner_batch.py b/tests/experiments/environment_runners/test_environment_runner_batch.py
index 76d1ecb7..e29cc83c 100644
--- a/tests/experiments/environment_runners/test_environment_runner_batch.py
+++ b/tests/experiments/environment_runners/test_environment_runner_batch.py
@@ -20,14 +20,14 @@ def seed(self, seed):
 
     def reset(self):
         self.reset_count += 1
-        return np.array([0, 1, 2])
+        return np.array([0, 1, 2]), {"info": "unused"}
 
     def step(self, action):
         self.actions_executed.append(action)
         observation = np.array([12, 13, 14])
         reward = 1.5
-        done = action == 4  # Simple way to force the done state we want
-        return observation, reward, done, {"info": "unused"}
+        terminated = action == 4  # Simple way to force the done state we want
+        return observation, reward, terminated, False, {"info": "unused"}
 
     def close(self):
         pass
diff --git a/tests/experiments/environment_runners/test_environment_runner_sync.py b/tests/experiments/environment_runners/test_environment_runner_sync.py
index 6b634ea3..bc2e5c07 100644
--- a/tests/experiments/environment_runners/test_environment_runner_sync.py
+++ b/tests/experiments/environment_runners/test_environment_runner_sync.py
@@ -19,14 +19,14 @@ def seed(self, seed):
 
     def reset(self):
         self.reset_count += 1
-        return np.array([0, 1, 2])
+        return np.array([0, 1, 2]), {"info": "unused"}
 
     def step(self, action):
         self.actions_executed.append(action)
         observation = np.array([12, 13, 14])
         reward = 1.5
-        done = action == 4  # Simple way to force the done state we want
-        return observation, reward, done, {"info": "unused"}
+        terminated = action == 4  # Simple way to force the done state we want
+        return observation, reward, terminated, False, {"info": "unused"}
 
     def close(self):
         pass

From 7186d8c108744f2e6c4aa7ecf72f2a52d8983959 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Wed, 5 Jul 2023 18:07:17 -0400
Subject: [PATCH 03/15] Updated tests/ code to work with gymnasium. Now passing
 all tests.

---
 .../environment_runners/environment_runner_batch.py           | 2 --
 continual_rl/experiments/environment_runners/parallel_env.py  | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/continual_rl/experiments/environment_runners/environment_runner_batch.py b/continual_rl/experiments/environment_runners/environment_runner_batch.py
index 99379eb4..f722c34f 100644
--- a/continual_rl/experiments/environment_runners/environment_runner_batch.py
+++ b/continual_rl/experiments/environment_runners/environment_runner_batch.py
@@ -44,8 +44,6 @@ def _initialize_envs(self, env_spec, preprocessor):
         # Initialize the observation time-batch with n of the first observation.
         results = self._parallel_env.reset()
         raw_observations, infos = list(results)
-        from pdb import set_trace
-        set_trace()
         processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
         return processed_observations
 
diff --git a/continual_rl/experiments/environment_runners/parallel_env.py b/continual_rl/experiments/environment_runners/parallel_env.py
index 7157b6a0..d0ea640c 100644
--- a/continual_rl/experiments/environment_runners/parallel_env.py
+++ b/continual_rl/experiments/environment_runners/parallel_env.py
@@ -42,12 +42,12 @@ def worker(conn, env_spec, output_dir):
         cmd, data = conn.recv()
         if cmd == "step":
             obs, reward, terminated, truncated, info = env.step(data)
-            if done:
+            if terminated:
                 obs, info = env.reset()
             conn.send((obs, reward, terminated, truncated, info))
         elif cmd == "reset":
             obs, info = env.reset()
-            conn.send(obs)
+            conn.send((obs, info))
         elif cmd == "kill":
             env.close()
             return

From 7f005ab0d64c4edd1d138459a4d7d1a02daaab74 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Thu, 6 Jul 2023 09:54:58 -0400
Subject: [PATCH 04/15] Updated setup and MIT license.

---
 continual_rl/utils/env_wrappers.py |  7 ++++++-
 setup.py                           | 15 ++++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/continual_rl/utils/env_wrappers.py b/continual_rl/utils/env_wrappers.py
index e0d59eb5..7c3fc8d3 100644
--- a/continual_rl/utils/env_wrappers.py
+++ b/continual_rl/utils/env_wrappers.py
@@ -1,6 +1,8 @@
 # The MIT License
 
+# Copyright (c) 2017 OpenAI (http://openai.com)
 # Copyright (c) 2019 Antonin Raffin
+# Copyright (c) 2022 Farama Foundation
 
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +23,10 @@
 # THE SOFTWARE.
 
 # Taken from
-#  https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html
+# https://raw.githubusercontent.com/openai/baselines/9b68103b737ac46bc201dfb3121cfa5df2127e53/baselines/common/wrappers.py
+# https://raw.githubusercontent.com/openai/baselines/7c520852d9cf4eaaad326a3d548efc915dc60c10/baselines/common/atari_wrappers.py
+# https://github.com/facebookresearch/torchbeast/blob/542c607cfe4adbc1967c213e8c248f29b13b64b6/torchbeast/atari_wrappers.py
+# https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html
 # and slightly modified.
 
 import numpy as np
diff --git a/setup.py b/setup.py
index bc367345..6f3067e8 100644
--- a/setup.py
+++ b/setup.py
@@ -17,12 +17,13 @@
                       'gymnasium[accept-rom-license]',
                       'dotmap',
                       'psutil',
-                      'opencv-python'
-                      # NOTE: More recent versions can't seem to save single color channel 
-                      #       images. This means when tensorboard goes to save images for 
-                      #       videos, imageio 'ValueError: Can't write images with one 
-                      #       color channel' is thrown.
-                      'moviepy',
-                      'imageio<=2.24.0'
+                      'opencv-python',
+                      # NOTE: More recent versions of imageio can't seem to save single 
+                      #       color channel images. This means when tensorboard goes to 
+                      #       save images for videos, imageio 'ValueError: Can't write images
+                      #        with one color channel' is thrown. Root issue is with moviepy
+                      #        but installing older imageio version seems to get around it.
+                      'imageio==2.24.0',
+                      'moviepy'
                     ]
 )

From 8e4f358e4422fc596b26b1deed7a21330cfed7c4 Mon Sep 17 00:00:00 2001
From: bpoole <bpoole16@uncc.edu>
Date: Fri, 15 Dec 2023 13:48:05 -0500
Subject: [PATCH 05/15] Fixed NumPy warnings and added histogram tracking.

---
 .../experiments/environment_runners/environment_runner_batch.py | 2 +-
 continual_rl/experiments/tasks/task_base.py                     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/continual_rl/experiments/environment_runners/environment_runner_batch.py b/continual_rl/experiments/environment_runners/environment_runner_batch.py
index 177445f2..c7d53741 100644
--- a/continual_rl/experiments/environment_runners/environment_runner_batch.py
+++ b/continual_rl/experiments/environment_runners/environment_runner_batch.py
@@ -25,7 +25,7 @@ def __init__(self, policy, num_parallel_envs, timesteps_per_collection, render_c
         self._parallel_env = None
         self._last_observations = None  # To allow returning mid-episode
         self._last_timestep_data = None  # Always stores the last thing seen, even across "dones"
-        self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=np.float)
+        self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=np.float64)
 
         # Used to determine what to save off to logs and when
         self._observations_to_render = []
diff --git a/continual_rl/experiments/tasks/task_base.py b/continual_rl/experiments/tasks/task_base.py
index b62f0f82..fef38737 100644
--- a/continual_rl/experiments/tasks/task_base.py
+++ b/continual_rl/experiments/tasks/task_base.py
@@ -67,6 +67,8 @@ def _report_log(self, summary_writer, log, run_id, default_timestep):
             summary_writer.add_video(tag, value, global_step=timestep)
         elif type == "scalar":
             summary_writer.add_scalar(tag, value, global_step=timestep)
+        elif type == "histogram":
+            summary_writer.add_histogram(tag, value, global_step=timestep)
         elif type == "image":
             summary_writer.add_image(tag, value, global_step=timestep)
 

From 39cb8fc81a9590dae03889b8bcbf8f7fb87f6d31 Mon Sep 17 00:00:00 2001
From: bpoole <bpoole16@uncc.edu>
Date: Thu, 15 Aug 2024 15:23:32 -0400
Subject: [PATCH 06/15] Removed minihack wrapper (outdated). Only closes batch
 runner when object is not None.

---
 .../environment_runners/environment_runner_batch.py            | 3 ++-
 continual_rl/experiments/tasks/make_minihack_task.py           | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/continual_rl/experiments/environment_runners/environment_runner_batch.py b/continual_rl/experiments/environment_runners/environment_runner_batch.py
index c7d53741..a7768e3b 100644
--- a/continual_rl/experiments/environment_runners/environment_runner_batch.py
+++ b/continual_rl/experiments/environment_runners/environment_runner_batch.py
@@ -171,4 +171,5 @@ def collect_data(self, task_spec):
         return num_timesteps, [per_timestep_data], returns_to_report, logs_to_report
 
     def cleanup(self, task_spec):
-        self._parallel_env.close()
+        if self._parallel_env is not None:
+            self._parallel_env.close()
diff --git a/continual_rl/experiments/tasks/make_minihack_task.py b/continual_rl/experiments/tasks/make_minihack_task.py
index 6a7e1194..95d37a9f 100644
--- a/continual_rl/experiments/tasks/make_minihack_task.py
+++ b/continual_rl/experiments/tasks/make_minihack_task.py
@@ -73,7 +73,7 @@ def make_minihack(
         savedir=savedir,
         **kwargs,
     )  # each env specifies its own self._max_episode_steps
-    env = MiniHackMakeVecSafeWrapper(env)
+    # env = MiniHackMakeVecSafeWrapper(env)
     env = MiniHackObsWrapper(env)
     return env
 

From ec712a2734e2f334be4549c7522e81bd31de50ff Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Mon, 24 Mar 2025 10:15:49 -0400
Subject: [PATCH 07/15] Removed atari-py, no longer needed.

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6ad31824..647dd828 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,6 @@
                       'tensorboard',
                       'torch-ac',
                       'gym[atari]<=0.25.2',
-                      'atari-py==0.2.5',
                       'moviepy',
                       'dotmap',
                       'psutil',

From 4f93f38d4c477dd5817b264dd0d50e4d48a6c603 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Mon, 31 Mar 2025 11:48:04 -0400
Subject: [PATCH 08/15] Added ability to set more variables for TaskBase when
 using ImageTask.

---
 continual_rl/experiments/tasks/image_task.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/continual_rl/experiments/tasks/image_task.py b/continual_rl/experiments/tasks/image_task.py
index 03217a9f..922381a1 100644
--- a/continual_rl/experiments/tasks/image_task.py
+++ b/continual_rl/experiments/tasks/image_task.py
@@ -38,9 +38,11 @@ def render_episode(self, episode_observations):
 
 class ImageTask(TaskBase):
     def __init__(self, task_id, action_space_id, env_spec, num_timesteps, time_batch_size, eval_mode,
-                 image_size, grayscale, continual_eval=True, resize_interp_method="INTER_AREA"):
+                 image_size, grayscale, continual_eval=True, resize_interp_method="INTER_AREA",
+                 continual_eval_num_returns=10, rolling_return_count=100):
         preprocessor = ImagePreprocessor(time_batch_size, image_size, grayscale, env_spec, resize_interp_method)
         dummy_env, _ = Utils.make_env(preprocessor.env_spec)
 
         super().__init__(task_id, action_space_id, preprocessor, preprocessor.env_spec, preprocessor.observation_space,
-                         dummy_env.action_space, num_timesteps, eval_mode, continual_eval=continual_eval)
+                         dummy_env.action_space, num_timesteps, eval_mode, continual_eval=continual_eval, 
+                         rolling_return_count=rolling_return_count, continual_eval_num_returns=continual_eval_num_returns)

From d2404053596bd1be9837f8eabcc70004f8d9ef0e Mon Sep 17 00:00:00 2001
From: Ben <bpoole16@uncc.edu>
Date: Mon, 31 Mar 2025 12:02:57 -0400
Subject: [PATCH 09/15] Update action_run_tests.yml

Updated from v2 to v4 to hopefully fix workflow error.
---
 .github/workflows/action_run_tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/action_run_tests.yml b/.github/workflows/action_run_tests.yml
index 993d504d..4083a1d9 100644
--- a/.github/workflows/action_run_tests.yml
+++ b/.github/workflows/action_run_tests.yml
@@ -15,9 +15,9 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Cache conda
-      uses: actions/cache@v2
+      uses: actions/cache@v4
       env:
         # Increase this value to reset cache if environment.yml has not changed
         CACHE_NUMBER: 0
@@ -25,7 +25,7 @@ jobs:
         path: ~/conda_pkgs_dir
         key:
           ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{hashFiles('environment.yml') }}
-    - uses: conda-incubator/setup-miniconda@v2
+    - uses: conda-incubator/setup-miniconda@v4
       with:
         auto-update-conda: true
         activate-environment: venv_continual_rl

From 251a1dc3de5f85e125a2c51d31d9a9ed90c6923e Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Mon, 31 Mar 2025 12:05:55 -0400
Subject: [PATCH 10/15] Updated github workflows.

---
 .github/workflows/action_run_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/action_run_tests.yml b/.github/workflows/action_run_tests.yml
index 4083a1d9..254c0763 100644
--- a/.github/workflows/action_run_tests.yml
+++ b/.github/workflows/action_run_tests.yml
@@ -25,7 +25,7 @@ jobs:
         path: ~/conda_pkgs_dir
         key:
           ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{hashFiles('environment.yml') }}
-    - uses: conda-incubator/setup-miniconda@v4
+    - uses: conda-incubator/setup-miniconda@v3
       with:
         auto-update-conda: true
         activate-environment: venv_continual_rl

From dd9c1ee78c3c40dd29e2cf33d75d26da70eb1841 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Mon, 31 Mar 2025 12:11:50 -0400
Subject: [PATCH 11/15] Updated requirements to auto accept ALE ROMs for atari
 games.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 647dd828..9619eed0 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
                       'numpy',
                       'tensorboard',
                       'torch-ac',
-                      'gym[atari]<=0.25.2',
+                      'gym[atari,accept-rom-license]<=0.25.2',
                       'moviepy',
                       'dotmap',
                       'psutil',

From 1a7eaf1d3188766ebc8e9c8e4d98d369b34a2ca2 Mon Sep 17 00:00:00 2001
From: bpoole16 <bpoole604@gmail.com>
Date: Tue, 1 Apr 2025 15:20:42 -0400
Subject: [PATCH 12/15] Disabled WarpFrame in wrap_deepmind for atari as
 ImageTask already does this. As a result, env was wrapped twice in WarpFrame.

---
 continual_rl/experiments/tasks/make_atari_task.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/continual_rl/experiments/tasks/make_atari_task.py b/continual_rl/experiments/tasks/make_atari_task.py
index 0b50fe63..76ff5695 100644
--- a/continual_rl/experiments/tasks/make_atari_task.py
+++ b/continual_rl/experiments/tasks/make_atari_task.py
@@ -24,14 +24,15 @@ def make_atari(env_id, max_episode_steps=None, full_action_space=False):
     return env
 
 
-def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
+def wrap_deepmind(env, episode_life=True, wrap=True, clip_rewards=True, frame_stack=False, scale=False):
     """Configure environment for DeepMind-style Atari.
     """
     if episode_life:
         env = EpisodicLifeEnv(env)
     if 'FIRE' in env.unwrapped.get_action_meanings():
         env = FireResetEnv(env)
-    env = WarpFrame(env)
+    if wrap:
+        env = WarpFrame(env)
     if scale:
         env = ScaledFloatFrame(env)
     if clip_rewards:
@@ -52,8 +53,9 @@ def get_single_atari_task(task_id, action_space_id, env_name, num_timesteps, max
         env_spec=lambda: wrap_deepmind(
             make_atari(env_name, max_episode_steps=max_episode_steps, full_action_space=full_action_space),
             clip_rewards=False,  # If policies need to clip rewards, they should handle it themselves
-            frame_stack=False,  # Handled separately
+            frame_stack=False,  # Added by image task
             scale=False,
+            wrap=False, # Added by image task
         ),
         num_timesteps=num_timesteps,
         time_batch_size=4,

From 08564da842c2f01b85c2d4770d619e0e0802b8ea Mon Sep 17 00:00:00 2001
From: bpoole <bpoole16@charlotte.edu>
Date: Mon, 21 Apr 2025 14:13:54 -0400
Subject: [PATCH 13/15] Updated exp specs for new atari registration for
 gymnasium.

- Updated tests for new atari registration as well.
---
 continual_rl/experiment_specs.py                             | 5 +++++
 tests/common_mocks/mock_preprocessor.py                      | 4 ++--
 .../policies/discrete_random/test_discrete_random_policy.py  | 4 ++--
 tests/policies/impala/test_impala_policy.py                  | 2 +-
 tests/policies/ppo/test_ppo_policy.py                        | 2 +-
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/continual_rl/experiment_specs.py b/continual_rl/experiment_specs.py
index 8d91d47d..4e34a927 100644
--- a/continual_rl/experiment_specs.py
+++ b/continual_rl/experiment_specs.py
@@ -1,3 +1,8 @@
+import gymnasium as gym
+import ale_py
+
+gym.register_envs(ale_py) 
+
 from continual_rl.experiments.experiment import Experiment
 from continual_rl.experiments.tasks.make_atari_task import get_single_atari_task
 from continual_rl.experiments.tasks.make_procgen_task import get_single_procgen_task
diff --git a/tests/common_mocks/mock_preprocessor.py b/tests/common_mocks/mock_preprocessor.py
index b9b0371a..a602bbcf 100644
--- a/tests/common_mocks/mock_preprocessor.py
+++ b/tests/common_mocks/mock_preprocessor.py
@@ -1,7 +1,7 @@
 from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
 from gymnasium.spaces.box import Box
 import torch
-
+import numpy as np
 
 class MockPreprocessor(PreprocessorBase):
     def __init__(self):
@@ -9,7 +9,7 @@ def __init__(self):
         super().__init__(observation_space)
 
     def preprocess(self, observation):
-        return torch.Tensor(observation)
+        return torch.Tensor(np.array(observation))
 
     def render_episode(self, episode_observations):
         """
diff --git a/tests/policies/discrete_random/test_discrete_random_policy.py b/tests/policies/discrete_random/test_discrete_random_policy.py
index 2e096b56..09ceb4bf 100644
--- a/tests/policies/discrete_random/test_discrete_random_policy.py
+++ b/tests/policies/discrete_random/test_discrete_random_policy.py
@@ -16,7 +16,7 @@ def test_end_to_end_batch(self, set_tmp_directory, cleanup_experiment, request):
         # Arrange
         experiment = Experiment(tasks=[
             ImageTask(task_id="some_id", action_space_id=0,
-                      env_spec='BreakoutDeterministic-v4',
+                      env_spec='ale_py:BreakoutDeterministic-v4',
                       num_timesteps=10, time_batch_size=4, eval_mode=False,
                       image_size=[84, 84], grayscale=True)
         ])
@@ -45,7 +45,7 @@ def test_end_to_end_sync(self, set_tmp_directory, cleanup_experiment, request):
         # Arrange
         experiment = Experiment(tasks=[
             ImageTask(task_id="end_to_end_sync", action_space_id=0,
-                      env_spec='BreakoutDeterministic-v4',
+                      env_spec='ale_py:BreakoutDeterministic-v4',
                       num_timesteps=10, time_batch_size=4, eval_mode=False,
                       image_size=[84, 84], grayscale=True)
         ])
diff --git a/tests/policies/impala/test_impala_policy.py b/tests/policies/impala/test_impala_policy.py
index 6ae5b09c..434732e3 100644
--- a/tests/policies/impala/test_impala_policy.py
+++ b/tests/policies/impala/test_impala_policy.py
@@ -22,7 +22,7 @@ def test_end_to_end_batch(self, set_tmp_directory, cleanup_experiment, request):
         experiment = Experiment(
             tasks=[
                 ImageTask(task_id="some_id", action_space_id=0,
-                          env_spec='BreakoutDeterministic-v4',
+                          env_spec='ale_py:BreakoutDeterministic-v4',
                           num_timesteps=10, time_batch_size=4, eval_mode=False,
                           image_size=[84, 84], grayscale=True)])
         config = ImpalaPolicyConfig()
diff --git a/tests/policies/ppo/test_ppo_policy.py b/tests/policies/ppo/test_ppo_policy.py
index 718abc52..c6e880f9 100644
--- a/tests/policies/ppo/test_ppo_policy.py
+++ b/tests/policies/ppo/test_ppo_policy.py
@@ -19,7 +19,7 @@ def test_end_to_end_batch(self, set_tmp_directory, cleanup_experiment, request):
         experiment = Experiment(
             tasks=[
                 ImageTask(task_id="end_to_end_batch", action_space_id=0,
-                          env_spec='BreakoutDeterministic-v4',
+                          env_spec='ale_py:BreakoutDeterministic-v4',
                           num_timesteps=10, time_batch_size=4, eval_mode=False,
                           image_size=[84, 84], grayscale=True)])
         config = PPOPolicyConfig()

From 7bfa5b031c3ce49132b3285a0b857a7d0220daa9 Mon Sep 17 00:00:00 2001
From: bpoole <bpoole16@charlotte.edu>
Date: Tue, 22 Apr 2025 15:48:58 -0400
Subject: [PATCH 14/15] Updated reset for wrappers to support taking kwargs.

---
 continual_rl/experiments/tasks/make_minihack_task.py | 6 +++---
 continual_rl/utils/env_wrappers.py                   | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/continual_rl/experiments/tasks/make_minihack_task.py b/continual_rl/experiments/tasks/make_minihack_task.py
index 501ac65d..9924a1f7 100644
--- a/continual_rl/experiments/tasks/make_minihack_task.py
+++ b/continual_rl/experiments/tasks/make_minihack_task.py
@@ -28,11 +28,11 @@ def step(self, action: int):
         os.chdir(self.basedir)
         return x
 
-    def reset(self):
+    def reset(self, **kwargs):
         os.chdir(self.env.env._vardir)
-        x = self.env.reset()
+        x, info = self.env.reset(**kwargs)
         os.chdir(self.basedir)
-        return x
+        return x, info
 
     def close(self):
         os.chdir(self.env.env._vardir)
diff --git a/continual_rl/utils/env_wrappers.py b/continual_rl/utils/env_wrappers.py
index 7c3fc8d3..7f30eb9f 100644
--- a/continual_rl/utils/env_wrappers.py
+++ b/continual_rl/utils/env_wrappers.py
@@ -74,14 +74,14 @@ def __init__(self, env):
         assert len(env.unwrapped.get_action_meanings()) >= 3
 
     def reset(self, **kwargs):
-        self.env.reset(**kwargs)
+        _, info = self.env.reset(**kwargs)
         obs, _, terminated, truncated, _ = self.env.step(1)
         if terminated or truncated:
-            self.env.reset(**kwargs)
+             _, info = self.env.reset(**kwargs)
         obs, _, terminated, truncated, _ = self.env.step(2)
         if terminated or truncated:
-            self.env.reset(**kwargs)
-        return obs, {}
+             _, info = self.env.reset(**kwargs)
+        return obs, info
 
 
 class EpisodicLifeEnv(gym.Wrapper):

From bfc269dd26639a8c1653b957a846ca637e188037 Mon Sep 17 00:00:00 2001
From: bpoole <bpoole16@charlotte.edu>
Date: Mon, 28 Apr 2025 12:30:02 -0400
Subject: [PATCH 15/15] Reworked EpisodicLifeEnv wrapper to return
 was_real_done.

- When the agent loses all lives, was_real_done is returned as True on
reset only. If True, a normal reset is done. If False, no real reset is
done and the game continues.
---
 continual_rl/utils/env_wrappers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/continual_rl/utils/env_wrappers.py b/continual_rl/utils/env_wrappers.py
index 7f30eb9f..ea893e29 100644
--- a/continual_rl/utils/env_wrappers.py
+++ b/continual_rl/utils/env_wrappers.py
@@ -108,9 +108,9 @@ def step(self, action):
             # the environment advertises done.
             terminated = True
         self.lives = lives
+        # print(self.was_real_done, terminated, lives, self.lives)
         return obs, reward, terminated, truncated, info
 
-
     def reset(self, **kwargs):
         """Reset only when lives are exhausted.
         This way all states are still reachable even though lives are episodic,
@@ -128,6 +128,7 @@ def reset(self, **kwargs):
             if terminated or truncated:
                 obs, info = self.env.reset(**kwargs)
         self.lives = self.env.unwrapped.ale.lives()
+        info['was_real_done'] = self.was_real_done
         return obs, info