Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/action_run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Cache conda
uses: actions/cache@v2
uses: actions/cache@v4
env:
# Increase this value to reset cache if environment.yml has not changed
CACHE_NUMBER: 0
with:
path: ~/conda_pkgs_dir
key:
${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{hashFiles('environment.yml') }}
- uses: conda-incubator/setup-miniconda@v2
- uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
activate-environment: venv_continual_rl
Expand Down
5 changes: 5 additions & 0 deletions continual_rl/experiment_specs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import gymnasium as gym
import ale_py

gym.register_envs(ale_py)

from continual_rl.experiments.experiment import Experiment
from continual_rl.experiments.tasks.make_atari_task import get_single_atari_task
from continual_rl.experiments.tasks.make_procgen_task import get_single_procgen_task
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def __init__(self, policy, num_parallel_envs, timesteps_per_collection, render_c
self._parallel_env = None
self._last_observations = None # To allow returning mid-episode
self._last_timestep_data = None # Always stores the last thing seen, even across "dones"
self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=np.float)
# NOTE: the np.float alias was deprecated in NumPy 1.20 and removed in NumPy 1.24; the builtin float is the drop-in replacement
self._cumulative_rewards = np.array([0 for _ in range(num_parallel_envs)], dtype=float)

# Used to determine what to save off to logs and when
self._observations_to_render = []
Expand All @@ -41,7 +42,8 @@ def _initialize_envs(self, env_spec, preprocessor):
self._parallel_env = ParallelEnv(env_specs, self._output_dir)

# Initialize the observation time-batch with n of the first observation.
raw_observations = self._parallel_env.reset()
results = self._parallel_env.reset()
raw_observations, infos = list(results)
processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
return processed_observations

Expand All @@ -50,11 +52,11 @@ def _reset_env(self, env_id):
ParallelEnv doesn't readily expose manually resetting an environment, so doing that here.
"""
if env_id == 0:
observation = self._parallel_env.envs[0].reset()
observation, _ = self._parallel_env.envs[0].reset()
else:
local = self._parallel_env.locals[env_id-1]
local.send(("reset", None))
observation = local.recv()
observation, _ = local.recv()

return observation

Expand Down Expand Up @@ -119,8 +121,9 @@ def collect_data(self, task_spec):

# ParallelEnv automatically resets the env and returns the new observation when a "done" occurs
result = self._parallel_env.step(actions)
raw_observations, rewards, dones, infos = list(result)

raw_observations, rewards, terminated, truncated, infos = list(result)
dones = np.logical_or(terminated, truncated)

self._total_timesteps += self._num_parallel_envs
self._last_timestep_data = timestep_data
processed_observations = self._preprocess_raw_observations(preprocessor, raw_observations)
Expand Down Expand Up @@ -171,4 +174,5 @@ def collect_data(self, task_spec):
return num_timesteps, [per_timestep_data], returns_to_report, logs_to_report

def cleanup(self, task_spec):
    """Close the parallel environments, if any were created.

    ``self._parallel_env`` starts out as ``None`` and is only populated once
    the environments are initialized, so cleanup can be reached with nothing
    to close.

    Args:
        task_spec: Unused here.
    """
    parallel_env = self._parallel_env
    if parallel_env is None:
        # Nothing was ever created; closing would raise AttributeError.
        return
    parallel_env.close()
24 changes: 12 additions & 12 deletions continual_rl/experiments/environment_runners/parallel_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


from multiprocessing import Process, Pipe
import gym
import gymnasium as gym
import cloudpickle
from continual_rl.utils.utils import Utils

Expand All @@ -41,13 +41,13 @@ def worker(conn, env_spec, output_dir):
while True:
cmd, data = conn.recv()
if cmd == "step":
obs, reward, done, info = env.step(data)
if done:
obs = env.reset()
conn.send((obs, reward, done, info))
obs, reward, terminated, truncated, info = env.step(data)
if terminated:
obs, info = env.reset()
conn.send((obs, reward, terminated, truncated, info))
elif cmd == "reset":
obs = env.reset()
conn.send(obs)
obs, info = env.reset()
conn.send((obs, info))
elif cmd == "kill":
env.close()
return
Expand Down Expand Up @@ -90,16 +90,16 @@ def __del__(self):
def reset(self):
    """Reset every environment and return the results grouped by field.

    Returns:
        A ``zip`` iterator that yields one tuple per field of the per-env
        reset result (observations first, then infos). Within each tuple the
        first entry belongs to the local environment and the remaining
        entries follow the order of ``self.locals``.
    """
    # Send the reset requests before resetting the local environment.
    for local in self.locals:
        local.send(("reset", None))

    per_env_results = [self._local_env.reset()]
    per_env_results.extend(local.recv() for local in self.locals)

    # Transpose [(obs, info), ...] into (all_obs, all_infos).
    return zip(*per_env_results)

def step(self, actions):
    """Step every environment once and return the results grouped by field.

    Args:
        actions: Indexable collection of actions. ``actions[0]`` is applied
            to the local environment; ``actions[1:]`` are paired, in order,
            with ``self.locals``.

    Returns:
        A ``zip`` iterator yielding, in order, the tuples of observations,
        rewards, terminated flags, truncated flags and infos. The local
        environment's entry comes first in each tuple.
    """
    # Send the step requests before stepping the local environment.
    for local, action in zip(self.locals, actions[1:]):
        local.send(("step", action))

    obs, reward, terminated, truncated, info = self._local_env.step(actions[0])

    # The episode is over on either flag; hand back the first observation of
    # the next episode while keeping the flags/info of the finished step.
    # NOTE(review): worker() resets its env only when `terminated` is set and
    # replaces `info` with the one returned by reset(); this local path also
    # resets on `truncated` and keeps the step's info. Confirm which is intended.
    if terminated or truncated:
        obs, _ = self._local_env.reset()

    local_result = (obs, reward, terminated, truncated, info)
    remote_results = [local.recv() for local in self.locals]

    # Transpose the per-env 5-tuples into per-field tuples.
    return zip(*[local_result, *remote_results])

def render(self):
Expand Down
8 changes: 5 additions & 3 deletions continual_rl/experiments/tasks/image_task.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
import torchvision
from gym.spaces.box import Box
from gymnasium.spaces.box import Box
from continual_rl.experiments.tasks.task_base import TaskBase
from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
from continual_rl.utils.utils import Utils
Expand Down Expand Up @@ -38,9 +38,11 @@ def render_episode(self, episode_observations):

class ImageTask(TaskBase):
    """Task whose observations are preprocessed by ``ImagePreprocessor``."""

    def __init__(self, task_id, action_space_id, env_spec, num_timesteps, time_batch_size, eval_mode,
                 image_size, grayscale, continual_eval=True, resize_interp_method="INTER_AREA",
                 continual_eval_num_returns=10, rolling_return_count=100):
        """Build the preprocessor and forward everything else to ``TaskBase``.

        ``time_batch_size``, ``image_size``, ``grayscale``, ``env_spec`` and
        ``resize_interp_method`` are consumed by ``ImagePreprocessor``; the
        remaining arguments are passed through to ``TaskBase.__init__``
        unchanged.
        """
        preprocessor = ImagePreprocessor(time_batch_size, image_size, grayscale, env_spec, resize_interp_method)

        # An environment is instantiated here only to read its action space.
        dummy_env, _ = Utils.make_env(preprocessor.env_spec)

        super().__init__(
            task_id,
            action_space_id,
            preprocessor,
            preprocessor.env_spec,
            preprocessor.observation_space,
            dummy_env.action_space,
            num_timesteps,
            eval_mode,
            continual_eval=continual_eval,
            rolling_return_count=rolling_return_count,
            continual_eval_num_returns=continual_eval_num_returns,
        )
10 changes: 6 additions & 4 deletions continual_rl/experiments/tasks/make_atari_task.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import gym
import gymnasium as gym

from continual_rl.utils.env_wrappers import (
NoopResetEnv,
Expand All @@ -24,14 +24,15 @@ def make_atari(env_id, max_episode_steps=None, full_action_space=False):
return env


def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
def wrap_deepmind(env, episode_life=True, wrap=True, clip_rewards=True, frame_stack=False, scale=False):
"""Configure environment for DeepMind-style Atari.
"""
if episode_life:
env = EpisodicLifeEnv(env)
if 'FIRE' in env.unwrapped.get_action_meanings():
env = FireResetEnv(env)
env = WarpFrame(env)
if wrap:
env = WarpFrame(env)
if scale:
env = ScaledFloatFrame(env)
if clip_rewards:
Expand All @@ -52,8 +53,9 @@ def get_single_atari_task(task_id, action_space_id, env_name, num_timesteps, max
env_spec=lambda: wrap_deepmind(
make_atari(env_name, max_episode_steps=max_episode_steps, full_action_space=full_action_space),
clip_rewards=False, # If policies need to clip rewards, they should handle it themselves
frame_stack=False, # Handled separately
frame_stack=False, # Added by image task
scale=False,
wrap=False, # Added by image task
),
num_timesteps=num_timesteps,
time_batch_size=4,
Expand Down
10 changes: 5 additions & 5 deletions continual_rl/experiments/tasks/make_minihack_task.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import gym
import gymnasium as gym
import numpy as np
import os

Expand Down Expand Up @@ -28,11 +28,11 @@ def step(self, action: int):
os.chdir(self.basedir)
return x

def reset(self, **kwargs):
    """Reset the wrapped env while running inside its var directory.

    The working directory is switched to ``self.env.env._vardir`` for the
    inner reset and then to ``self.basedir``, even when the inner reset
    raises.

    Args:
        **kwargs: Forwarded unchanged to the wrapped env's ``reset``.

    Returns:
        The ``(observation, info)`` pair produced by the wrapped env.
    """
    os.chdir(self.env.env._vardir)
    try:
        observation, info = self.env.reset(**kwargs)
    finally:
        # Always leave the var directory so a failed reset cannot strand the
        # process there.
        os.chdir(self.basedir)
    return observation, info

def close(self):
os.chdir(self.env.env._vardir)
Expand Down Expand Up @@ -73,7 +73,7 @@ def make_minihack(
savedir=savedir,
**kwargs,
) # each env specifies its own self._max_episode_steps
env = MiniHackMakeVecSafeWrapper(env)
# env = MiniHackMakeVecSafeWrapper(env)
env = MiniHackObsWrapper(env)
return env

Expand Down
2 changes: 1 addition & 1 deletion continual_rl/experiments/tasks/make_procgen_task.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import gym
import gymnasium as gym

from .image_task import ImageTask

Expand Down
2 changes: 1 addition & 1 deletion continual_rl/experiments/tasks/minigrid_task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
import numpy as np
import gym_minigrid # Needed for Utils.make_env
import gym
import gymnasium as gym
from continual_rl.experiments.tasks.task_base import TaskBase
from continual_rl.experiments.tasks.preprocessor_base import PreprocessorBase
from continual_rl.utils.utils import Utils
Expand Down
2 changes: 2 additions & 0 deletions continual_rl/experiments/tasks/task_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def _report_log(self, summary_writer, log, run_id, default_timestep):
summary_writer.add_video(tag, value, global_step=timestep)
elif type == "scalar":
summary_writer.add_scalar(tag, value, global_step=timestep)
elif type == "histogram":
summary_writer.add_histogram(tag, value, global_step=timestep)
elif type == "image":
summary_writer.add_image(tag, value, global_step=timestep)

Expand Down
2 changes: 1 addition & 1 deletion continual_rl/policies/play/play_policy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import gym
import gymnasium as gym
import time
from continual_rl.policies.policy_base import PolicyBase
from continual_rl.policies.play.play_policy_config import PlayPolicyConfig
Expand Down
Loading