diff --git a/clean_pufferl.py b/clean_pufferl.py
index 20da4c0246..96bd3264e6 100644
--- a/clean_pufferl.py
+++ b/clean_pufferl.py
@@ -1,885 +1,684 @@
-from pdb import set_trace as T
-import numpy as np
+# TODO: Add information
+# - Help menu
+# - Docs link
+#python -m torch.distributed.run --standalone --nnodes=1 --nproc-per-node=1 clean_pufferl.py --env puffer_nmmo3 --mode train
+#from torch.distributed.elastic.multiprocessing.errors import record
+#@record
 
 import os
-import random
-import psutil
+import glob
+import ast
 import time
-
+import random
+import shutil
+import argparse
+import configparser
 from threading import Thread
 from collections import defaultdict, deque
-from contextlib import nullcontext
 
-import rich
-from rich.console import Console
-from rich.table import Table
+import numpy as np
+import psutil
+
 import torch
-import torch.distributed as dist
-from torch.utils.cpp_extension import load
+import torch.distributed
+import torch.utils.cpp_extension
 
 import pufferlib
-import pufferlib.utils
+import pufferlib.sweep
+import pufferlib.vector
 import pufferlib.pytorch
+from pufferlib import _C
 
-def create(config, vecenv, policy, optimizer=None, wandb=None, neptune=None):
-    random.seed(config.seed)
-    np.random.seed(config.seed)
-    torch.backends.cudnn.deterministic = config.torch_deterministic
-    torch.backends.cudnn.benchmark = True
-    torch.set_float32_matmul_precision('high')
-    if config.seed is not None:
-        torch.manual_seed(config.seed)
-
-    ext = 'cu' if 'cuda' in config.device else 'cpp'
-    puffer_cuda = load(
-        name='puffer_cuda',
-        sources=[f'pufferlib.{ext}'],
-        verbose=True
-    )
-    compute_gae = puffer_cuda.compute_gae
-    compute_vtrace = puffer_cuda.compute_vtrace
-    compute_puff_advantage = puffer_cuda.compute_puff_advantage
-
-    losses = pufferlib.namespace(
-        policy_loss=0,
-        value_loss=0,
-        entropy=0,
-        old_approx_kl=0,
-        approx_kl=0,
-        clipfrac=0,
-        explained_variance=0,
-        diayn_loss=0,
-        grad_var=0,
-        importance=0,
-    )
+import rich
+import rich.traceback
+from rich.table import Table
+from rich.console import Console
+from rich_argparse import RichHelpFormatter
+rich.traceback.install(show_locals=False)
+
+class CleanPuffeRL:
+    def __init__(self, config, vecenv, policy, neptune=False, wandb=False):
+        """Set up rollout buffers, optimizer, LR scheduler, AMP and logging for training policy on vecenv."""
+        # Backend perf optimization
+        torch.set_float32_matmul_precision('high')
+        torch.backends.cudnn.deterministic = config['torch_deterministic']
+        torch.backends.cudnn.benchmark = True
+
+        # Reproducibility
+        seed = config['seed']
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+
+        # Vecenv info
+        vecenv.async_reset(seed)
+        obs_space = vecenv.single_observation_space
+        atn_space = vecenv.single_action_space
+        self.total_agents = total_agents = vecenv.num_agents
+
+        # Experience: an 'auto' batch_size/bptt_horizon is derived from the other and written back into config
+        if config['batch_size'] == 'auto' and config['bptt_horizon'] == 'auto':
+            raise pufferlib.APIUsageError('Must specify batch_size or bptt_horizon')
+        elif config['batch_size'] == 'auto':
+            config['batch_size'] = total_agents * config['bptt_horizon']
+        elif config['bptt_horizon'] == 'auto':
+            config['bptt_horizon'] = config['batch_size'] // total_agents
+
+        batch_size = config['batch_size']
+        horizon = config['bptt_horizon']
+        segments = batch_size // horizon
+        self.segments = segments
+        if total_agents > segments:  # every agent needs its own buffer row (ep_indices starts as arange(total_agents))
+            raise pufferlib.APIUsageError(
+                f'Total agents {total_agents} must be <= segments {segments} (batch_size // bptt_horizon)'
+            )
 
-    utilization = Utilization()
-    msg = f'Model Size: {abbreviate(count_params(policy))} parameters'
-
-    vecenv.async_reset(config.seed)
-    total_agents = vecenv.num_agents
-    obs_shape = vecenv.single_observation_space.shape
-    atn_shape = vecenv.single_action_space.shape
-    obs_dtype = pufferlib.pytorch.numpy_to_torch_dtype_dict[vecenv.single_observation_space.dtype]
-    atn_dtype = pufferlib.pytorch.numpy_to_torch_dtype_dict[vecenv.single_action_space.dtype]
-    on_policy_rows = config.batch_size // config.bptt_horizon
-    off_policy_rows = int(config.replay_factor*config.batch_size // config.bptt_horizon)
-    experience_rows = on_policy_rows + off_policy_rows
-    pin = config.device == 'cuda' and config.cpu_offload
-    obs_device = config.device if not pin else 'cpu'
-    experience = pufferlib.namespace(
-        obs=torch.zeros(experience_rows, config.bptt_horizon, *obs_shape,
-            dtype=obs_dtype, pin_memory=pin, device='cpu' if pin else config.device),
-        actions=torch.zeros(experience_rows, config.bptt_horizon, *atn_shape,
-            dtype=atn_dtype, device=config.device),
-        logprobs=torch.zeros(experience_rows, config.bptt_horizon, device=config.device),
-        rewards=torch.zeros(experience_rows, config.bptt_horizon, device=config.device),
-        dones=torch.zeros(experience_rows, config.bptt_horizon, device=config.device),
-        truncateds=torch.zeros(experience_rows, config.bptt_horizon, device=config.device),
-        ratio = torch.ones(experience_rows, config.bptt_horizon, device=config.device),
-    )
-    ep_uses = torch.zeros(experience_rows, device=config.device, dtype=torch.int32)
-    ep_lengths = torch.zeros(total_agents, device=config.device, dtype=torch.int32)
-    ep_indices = torch.arange(total_agents, device=config.device, dtype=torch.int32)
-    free_idx = total_agents
-    assert free_idx <= experience_rows, f'Total agents {total_agents} must be at least batch size {config.batch_size} / bptt_horizon {config.bptt_horizon} = {experience_rows}'
-
-    diayn_skills = None
-    if config.use_diayn:
-        diayn_skills = torch.randint(
-            0, config.diayn_archive, (total_agents,), dtype=torch.long, device=config.device)
-        experience.diayn_batch = torch.zeros(experience_rows, config.bptt_horizon,
-            dtype=torch.long, device=config.device)
-
-    if config.use_p3o:
-        batch_size = config.batch_size
-        p3o_horizon = config.p3o_horizon
-        device = config.device
-        experience.values_mean=torch.zeros(batch_size, p3o_horizon, device=device)
-        experience.values_std=torch.zeros(batch_size, p3o_horizon, device=device)
-        experience.reward_block = torch.zeros(batch_size, p3o_horizon, dtype=torch.float32, device=device)
-        experience.mask_block = torch.ones(batch_size, p3o_horizon, dtype=torch.float32, device=device)
-        experience.buf = torch.zeros(batch_size, p3o_horizon, dtype=torch.float32, device=device)
-        experience.advantages = torch.zeros(batch_size, dtype=torch.float32, device=device)
-        experience.bounds = torch.zeros(batch_size, dtype=torch.int32, device=device)
-        experience.vstd_max = 1.0
-    else:
-        experience.values = torch.zeros(experience_rows, config.bptt_horizon, device=config.device)
-
-    if config.use_vtrace or config.use_puff_advantage:
-        experience.importance = torch.ones(experience_rows, config.bptt_horizon, device=config.device)
-
-    lstm_h = None
-    lstm_c = None
-    # TODO: This breaks compile
-    if isinstance(policy, torch.nn.LSTM):
-        assert total_agents > 0
-        if config.env_batch_size > 1:
-            shape = (total_agents, policy.hidden_size)
-            lstm_h = torch.zeros(shape).to(config.device)
-            lstm_c = torch.zeros(shape).to(config.device)
-        else:
-            # TODO: Doesn't exist in native envs
+        device = config['device']
+        self.observations = torch.zeros(segments, horizon, *obs_space.shape,
+            dtype=pufferlib.pytorch.numpy_to_torch_dtype_dict[obs_space.dtype],
+            pin_memory=device == 'cuda' and config['cpu_offload'],
+            device='cpu' if config['cpu_offload'] else device)
+        self.actions = torch.zeros(segments, horizon, *atn_space.shape, device=device,
+            dtype=pufferlib.pytorch.numpy_to_torch_dtype_dict[atn_space.dtype])
+        self.values = torch.zeros(segments, horizon, device=device)
+        self.logprobs = torch.zeros(segments, horizon, device=device)
+        self.rewards = torch.zeros(segments, horizon, device=device)
+        self.terminals = torch.zeros(segments, horizon, device=device)
+        self.truncations = torch.zeros(segments, horizon, device=device)
+        self.ratio = torch.ones(segments, horizon, device=device)
+        self.importance = torch.ones(segments, horizon, device=device)
+        self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32)
+        self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32)
+        self.free_idx = total_agents
+
+        # LSTM
+        if config['use_rnn']:
             n = vecenv.agents_per_batch
-            shape = (n, policy.hidden_size)
-            lstm_h = {slice(i*n, (i+1)*n):torch.zeros(shape).to(config.device) for i in range(total_agents//n)}
-            lstm_c = {slice(i*n, (i+1)*n):torch.zeros(shape).to(config.device) for i in range(total_agents//n)}
-
-    minibatch_size = min(config.minibatch_size, config.max_minibatch_size)
-    uncompiled_policy = policy
-    if config.compile:
-        policy = torch.compile(policy, mode=config.compile_mode, fullgraph=config.compile_fullgraph)
-
-    if config.optimizer == 'adam':
-        optimizer = torch.optim.Adam(
-            policy.parameters(),
-            lr=config.learning_rate,
-            betas=(config.adam_beta1, config.adam_beta2),
-            eps=config.adam_eps,
-        )
-    elif config.optimizer == 'muon':
-        from heavyball import ForeachMuon
-        import heavyball.utils
-        #heavyball.utils.compile_mode = "reduce-overhead"
-        optimizer = ForeachMuon(
-            policy.parameters(),
-            lr=config.learning_rate,
-            betas=(config.adam_beta1, config.adam_beta2),
-            eps=config.adam_eps,
-
-        )
-    elif config.optimizer == 'kron':
-        from heavyball import ForeachPSGDKron
-        import heavyball.utils
-        #heavyball.utils.compile_mode = "reduce-overhead"
-        optimizer = ForeachPSGDKron(
-            policy.parameters(),
-            lr=config.learning_rate,
-            precond_lr=config.precond_lr,
-            beta=config.adam_beta1,
-        )
-    else:
-        raise ValueError(f'Unknown optimizer: {config.optimizer}')
-
-    epochs = config.total_timesteps // config.batch_size
-    assert config.scheduler in ('linear', 'cosine')
-    if config.scheduler == 'linear':
-        scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.0, total_iters=epochs)
-    elif config.scheduler == 'cosine':
-        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
-
-    amp_context = nullcontext()
-    scaler = None
-    if config.precision != 'float32':
-        amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, config.precision))
-        scaler = torch.amp.GradScaler()
-
-    profile = Profile(['eval', 'env', 'eval_forward', 'eval_copy', 'eval_misc', 'train', 'train_forward',
-        'learn', 'train_copy', 'train_misc', 'custom'], frequency=5)
-
-    data = pufferlib.namespace(
-        config=config,
-        vecenv=vecenv,
-        policy=policy,
-        uncompiled_policy=uncompiled_policy,
-        optimizer=optimizer,
-        scheduler=scheduler,
-        scaler=scaler,
-        experience=experience,
-        profile=profile,
-        losses=losses,
-        wandb=wandb,
-        neptune=neptune,
-        global_step=0,
-        epoch=0,
-        stats=defaultdict(list),
-        msg=msg,
-        last_log_time=0,
-        utilization=utilization,
-        use_p3o=config.use_p3o,
-        p3o_horizon=config.p3o_horizon,
-        puf=config.puf,
-        use_diayn=config.use_diayn,
-        diayn_coef=config.diayn_coef,
-        # Do we use these?
-        ptr=0,
-        step=0,
-        lstm_h=lstm_h,
-        lstm_c=lstm_c,
-        ep_uses=ep_uses,
-        ep_lengths=ep_lengths,
-        ep_indices=ep_indices,
-        free_idx=free_idx,
-        on_policy_rows=on_policy_rows,
-        off_policy_rows=off_policy_rows,
-        experience_rows=experience_rows,
-        device=config.device,
-        minibatch_size=minibatch_size,
-        compute_gae=compute_gae,
-        compute_vtrace=compute_vtrace,
-        compute_puff_advantage=compute_puff_advantage,
-        diayn_skills=diayn_skills,
-        total_agents=total_agents,
-        total_epochs=epochs,
-        start_time=time.time(),
-        uptime=0,
-    )
-    print_dashboard(data, clear=True)
-    return data
-
-def evaluate(data):
-    profile = data.profile
-    epoch = data.epoch
-    profile('eval', epoch)
-    profile('eval_misc', epoch, nest=True)
-    config = data.config
-    experience = data.experience
-    policy = data.policy
-    infos = defaultdict(list)
-    lstm_h = data.lstm_h
-    lstm_c = data.lstm_c
-
-    data.full_rows = 0
-    while data.full_rows < data.on_policy_rows:
-        profile('env', epoch)
-        o, r, d, t, info, env_id, mask = data.vecenv.recv()
-
-        profile('eval_misc', epoch)
-        # Zero-copy indexing for contiguous env_id
-        if config.env_batch_size == 1:
-            gpu_env_id = cpu_env_id = slice(env_id[0], env_id[-1] + 1)
-        else:
-            cpu_env_id = env_id
-            gpu_env_id = torch.as_tensor(env_id).to(config.device, non_blocking=True)
-
-        done_mask = d + t
-        data.global_step += mask.sum()
-
-        profile('eval_copy', epoch)
-        o = torch.as_tensor(o)
-        o_device = o.to(config.device, non_blocking=True)
-        r = torch.as_tensor(r).to(config.device, non_blocking=True)
-        d = torch.as_tensor(d).to(config.device, non_blocking=True)
-
-        h = None
-        c = None
-        if lstm_h is not None:
-            h = lstm_h[gpu_env_id]
-            c = lstm_c[gpu_env_id]
-
-        profile('eval_forward', epoch)
-        with torch.no_grad():
-            state = pufferlib.namespace(
-                reward=r,
-                done=d,
-                env_id=gpu_env_id,
-                mask=mask,
-                lstm_h=h,
-                lstm_c=c,
+            h = policy.hidden_size
+            self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)}
+            self.lstm_c = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)}
+
+        # Minibatching & gradient accumulation
+        minibatch_size = config['minibatch_size']
+        max_minibatch_size = config['max_minibatch_size']
+        self.minibatch_size = min(minibatch_size, max_minibatch_size)
+        if minibatch_size > max_minibatch_size and minibatch_size % max_minibatch_size != 0:
+            raise pufferlib.APIUsageError(
+                f'minibatch_size {minibatch_size} > max_minibatch_size {max_minibatch_size} must divide evenly')
+
+        self.accumulate_minibatches = max(1, minibatch_size // max_minibatch_size)
+        self.total_minibatches = int(config['update_epochs'] * batch_size / self.minibatch_size)
+        self.minibatch_segments = self.minibatch_size // horizon 
+        if self.minibatch_segments * horizon != self.minibatch_size:
+            raise pufferlib.APIUsageError(
+                f'minibatch_size {self.minibatch_size} must be divisible by bptt_horizon {horizon}'
             )
 
-            if data.use_diayn:
-                state.diayn_z = data.diayn_skills[env_id]
+        # Torch compile
+        self.uncompiled_policy = policy
+        self.policy = policy
+        if config['compile']:
+            self.policy = torch.compile(policy, mode=config['compile_mode'], fullgraph=config['compile_fullgraph'])
 
-            logits, value = policy(o_device, state)
-            action, logprob, _ = pufferlib.pytorch.sample_logits(logits, is_continuous=policy.is_continuous)
-            r = torch.clamp(r, -1, 1)
 
-        profile('eval_copy', epoch)
-        with torch.no_grad():
-            if lstm_h is not None:
-                lstm_h[gpu_env_id] = state.lstm_h
-                lstm_c[gpu_env_id] = state.lstm_c
-
-            o = o if config.cpu_offload else o_device
-            actions = store(data, state, o, value, action, logprob, r, d, gpu_env_id, mask)
-
-        profile('eval_misc', epoch)
-        for i in info:
-            for k, v in pufferlib.utils.unroll_nested_dict(i):
-                infos[k].append(v)
-
-        profile('env', epoch)
-        data.vecenv.send(actions)
-
-    profile('eval_misc', epoch)
-    for k, v in infos.items():
-        if '_map' in k:
-            if data.wandb is not None:
-                data.stats[f'Media/{k}'] = data.wandb.Image(v[0])
-                continue
-            elif data.neptune is not None:
-                # TODO: Add neptune image logging
-                pass
-
-        if isinstance(v, np.ndarray):
-            v = v.tolist()
-        try:
-            iter(v)
-        except TypeError:
-            data.stats[k].append(v)
-        else:
-            data.stats[k] += v
-
-    data.free_idx = data.total_agents
-    data.ep_indices = torch.arange(data.total_agents, device=config.device, dtype=torch.int32)
-    data.ep_lengths.zero_()
-    data.ep_uses.zero_()
-    profile.end()
-    return data.stats, infos
-
-def train(data):
-    profile = data.profile
-    epoch = data.epoch
-    profile('train', epoch)
-    config = data.config
-    experience = data.experience
-    losses = data.losses
-
-    total_minibatches = int(config.update_epochs*config.batch_size/data.minibatch_size)
-    accumulate_minibatches = max(1, config.minibatch_size // config.max_minibatch_size)
-    n_samples = data.minibatch_size // config.bptt_horizon
-    for mb in range(total_minibatches):
-        profile('train_misc', epoch, nest=True)
-        loss = 0
-        if config.use_p3o:
-            # Note: This function gets messed up by computing across
-            # episode bounds. Because we store experience in a flat buffer,
-            # bounds can be crossed even after handling dones. This prevent
-            # our method from scaling to longer horizons. TODO: Redo the way
-            # we store experience to avoid this issue
-            vstd_min = experience.values_std.min().item()
-            vstd_max = experience.values_std.max().item()
-
-            data.mask_block.zero_()
-            data.buf.zero_()
-            data.reward_block.zero_()
-            data.bounds.zero_()
-
-            r_mean = experience.rewards.mean().item()
-            r_std = experience.rewards.std().item()
-
-            # TODO: Rename vstd to r_std
-            advantages = compute_advantages(
-                experience.reward_block, experience.mask_block,
-                experience.values_mean, experience.values_std,
-                experience.buf, experience.dones, experience.rewards,
-                experience.bounds, r_std, data.puf, config.p3o_horizon
+        # Optimizer
+        if config['optimizer'] == 'adam':
+            optimizer = torch.optim.Adam(
+                self.policy.parameters(),
+                lr=config['learning_rate'],
+                betas=(config['adam_beta1'], config['adam_beta2']),
+                eps=config['adam_eps'],
+            )
+        elif config['optimizer'] == 'muon':
+            from heavyball import ForeachMuon
+            import heavyball.utils
+            heavyball.utils.compile_mode = config['compile_mode'] if config['compile'] else None
+            optimizer = ForeachMuon(
+                self.policy.parameters(),
+                lr=config['learning_rate'],
+                betas=(config['adam_beta1'], config['adam_beta2']),
+                eps=config['adam_eps'],
             )
-
-            horizon = torch.where(experience.values_std[0] > 0.95*r_std)[0]
-            horizon = horizon[0].item()+1 if len(horizon) else 1
-            if horizon < 16:
-                horizon = 16
-
-            advantages = advantages.cpu().numpy()
-            torch.cuda.synchronize()
-        elif config.use_vtrace:
-            importance = advantages = torch.zeros(experience.values.shape, device=config.device).to(config.device)
-            vs = torch.zeros(experience.values.shape, device=config.device)
-            data.compute_vtrace(experience.values, experience.rewards, experience.dones,
-                experience.ratio, vs, advantages, config.gamma, config.vtrace_rho_clip, config.vtrace_c_clip)
-        elif config.use_puff_advantage:
-            importance = advantages = torch.zeros(experience.values.shape, device=config.device).to(config.device)
-            vs = torch.zeros(experience.values.shape, device=config.device)
-            data.compute_puff_advantage(experience.values, experience.rewards, experience.dones,
-                experience.ratio, vs, advantages, config.gamma, config.gae_lambda, config.vtrace_rho_clip, config.vtrace_c_clip)
         else:
-            importance = advantages = data.compute_gae(experience.values, experience.rewards,
-                experience.dones, config.gamma, config.gae_lambda)
-
-        profile('train_copy', epoch)
-        batch = sample(data, importance, n_samples)
-
-        profile('train_misc', epoch)
-        state = pufferlib.namespace(
-            action=batch.actions,
-            lstm_h=None,
-            lstm_c=None,
-        )
+            raise ValueError(f'Unknown optimizer: {config["optimizer"]}')
+
+        self.optimizer = optimizer
+
+        # Learning rate scheduler
+        epochs = config['total_timesteps'] // config['batch_size']
+        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
+        self.total_epochs = epochs
+
+        # Automatic mixed precision (validate first: an unknown name must raise APIUsageError, not fail in getattr)
+        precision = config['precision']
+        if precision not in ('float32', 'bfloat16'):
+            raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16')
+        self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision))
+
+        # Logging
+        self.neptune = neptune
+        self.wandb = wandb
+        if neptune:
+            self.neptune = init_neptune(args, tag=config['tag'])
+            self.run_id = self.neptune._sys_id
+            for k, v in pufferlib.unroll_nested_dict(args):
+                self.neptune[k].append(v)
+        elif wandb:
+            self.wandb = init_wandb(args, tag=config['tag'])
+            self.run_id = self.wandb.run.id
+        else:
+            self.run_id = str(int(random.random() * 1e8))
+
+        # Initializations
+        self.config = config
+        self.vecenv = vecenv
+        self.epoch = 0
+        self.global_step = 0
+        self.last_log_step = 0
+        self.last_log_time = time.time()
+        self.start_time = time.time()
+        self.utilization = Utilization()
+        self.profile = Profile()
+        self.stats = defaultdict(list)
+        self.last_stats = defaultdict(list)
+        self.losses = {}
+
+        # Dashboard
+        self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad)
+        self.print_dashboard(clear=True)
+
+    @property
+    def uptime(self):  # wall-clock seconds elapsed since self.start_time was set in __init__
+        return time.time() - self.start_time
+
+    @property
+    def sps(self):
+        """Steps per second measured from last_log_step / last_log_time; 0 when no new steps."""
+        steps = self.global_step - self.last_log_step
+        # Short-circuit so the elapsed time is neither read nor divided by when nothing was stepped
+        return 0 if steps == 0 else steps / (time.time() - self.last_log_time)
+
+    def evaluate(self):
+        """Step the vecenv until self.segments buffer rows are full; returns self.stats (info values by key)."""
+        profile = self.profile
+        epoch = self.epoch
+        profile('eval', epoch)
+        profile('eval_misc', epoch, nest=True)
+
+        config = self.config
+        device = config['device']
+
+        self.full_rows = 0
+        while self.full_rows < self.segments:
+            profile('env', epoch)
+            o, r, d, t, info, env_id, mask = self.vecenv.recv()
+
+            profile('eval_misc', epoch)
+            # TODO: Port to vecenv
+            env_id = slice(env_id[0], env_id[-1] + 1)  # treats the returned ids as one contiguous ascending range
+
+            # TODO: Handle truncations
+            done_mask = d + t  # NOTE(review): currently unused; t is not stored anywhere below
+            self.global_step += mask.sum()
+
+            o = torch.as_tensor(o).pin_memory()
+            profile('eval_copy', epoch)
+            o_device = o.to(device, non_blocking=True)
+            profile('eval_misc', epoch)
+            r = torch.as_tensor(r).to(device, non_blocking=True)
+            d = torch.as_tensor(d).to(device, non_blocking=True)
+
+            profile('eval_forward', epoch)
+            with torch.no_grad(), self.amp_context:
+                state = dict(
+                    reward=r,
+                    done=d,
+                    env_id=env_id,
+                    mask=mask,
+                )
+
+                if config['use_rnn']:
+                    state['lstm_h'] = self.lstm_h[env_id.start]
+                    state['lstm_c'] = self.lstm_c[env_id.start]
+
+                logits, value = self.policy(o_device, state)
+                action, logprob, _ = pufferlib.pytorch.sample_logits(logits)
+                r = torch.clamp(r, -1, 1)  # the policy saw the raw reward via state; the buffer stores the clipped one
+
+            profile('eval_copy', epoch)
+            with torch.no_grad():
+                if config['use_rnn']:
+                    self.lstm_h[env_id.start] = state['lstm_h']
+                    self.lstm_c[env_id.start] = state['lstm_c']
+
+                # Fast path for fully vectorized envs
+                l = self.ep_lengths[env_id.start].item()  # the first env's step index is used for the whole slice
+                batch_rows = slice(self.ep_indices[env_id.start].item(), 1+self.ep_indices[env_id.stop - 1].item())
+
+                if config['cpu_offload']:
+                    self.observations[batch_rows, l] = o
+                else:
+                    self.observations[batch_rows, l] = o_device
+
+                self.actions[batch_rows, l] = action
+                self.logprobs[batch_rows, l] = logprob
+                self.rewards[batch_rows, l] = r
+                self.terminals[batch_rows, l] = d.float()
+                self.values[batch_rows, l] = value.flatten()
+
+                # TODO: Handle masks!!
+                #indices = np.where(mask)[0]
+                #data.ep_lengths[env_id[mask]] += 1
+                self.ep_lengths[env_id] += 1
+                if l+1 >= config['bptt_horizon']:  # rows for this slice are full: move its envs onto fresh rows
+                    num_full = env_id.stop - env_id.start
+                    self.ep_indices[env_id] = self.free_idx + torch.arange(num_full, device=config['device']).int()
+                    self.ep_lengths[env_id] = 0
+                    self.free_idx += num_full
+                    self.full_rows += num_full
+
+                action = action.squeeze(-1).cpu().numpy()
+                if isinstance(logits, torch.distributions.Normal):
+                    action = np.clip(action, self.vecenv.action_space.low, self.vecenv.action_space.high)  # was bare `vecenv`: NameError
+
+            profile('eval_misc', epoch)
+            for i in info:
+                for k, v in pufferlib.unroll_nested_dict(i):
+                    if isinstance(v, np.ndarray):
+                        v = v.tolist()  # falls through so converted arrays are recorded too (an elif here dropped them)
+                    if isinstance(v, (list, tuple)):
+                        self.stats[k].extend(v)
+                    else:
+                        self.stats[k].append(v)
+
+            profile('env', epoch)
+            self.vecenv.send(action)
 
-        if config.use_diayn:
-            state.diayn_z = batch.diayn_z.reshape(-1)
+        profile('eval_misc', epoch)
+        self.free_idx = self.total_agents
+        self.ep_indices = torch.arange(self.total_agents, device=device, dtype=torch.int32)
+        self.ep_lengths.zero_()
+        profile.end()
+        return self.stats
+
+    def train(self):
+        profile = self.profile
+        epoch = self.epoch
+        profile('train', epoch)
+        losses = defaultdict(float)
+        config = self.config
+        device = config['device']
+
+        b0 = config['prio_beta0']
+        a = config['prio_alpha']
+        clip_coef = config['clip_coef']
+        vf_clip = config['vf_clip_coef']
+        anneal_beta = b0 + (1 - b0)*a*self.epoch/self.total_epochs
+        self.ratio[:] = 1
+
+        for mb in range(self.total_minibatches):
+            profile('train_misc', epoch, nest=True)
+            self.amp_context.__enter__()
+
+            # TODO: Eliminate
+            shape = self.values.shape
+            n = (shape[0]//256)*256
+            advantages = torch.zeros(shape, device=device)
+            torch.ops.pufferlib.compute_puff_advantage(self.values[:n], self.rewards[:n],
+                self.terminals[:n], self.ratio[:n], advantages[:n], config['gamma'],
+                config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip'])
+
+            profile('train_copy', epoch)
+            adv = advantages.abs().sum(axis=1)
+            prio_weights = torch.nan_to_num(adv**a, 0, 0, 0)
+            prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6)
+            idx = torch.multinomial(prio_probs, self.minibatch_segments)
+            mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta
+            mb_obs = self.observations[idx]
+            mb_actions = self.actions[idx]
+            mb_logprobs = self.logprobs[idx]
+            mb_rewards = self.rewards[idx]
+            mb_terminals = self.terminals[idx]
+            mb_truncations = self.truncations[idx]
+            mb_ratio = self.ratio[idx]
+            mb_values = self.values[idx]
+            mb_returns = advantages[idx] + mb_values
+            mb_advantages = advantages[idx]
+
+            profile('train_forward', epoch)
+            if not config['use_rnn']:
+                mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape)
+
+            state = dict(
+                action=mb_actions,
+                lstm_h=None,
+                lstm_c=None,
+            )
 
-        profile('train_forward', epoch)
-        if not isinstance(data.policy, torch.nn.LSTM):
-            batch.obs = batch.obs.reshape(-1, *data.vecenv.single_observation_space.shape)
+            # TODO: Currently only returning traj shaped value as a hack
+            logits, newvalue = self.policy.forward_train(mb_obs, state)
+            # TODO: Redundant actions?
+            actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions)
 
-        # TODO: Currently only returning traj shaped value as a hack
-        logits, newvalue = data.policy.forward_train(batch.obs, state)
-        actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits,
-            action=batch.actions, is_continuous=data.policy.is_continuous)
+            profile('train_misc', epoch)
+            newlogprob = newlogprob.reshape(mb_logprobs.shape)
+            logratio = newlogprob - mb_logprobs
+            ratio = logratio.exp()
+            self.ratio[idx] = ratio # TODO: Experiment with this
 
-        profile('train_misc', epoch)
-        if config.use_diayn:
-            N = 1
-            batch_logits = state.batch_logits[:, ::N]
-            batch_logits = torch.nn.functional.log_softmax(batch_logits, dim=-1)
-            mask = torch.nn.functional.one_hot(batch.actions[:, ::N], batch_logits.shape[-1]).bool()
-            #batch_logits = mask*batch_logits
-            batch_logits = batch_logits.view(batch_logits.shape[0], -1)
-            diayn_policy = data.policy.policy
-            q = diayn_policy.discrim_forward(batch_logits)
-            z_idxs = batch.diayn_z[:, 0]
-            q = q.view(-1, q.shape[-1])
-            diayn_loss = torch.nn.functional.cross_entropy(q, z_idxs)
-            loss += config.diayn_loss_coef*diayn_loss
-
-        newlogprob = newlogprob.reshape(batch.logprobs.shape)
-        logratio = newlogprob - batch.logprobs
-        ratio = logratio.exp()
-        experience.ratio[batch.idx] = ratio
-
-        # TODO: Only do this if we are KL clipping? Saves 1-2% compute
-        with torch.no_grad():
-            # calculate approx_kl http://joschu.net/blog/kl-approx.html
-            old_approx_kl = (-logratio).mean()
-            approx_kl = ((ratio - 1) - logratio).mean()
-            clipfrac = ((ratio - 1.0).abs() > config.clip_coef).float().mean()
-
-        if config.use_vtrace or config.use_puff_advantage:
+            # TODO: Only do this if we are KL clipping? Saves 1-2% compute
             with torch.no_grad():
-                adv = advantages[batch.idx]
-                vs = vs[batch.idx]
-                if config.use_vtrace:
-                    data.compute_vtrace(batch.values, batch.rewards, batch.dones,
-                        ratio, vs, adv, config.gamma, config.vtrace_rho_clip, config.vtrace_c_clip)
-                elif config.use_puff_advantage:
-                    data.compute_puff_advantage(batch.values, batch.rewards, batch.dones,
-                        ratio, vs, adv, config.gamma, config.gae_lambda, config.vtrace_rho_clip, config.vtrace_c_clip)
-
-                #advantages[batch.idx] = adv
-                #importance[batch.idx] = adv
-
-        adv = batch.advantages
-        if config.norm_adv:
-            adv = (adv - adv.mean()) / (adv.std() + 1e-8)
-
-        adv = adv * batch.prio
-
-        # Policy loss
-        pg_loss1 = -adv * ratio
-        pg_loss2 = -adv * torch.clamp(
-            ratio, 1 - config.clip_coef, 1 + config.clip_coef
-        )
-        pg_loss = torch.max(pg_loss1, pg_loss2).mean()
-
-        # Value loss
-        if config.use_p3o:
-            newvalue_mean = newvalue.mean.view(-1, config.p3o_horizon)
-            newvalue_std = newvalue.std.view(-1, config.p3o_horizon)
-            newvalue_var = torch.square(newvalue_std)
-            criterion = torch.nn.GaussianNLLLoss(reduction='none')
-            v_loss = criterion(newvalue_mean, batch.reward_block, newvalue_var)
-            v_loss = v_loss[:, :(horizon+3)]
-            mask_block = mask_block[:, :(horizon+3)]
-            v_loss = v_loss[mask_block.bool()].mean()
-        elif config.clip_vloss:
-            ret = batch.returns#.flatten()
-            newvalue = newvalue.view(ret.shape)
-            v_loss_unclipped = (newvalue - ret) ** 2
-            val = batch.values#.flatten()
-            v_clipped = val + torch.clamp(
-                newvalue - val,
-                -config.vf_clip_coef,
-                config.vf_clip_coef,
-            )
-            v_loss_clipped = (v_clipped - ret) ** 2
-            v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
-            v_loss = 0.5 * v_loss_max.mean()
-        else:
-            newvalue = newvalue.flatten()
-            v_loss = 0.5 * ((newvalue - ret) ** 2).mean()
-
-        entropy_loss = entropy.mean()
-        loss += pg_loss - config.ent_coef*entropy_loss + v_loss*config.vf_coef
-
-        # This breaks vloss clipping?
-        with torch.no_grad():
-            experience.values[batch.idx] = newvalue
+                old_approx_kl = (-logratio).mean()
+                approx_kl = ((ratio - 1) - logratio).mean()
+                clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean()
+
+            # TODO: Do you need to do this? Policy hasn't changed
+            adv = advantages[idx]
+            torch.ops.pufferlib.compute_puff_advantage(mb_values, mb_rewards, mb_terminals,
+                ratio, adv, config['gamma'], config['gae_lambda'],
+                config['vtrace_rho_clip'], config['vtrace_c_clip'])
+            adv = mb_advantages
+            adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) # TODO: Norm by full batch
+
+            # Losses
+            pg_loss1 = -adv * ratio
+            pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)
+            pg_loss = torch.max(pg_loss1, pg_loss2).mean()
+
+            newvalue = newvalue.view(mb_returns.shape)
+            v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip)
+            v_loss_unclipped = (newvalue - mb_returns) ** 2
+            v_loss_clipped = (v_clipped - mb_returns) ** 2
+            v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean()
+
+            entropy_loss = entropy.mean()
+
+            loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss
+            self.amp_context.__enter__() # TODO: Debug
+
+            # This breaks vloss clipping?
+            self.values[idx] = newvalue.detach().float()
+
+            # Logging
+            profile('train_misc', epoch)
+            losses['policy_loss'] += pg_loss.item() / self.total_minibatches
+            losses['value_loss'] += v_loss.item() / self.total_minibatches
+            losses['entropy'] += entropy_loss.item() / self.total_minibatches
+            losses['old_approx_kl'] += old_approx_kl.item() / self.total_minibatches
+            losses['approx_kl'] += approx_kl.item() / self.total_minibatches
+            losses['clipfrac'] += clipfrac.item() / self.total_minibatches
+            losses['importance'] += ratio.mean().item() / self.total_minibatches
+
+            # Learn on accumulated minibatches
+            profile('learn', epoch)
+            loss.backward()
+            if (mb + 1) % self.accumulate_minibatches == 0:
+                torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm'])
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+        # Reprioritize experience
+        profile('train_misc', epoch)
+        if config['anneal_lr']:
+            self.scheduler.step()
+
+        y_pred = self.values.flatten()
+        y_true = advantages.flatten() + self.values.flatten()
+        var_y = y_true.var()
+        explained_var = torch.nan if var_y == 0 else 1 - (y_true - y_pred).var() / var_y
+        losses['explained_variance'] = explained_var.item()
+
+        profile.end()
+        logs = None
+        self.epoch += 1
+        done_training = self.global_step >= config['total_timesteps']
+        if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25:
+            logs = self.mean_and_log()
+            self.losses = losses
+            self.print_dashboard()
+            self.last_stats = self.stats
+            self.stats = defaultdict(list)
+            self.last_log_time = time.time()
+            self.last_log_step = self.global_step
+            profile.clear()
+
+        if self.epoch % config['checkpoint_interval'] == 0 or done_training:
+            self.save_checkpoint()
+            self.msg = f'Checkpoint saved at update {self.epoch}'
 
-        profile('learn', epoch)
-        if data.scaler is not None:
-            loss = data.scaler.scale(loss)
+        return logs
 
-        loss.backward()
+    def mean_and_log(self):
+        """Average the collected stats, assemble the log dict, and send it to the logger on rank 0."""
+        for k in list(self.stats):
+            try:
+                self.stats[k] = np.mean(self.stats[k])
+            except Exception:
+                # Cannot be averaged: drop the stat for good. The previous code deleted
+                # the key and then immediately re-inserted the raw, unaveraged value.
+                del self.stats[k]
+
+        # dist_sum/dist_mean return their input unchanged when torch.distributed is not initialized
+        device = self.config['device']
+        agent_steps = int(dist_sum(self.global_step, device))
+        logs = {
+            'SPS': dist_sum(self.sps, device),
+            'agent_steps': agent_steps,
+            'uptime': time.time() - self.start_time,
+            'epoch': int(dist_sum(self.epoch, device)),
+            'learning_rate': self.optimizer.param_groups[0]["lr"],
+            **{f'environment/{k}': dist_mean(v, device) for k, v in self.stats.items()},
+            **{f'losses/{k}': dist_mean(v, device) for k, v in self.losses.items()},
+            **{f'performance/{k}': dist_sum(v['elapsed'], device) for k, v in self.profile},
+        }
 
-        if data.scaler is not None:
-            data.scaler.unscale_(data.optimizer)
+        if torch.distributed.is_initialized() and torch.distributed.get_rank() != 0:
+            return logs
+        elif self.wandb:
+            self.wandb.log(logs)
+        elif self.neptune:
+            for k, v in logs.items():
+                self.neptune[k].append(v, step=agent_steps)
 
-        # TODO: Delete?
-        with torch.no_grad():
-            grads = torch.cat([p.grad.flatten() for p in data.policy.parameters()])
-            grad_var = grads.var(0).mean() * config.minibatch_size
-            data.msg = f'Gradient variance: {grad_var.item():.3f}'
+        return logs
 
-        if (mb + 1) % accumulate_minibatches == 0:
-            torch.nn.utils.clip_grad_norm_(data.policy.parameters(), config.max_grad_norm)
+    def close(self):
+        """Stop the envs and utilization monitor, export the final model, and end the logger run."""
+        self.vecenv.close()
+        self.utilization.stop()
+        export_path = os.path.join(self.config['data_dir'], f'{self.run_id}.pt')
+        shutil.copy(self.save_checkpoint(), export_path)
+        if self.wandb:
+            model_artifact = self.wandb.Artifact(self.run_id, type='model')
+            model_artifact.add_file(export_path)
+            self.wandb.run.log_artifact(model_artifact)
+            self.wandb.finish()
+        elif self.neptune:
+            self.neptune['model'].track_files(export_path)
+            self.neptune.stop()
+
+    def save_checkpoint(self):
+        """Write this epoch's policy weights plus resumable trainer state; return the model path."""
+        path = os.path.join(self.config['data_dir'], self.run_id)
+        os.makedirs(path, exist_ok=True)  # no exists()/makedirs() race between processes
+        model_name = f'model_{self.epoch:06d}.pt'
+        model_path = os.path.join(path, model_name)
+        # Weights for this epoch are already on disk: reuse them and leave trainer state alone
+        if os.path.exists(model_path):
+            return model_path
+        torch.save(self.uncompiled_policy.state_dict(), model_path)
+        state = {
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'global_step': self.global_step,
+            'agent_step': self.global_step,
+            'update': self.epoch,
+            'model_name': model_name,
+            'run_id': self.run_id,
+        }
+        state_path = os.path.join(path, 'trainer_state.pt')
+        torch.save(state, state_path + '.tmp')
+        # Write-then-swap keeps trainer_state.pt intact if saving is interrupted; os.replace
+        # also overwrites an existing target on Windows, where os.rename raises instead
+        os.replace(state_path + '.tmp', state_path)
+        return model_path
 
-            # TODO: Can remove scaler if only using bf16
-            if data.scaler is None:
-                data.optimizer.step()
-            else:
-                data.scaler.step(data.optimizer)
-                data.scaler.update()
+    def try_load_checkpoint(self):
+        """Load policy weights and optimizer state from this run's checkpoint directory, if it exists."""
+        path = os.path.join(self.config['data_dir'], self.run_id)
+        if not os.path.exists(path):
+            print('No checkpoints found. Assuming new experiment')
+            return
 
-            data.optimizer.zero_grad()
+        resume_state = torch.load(os.path.join(path, 'trainer_state.pt'), weights_only=False)
+        model_path = os.path.join(path, resume_state['model_name'])
+        # map_location belongs to torch.load(); passing it to load_state_dict() raised a TypeError
+        weights = torch.load(model_path, map_location=self.config['device'], weights_only=True)
+        self.uncompiled_policy.load_state_dict(weights)  # same handle save_checkpoint() saves from
+        self.optimizer.load_state_dict(resume_state['optimizer_state_dict'])
+        print(f'Loaded checkpoint {resume_state["model_name"]}')
+
+    def print_dashboard(self, clear=False, idx=[0],  # Renders the training dashboard as rich tables; idx's mutable default persists across calls
+            c1='[cyan]', c2='[white]', b1='[bright_cyan]', b2='[bright_white]'):  # c1/c2/b1/b2: color markup prefixes placed in the cell strings
+        profile = self.profile
+        config = self.config
+        console = Console()  # NOTE(review): Console/Table/rich imports are not visible in this chunk - confirm they exist in the new file
+        dashboard = Table(box=rich.box.ROUNDED, expand=True,
+            show_header=False, border_style='bright_cyan')
+        table = Table(box=None, expand=True, show_header=False)
+        dashboard.add_row(table)
+
+        table.add_column(justify="left", width=30)
+        table.add_column(justify="center", width=12)
+        table.add_column(justify="center", width=12)
+        table.add_column(justify="center", width=13)
+        table.add_column(justify="right", width=13)
+
+        table.add_row(
+            f'{b1}PufferLib {b2}2.0.0 {idx[0]*" "}:blowfish:',
+            f'{c1}CPU: {b2}{np.mean(self.utilization.cpu_util):.1f}{c2}%',
+            f'{c1}GPU: {b2}{np.mean(self.utilization.gpu_util):.1f}{c2}%',
+            f'{c1}DRAM: {b2}{np.mean(self.utilization.cpu_mem):.1f}{c2}%',
+            f'{c1}VRAM: {b2}{np.mean(self.utilization.gpu_mem):.1f}{c2}%',
+        )
+        idx[0] = (idx[0] - 1) % 10  # cycles 0, 9, 8, ..., 1 so the padding before the emoji changes on every call
+            
+        s = Table(box=None, expand=True)
+        sps = self.sps
+        remaining = 'A hair past a freckle'  # placeholder shown while sps is 0
+        if sps != 0:
+            remaining = duration((config['total_timesteps'] - self.global_step)/sps, b2, c2)  # steps left / steps per second
+
+        s.add_column(f"{c1}Summary", justify='left', vertical='top', width=10)
+        s.add_column(f"{c1}Value", justify='right', vertical='top', width=14)
+        s.add_row(f'{c2}Env', f'{b2}{config["env"]}')
+        s.add_row(f'{c2}Params', abbreviate(self.model_size, b2, c2))
+        s.add_row(f'{c2}Steps', abbreviate(self.global_step, b2, c2))
+        s.add_row(f'{c2}SPS', abbreviate(sps, b2, c2))
+        s.add_row(f'{c2}Epoch', f'{b2}{self.epoch}')
+        s.add_row(f'{c2}Uptime', duration(self.uptime, b2, c2))
+        s.add_row(f'{c2}Remaining', remaining)
+
+        delta = profile.eval['buffer'] + profile.train['buffer']  # reference total passed to fmt_perf for the % column
+        p = Table(box=None, expand=True, show_header=False)
+        p.add_column(f"{c1}Performance", justify="left", width=10)
+        p.add_column(f"{c1}Time", justify="right", width=8)
+        p.add_column(f"{c1}%", justify="right", width=4)
+        p.add_row(*fmt_perf('Evaluate', b1, delta, profile.eval, b2, c2))
+        p.add_row(*fmt_perf('  Forward', c2, delta, profile.eval_forward, b2, c2))
+        p.add_row(*fmt_perf('  Env', c2, delta, profile.env, b2, c2))
+        p.add_row(*fmt_perf('  Copy', c2, delta, profile.eval_copy, b2, c2))
+        p.add_row(*fmt_perf('  Misc', c2, delta, profile.eval_misc, b2, c2))
+        p.add_row(*fmt_perf('Train', b1, delta, profile.train, b2, c2))
+        p.add_row(*fmt_perf('  Forward', c2, delta, profile.train_forward, b2, c2))
+        p.add_row(*fmt_perf('  Learn', c2, delta, profile.learn, b2, c2))
+        p.add_row(*fmt_perf('  Copy', c2, delta, profile.train_copy, b2, c2))
+        p.add_row(*fmt_perf('  Misc', c2, delta, profile.train_misc, b2, c2))
+
+        l = Table(box=None, expand=True, )
+        l.add_column(f'{c1}Losses', justify="left", width=16)
+        l.add_column(f'{c1}Value', justify="right", width=8)
+        for metric, value in self.losses.items():
+            l.add_row(f'{c2}{metric}', f'{b2}{value:.3f}')
+
+        monitor = Table(box=None, expand=True, pad_edge=False)
+        monitor.add_row(s, p, l)  # summary, performance and losses tables side by side
+        dashboard.add_row(monitor)
+
+        table = Table(box=None, expand=True, pad_edge=False)
+        dashboard.add_row(table)
+        left = Table(box=None, expand=True)
+        right = Table(box=None, expand=True)
+        table.add_row(left, right)
+        left.add_column(f"{c1}User Stats", justify="left", width=20)
+        left.add_column(f"{c1}Value", justify="right", width=10)
+        right.add_column(f"{c1}User Stats", justify="left", width=20)
+        right.add_column(f"{c1}Value", justify="right", width=10)
+        i = 0
+        for metric, value in (self.stats or self.last_stats).items():  # falls back to last_stats when self.stats is empty
+            try: # Discard non-numeric values
+                int(value)
+            except:
+                continue
 
-        profile('train_misc', epoch)
-        losses.policy_loss += pg_loss.item() / total_minibatches
-        losses.value_loss += v_loss.item() / total_minibatches
-        losses.entropy += entropy_loss.item() / total_minibatches
-        losses.old_approx_kl += old_approx_kl.item() / total_minibatches
-        losses.approx_kl += approx_kl.item() / total_minibatches
-        losses.clipfrac += clipfrac.item() / total_minibatches
-        losses.grad_var += grad_var.item() / total_minibatches
-        losses.importance += ratio.mean().item() / total_minibatches
-
-        if data.use_diayn:
-            losses.diayn_loss += diayn_loss.item() / total_minibatches
-
-        if config.target_kl is not None:
-            if approx_kl > config.target_kl:
+            u = left if i % 2 == 0 else right  # alternate stats between the two columns
+            u.add_row(f'{c2}{metric}', f'{b2}{value:.3f}')
+            i += 1
+            if i == 30:  # show at most 30 stats
                 break
 
-    # Reprioritize experience
-    profile('train_misc', epoch)
-    data.max_uses = data.ep_uses.max().item()
-    data.mean_uses = data.ep_uses.float().mean().item()
-    if config.replay_factor > 0:
-        advantages = torch.zeros(experience.values.shape, device=config.device).to(config.device)
-        vs = torch.zeros(experience.values.shape, device=config.device)
-        data.compute_puff_advantage(experience.values, experience.rewards, experience.dones,
-            experience.ratio, vs, advantages, config.gamma, config.gae_lambda, config.vtrace_rho_clip, config.vtrace_c_clip)
-
-        exp = sample(data, advantages, data.off_policy_rows, method='random')
-        for k, v in experience.items():
-            v[data.on_policy_rows:] = exp[k]
+        if clear:
+            console.clear()
 
-    experience.ratio[:data.on_policy_rows] = 1
+        with console.capture() as capture:  # render the dashboard into a string instead of printing it directly
+            console.print(dashboard)
 
-    if config.anneal_lr:
-        data.scheduler.step()
+        print('\033[0;0H' + capture.get())  # the ANSI escape moves the cursor to the top-left before printing
 
-    if config.use_p3o:
-        y_pred = experience.values_mean
-        y_true = experience.reward_block
-    else:
-        y_pred = experience.values.flatten()
-
-        # Probably not updated
-        y_true = advantages.flatten() + experience.values.flatten()
-
-    var_y = y_true.var()
-    explained_var = torch.nan if var_y == 0 else 1 - (y_true - y_pred).var() / var_y
-    #losses.explained_variance = explained_var.item()
-
-    profile.end()
-    profile.clear()
-    logs = None
-    data.epoch += 1
-    done_training = data.global_step >= config.total_timesteps
-    if done_training or data.global_step == 0 or time.time() - data.start_time - data.uptime > 1:
-        data.uptime = time.time() - data.start_time
-        logs = mean_and_log(data)
-        print_dashboard(data)
-        data.stats = defaultdict(list)
-
-    for k in losses:
-        losses[k] = 0
-
-    if data.epoch % config.checkpoint_interval == 0 or done_training:
-        save_checkpoint(data)
-        data.msg = f'Checkpoint saved at update {data.epoch}'
-
-    return logs
-
-def store(data, state, obs, value, action, logprob, reward, done, env_id, mask):
-    exp = data.experience
-
-    # Fast path for fully vectorized envs
-    if data.config.env_batch_size == 1:
-        l = data.ep_lengths[env_id.start].item()
-        batch_rows = slice(data.ep_indices[env_id.start].item(), 1+data.ep_indices[env_id.stop - 1].item())
-    else:
-        l = data.ep_lengths[env_id]
-        batch_rows = data.ep_indices[env_id]
-
-    exp.obs[batch_rows, l] = obs
-    exp.actions[batch_rows, l] = action
-    exp.logprobs[batch_rows, l] = logprob
-    exp.rewards[batch_rows, l] = reward
-    exp.dones[batch_rows, l] = done.float()
-
-    if data.use_p3o:
-        exp.values_mean[batch_rows, l] = value.mean
-        exp.values_std[batch_rows, l] = value.std
-    else:
-        exp.values[batch_rows, l] = value.flatten()
-        #exp.values[l, batch_rows] = value.flatten()
-
-    if data.use_diayn:
-        exp.diayn_batch[batch_rows, l] = state.diayn_z
-
-    # TODO: Handle masks!!
-    #indices = np.where(mask)[0]
-    #data.ep_lengths[env_id[mask]] += 1
-    data.ep_lengths[env_id] += 1
-    if data.config.env_batch_size == 1:
-        if l+1 >= data.config.bptt_horizon:
-            num_full = env_id.stop - env_id.start
-            data.ep_indices[env_id] = data.free_idx + torch.arange(num_full, device=data.device).int()
-            data.ep_lengths[env_id] = 0
-            data.free_idx += num_full
-            data.full_rows += num_full
-    else:
-        full = data.ep_lengths[env_id] >= data.config.bptt_horizon
-        num_full = full.sum()
-        if num_full > 0:
-            full_ids = env_id[full]
-            data.ep_indices[full_ids] = data.free_idx + torch.arange(num_full, device=data.device).int()
-            data.ep_lengths[full_ids] = 0
-            data.free_idx += num_full
-            data.full_rows += num_full
-
-    data.step += 1
-
-    return action.cpu().numpy()
-
-def sample(data, advantages, n, reward_block=None, mask_block=None, method='prio'):
-    exp = data.experience
-    if method == 'topk':
-        _, idx = torch.topk(advantages.abs().sum(axis=1), n)
-    elif method == 'prio':
-        adv = advantages.abs().sum(axis=1)
-        probs = adv**data.config.prio_alpha
-        probs = (probs + 1e-6)/(probs.sum() + 1e-6)
-        idx = torch.multinomial(probs, n)
-    elif method == 'multinomial':
-        idx = torch.multinomial(advantages.abs().sum(axis=1) + 1e-6, n)
-    elif method == 'random':
-        idx = torch.randint(0, advantages.shape[0], (n,), device=data.device)
-    else:
-        raise ValueError(f'Unknown sampling method: {method}')
-
-
-    data.ep_uses[idx] += 1
-    output = {k: v[idx] for k, v in exp.items()}
-    output['idx'] = idx
-
-    if data.use_p3o:
-        output['reward_block'] = reward_block[idx]
-        output['mask_block'] = mask_block[idx]
-        output['values_mean'] = exp.values_mean[idx]
-        output['values_std'] = exp.values_std[idx]
+def abbreviate(num, b2, c2):
+    """Format num compactly (999, 1.5K, 2.5M, 3.0B, 1.50T) with color markup prefixes applied."""
+    if num < 1e3:
+        return f'{b2}{num}'
+    # b2 colors the digits and c2 the unit suffix (both arguments used to be ignored).
+    # for/else: the else branch runs only when no limit matched, i.e. num >= 1e12.
+    for limit, scale, unit in ((1e6, 1e3, 'K'), (1e9, 1e6, 'M'), (1e12, 1e9, 'B')):
+        if num < limit:
+            return f'{b2}{num/scale:.1f}{c2}{unit}'
     else:
-        output['values'] = exp.values[idx]
-        output['advantages'] = advantages[idx]
-        output['returns'] = advantages[idx] + exp.values[idx]
+        return f'{b2}{num/1e12:.2f}{c2}T'
 
-    if data.use_diayn:
-        output['diayn_z'] = exp.diayn_batch[idx]
-
-    output['prio'] = 1
-    if method == 'prio':
-        beta = data.config.prio_beta0 + (1 - data.config.prio_beta0)*data.config.prio_alpha*data.epoch/data.total_epochs
-        output['prio'] = (((1/len(probs)) * (1/probs[idx]))**beta).unsqueeze(1).expand_as(output['advantages'])
+def duration(seconds, b2, c2):
+    """Format whole seconds as colored 'Xh Ym Zs' text, leaving out leading units that are zero."""
+    minutes, s = divmod(int(seconds), 60)
+    h, m = divmod(minutes, 60)
+    parts = [f"{b2}{h}{c2}h", f"{b2}{m}{c2}m", f"{b2}{s}{c2}s"]
+    return " ".join(parts[0 if h else 1 if m else 2:])
 
-    return pufferlib.namespace(**output)
+def fmt_perf(name, color, delta_ref, prof, b2, c2):  # builds one performance row: (label, elapsed time, percent of delta_ref)
+    share = int(100*prof['buffer']/delta_ref - 1e-5) if delta_ref != 0 else 0  # the 1e-5 makes an exact 100 truncate to 99
+    return (f'{color}{name}', duration(prof['elapsed'], b2, c2), f'{b2}{share:2d}{c2}%')
 
 def dist_sum(value, device):
-    if not dist.is_initialized():
+    if not torch.distributed.is_initialized():
         return value
 
     tensor = torch.tensor(value, device=device)
-    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
+    torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
     return tensor.item()
 
 def dist_mean(value, device):
-    if not dist.is_initialized():
+    if not torch.distributed.is_initialized():
         return value
 
-    return dist_sum(value, device) / dist.get_world_size()
-
-def mean_and_log(data):
-    for k in list(data.stats.keys()):
-        v = data.stats[k]
-        try:
-            v = np.mean(v)
-        except:
-            del data.stats[k]
-
-        data.stats[k] = v
-
-    device = data.config.device
-
-    agent_steps = int(dist_sum(data.global_step, device))
-    logs = {
-        #'SPS': dist_sum(data.profile.SPS, device),
-        'agent_steps': agent_steps,
-        'epoch': int(dist_sum(data.epoch, device)),
-        'learning_rate': data.optimizer.param_groups[0]["lr"],
-        'max_uses': data.max_uses,
-        'mean_uses': data.mean_uses,
-        **{f'environment/{k}': dist_mean(v, device) for k, v in data.stats.items()},
-        **{f'losses/{k}': dist_mean(v, device) for k, v in data.losses.items()},
-        #**{f'performance/{k}': dist_sum(v, device) for k, v in data.profile},
-    }
-
-    if dist.is_initialized() and dist.get_rank() != 0:
-        return logs
-
-    if data.wandb is not None:
-        data.last_log_time = time.time()
-        data.wandb.log(logs)
-    elif data.neptune is not None:
-        data.last_log_time = time.time()
-        for k, v in logs.items():
-            data.neptune[k].append(v, step=agent_steps)
-
-    return logs
-
-def close(data):
-    data.vecenv.close()
-    data.utilization.stop()
-    config = data.config
-    if data.wandb is not None:
-        artifact_name = f"{config.exp_id}_model"
-        artifact = data.wandb.Artifact(artifact_name, type="model")
-        model_path = save_checkpoint(data)
-        artifact.add_file(model_path)
-        data.wandb.run.log_artifact(artifact)
-        data.wandb.finish()
-    elif data.neptune is not None:
-        data.neptune.stop()
-
-def save_checkpoint(data):
-    config = data.config
-    path = os.path.join(config.data_dir, config.exp_id)
-    if not os.path.exists(path):
-        os.makedirs(path)
-
-    model_name = f'model_{data.epoch:06d}.pt'
-    model_path = os.path.join(path, model_name)
-    if os.path.exists(model_path):
-        return model_path
-
-    torch.save(data.uncompiled_policy.state_dict(), model_path)
-
-    state = {
-        'optimizer_state_dict': data.optimizer.state_dict(),
-        'global_step': data.global_step,
-        'agent_step': data.global_step,
-        'update': data.epoch,
-        'model_name': model_name,
-        'exp_id': config.exp_id,
-    }
-    state_path = os.path.join(path, 'trainer_state.pt')
-    torch.save(state, state_path + '.tmp')
-    os.rename(state_path + '.tmp', state_path)
-    return model_path
-
-def try_load_checkpoint(data):
-    config = data.config
-    path = os.path.join(config.data_dir, config.exp_id)
-    if not os.path.exists(path):
-        print('No checkpoints found. Assuming new experiment')
-        return
-
-    trainer_path = os.path.join(path, 'trainer_state.pt')
-    resume_state = torch.load(trainer_path, weights_only=False)
-    model_path = os.path.join(path, resume_state['model_name'])
-    data.policy.uncompiled.load_state_dict(
-        torch.load(model_path, weights_only=True), map_location=config.device)
-    data.optimizer.load_state_dict(resume_state['optimizer_state_dict'])
-    print(f'Loaded checkpoint {resume_state["model_name"]}')
-
-def count_params(policy):
-    return sum(p.numel() for p in policy.parameters() if p.requires_grad)
-
-def rollout(env_creator, env_kwargs, policy_cls, rnn_cls, agent_creator, agent_kwargs,
-        backend, render_mode='auto', model_path=None, device='cuda'):
-
-    if render_mode != 'auto':
-        env_kwargs['render_mode'] = render_mode
-
-    # We are just using Serial vecenv to give a consistent
-    # single-agent/multi-agent API for evaluation
-    env = pufferlib.vector.make(env_creator, env_kwargs=env_kwargs, backend=backend)
-
-    agent = agent_creator(env, policy_cls, rnn_cls, agent_kwargs).to(device)
-    if model_path is not None:
-        agent.load_state_dict(torch.load(model_path, map_location=device, weights_only=False))
-
-    ob, info = env.reset()
-    driver = env.driver_env
-    os.system('clear')
-
-    state = pufferlib.namespace(
-        lstm_h=None,
-        lstm_c=None,
-        diayn_z=torch.arange(env.num_agents, dtype=torch.long, device=device) % 4
-    )
-
-    num_agents = env.observation_space.shape[0]
-    if hasattr(agent, 'recurrent'):
-        shape = (num_agents, agent.hidden_size)
-        state.lstm_h = torch.zeros(shape).to(device)
-        state.lstm_c = torch.zeros(shape).to(device)
-
-    frames = []
-    tick = 0
-    value = [0]
-    intrinsic = [0]
-    intrinsic_mean = None
-    intrinsic_std = None
-    while tick <= 200000:
-        if tick > 1000 and tick % 1 == 0:
-            #render = driver.render(overlay=float(intrinsic[0]))
-            render = driver.render()
-            if driver.render_mode == 'ansi':
-                print('\033[0;0H' + render + '\n')
-                time.sleep(0.05)
-            elif driver.render_mode == 'rgb_array':
-                frames.append(render)
-                import cv2
-                render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR)
-                cv2.imshow('frame', render)
-                cv2.waitKey(1)
-                time.sleep(1/24)
-            elif driver.render_mode in ('human', 'raylib') and render is not None:
-                frames.append(render)
-
-        with torch.no_grad():
-            ob = torch.as_tensor(ob).to(device)
-            logits, value = agent(ob, state)
-            action, logprob, _ = pufferlib.pytorch.sample_logits(logits, is_continuous=agent.is_continuous)
-            action = action.cpu().numpy().reshape(env.action_space.shape)
-
-        ob, reward = env.step(action)[:2]
-        reward = reward.mean()
-        if tick % 128 == 0:
-            print(f'Reward: {reward:.4f}, Tick: {tick}')
-        tick += 1
-
-    # Save frames as gif
-    if frames:
-        import imageio
-        os.makedirs('../docker', exist_ok=True) or imageio.mimsave('../docker/eval.gif', frames, fps=15, loop=0)
+    return dist_sum(value, device) / torch.distributed.get_world_size()
 
 class Profile:
-    def __init__(self, keys, frequency=1):
-        self.stack = []
+    def __init__(self, frequency=5):
+        self.profiles = defaultdict(lambda: defaultdict(float))
         self.frequency = frequency
-        self.profiles = {k:
-            pufferlib.namespace(
-                start = 0,
-                buffer = 0,
-                delta = 0,
-                elapsed = 0,
-                calls = 0,
-            ) for k in keys
-        }
+        self.stack = []
+
+    def __iter__(self):
+        return iter(self.profiles.items())
 
     def __getattr__(self, name):
         return self.profiles[name]
@@ -890,33 +689,29 @@ def __call__(self, name, epoch, nest=False):
 
         torch.cuda.synchronize()
         tick = time.time()
-
         if len(self.stack) != 0 and not nest:
             self.pop(tick)
 
         self.stack.append(name)
-        self.profiles[name].start = tick
+        self.profiles[name]['start'] = tick
 
     def pop(self, end):
         profile = self.profiles[self.stack.pop()]
-        delta = end - profile.start
-        profile.buffer += delta
-        profile.elapsed += delta
-        profile.calls += 1
+        delta = end - profile['start']
+        profile['elapsed'] += delta
+        profile['delta'] += delta
 
     def end(self):
         torch.cuda.synchronize()
         end = time.time()
-
         for i in range(len(self.stack)):
             self.pop(end)
 
     def clear(self):
-        for v in self.profiles.values():
-            if v.buffer != 0:
-                v.delta = v.buffer
-
-            v.buffer = 0
+        for prof in self.profiles.values():
+            if prof['delta'] > 0:
+                prof['buffer'] = prof['delta']
+                prof['delta'] = 0
 
 class Utilization(Thread):
     def __init__(self, delay=1, maxlen=20):
@@ -925,9 +720,8 @@ def __init__(self, delay=1, maxlen=20):
         self.cpu_util = deque(maxlen=maxlen)
         self.gpu_util = deque(maxlen=maxlen)
         self.gpu_mem = deque(maxlen=maxlen)
-
-        self.delay = delay
         self.stopped = False
+        self.delay = delay
         self.start()
 
     def run(self):
@@ -942,159 +736,301 @@ def run(self):
             else:
                 self.gpu_util.append(0)
                 self.gpu_mem.append(0)
+
             time.sleep(self.delay)
 
     def stop(self):
         self.stopped = True
 
-ROUND_OPEN = rich.box.Box(
-    "╭──╮\n"
-    "│  │\n"
-    "│  │\n"
-    "│  │\n"
-    "│  │\n"
-    "│  │\n"
-    "│  │\n"
-    "╰──╯\n"
-)
-
-c1 = '[cyan]'
-c2 = '[white]'
-b1 = '[bright_cyan]'
-b2 = '[bright_white]'
-
-def abbreviate(num):
-    if num < 1e3:
-        return f'{b2}{num:.0f}'
-    elif num < 1e6:
-        return f'{b2}{num/1e3:.1f}{c2}k'
-    elif num < 1e9:
-        return f'{b2}{num/1e6:.1f}{c2}m'
-    elif num < 1e12:
-        return f'{b2}{num/1e9:.1f}{c2}b'
-    else:
-        return f'{b2}{num/1e12:.1f}{c2}t'
-
-def duration(seconds):
-    seconds = int(seconds)
-    h = seconds // 3600
-    m = (seconds % 3600) // 60
-    s = seconds % 60
-    return f"{b2}{h}{c2}h {b2}{m}{c2}m {b2}{s}{c2}s" if h else f"{b2}{m}{c2}m {b2}{s}{c2}s" if m else f"{b2}{s}{c2}s"
-
-def fmt_perf(name, color, delta_ref, prof):
-    percent = 0 if delta_ref == 0 else int(100*prof.delta/delta_ref - 1e-5)
-    return f'{color}{name}', duration(prof.elapsed), f'{b2}{percent:2d}{c2}%'
-
-# TODO: Add env name to print_dashboard
-def print_dashboard(data, clear=False, max_stats=[0]):
-    utilization = data.utilization
-    profile = data.profile
-    config = data.config
-    console = Console()
-    if clear:
-        console.clear()
-
-    dashboard = Table(box=ROUND_OPEN, expand=True,
-        show_header=False, border_style='bright_cyan')
-
-    table = Table(box=None, expand=True, show_header=False)
-    dashboard.add_row(table)
-    cpu_percent = np.mean(utilization.cpu_util)
-    dram_percent = np.mean(utilization.cpu_mem)
-    gpu_percent = np.mean(utilization.gpu_util)
-    vram_percent = np.mean(utilization.gpu_mem)
-    table.add_column(justify="left", width=30)
-    table.add_column(justify="center", width=12)
-    table.add_column(justify="center", width=12)
-    table.add_column(justify="center", width=13)
-    table.add_column(justify="right", width=13)
-    table.add_row(
-        f':blowfish: {b1}PufferLib {b2}2.0.0',
-        f'{c1}CPU: {b2}{cpu_percent:.1f}{c2}%',
-        f'{c1}GPU: {b2}{gpu_percent:.1f}{c2}%',
-        f'{c1}DRAM: {b2}{dram_percent:.1f}{c2}%',
-        f'{c1}VRAM: {b2}{vram_percent:.1f}{c2}%',
+def init_wandb(args, id=None, resume=True, tag=None):
+    import wandb
+    wandb.init(
+        id=id or wandb.util.generate_id(),
+        project=args['wandb_project'],
+        group=args['wandb_group'],
+        allow_val_change=True,
+        save_code=False,
+        resume=resume,
+        config=args,
+        tags=[tag] if tag is not None else [],
     )
-        
-    s = Table(box=None, expand=True)
-    SPS = 0
-    delta = profile.eval.delta + profile.train.delta
-    remaining = 'A hair past a freckle'
-    if delta != 0:
-        SPS = config.batch_size/delta
-        remaining = duration((config.total_timesteps - data.global_step)/SPS)
-
-    uptime = time.time() - data.start_time
-    s.add_column(f"{c1}Summary", justify='left', vertical='top', width=10)
-    s.add_column(f"{c1}Value", justify='right', vertical='top', width=14)
-    s.add_row(f'{c2}Env', f'{b2}{config.env}')
-    s.add_row(f'{c2}Steps', abbreviate(data.global_step))
-    s.add_row(f'{c2}SPS', abbreviate(SPS))
-    s.add_row(f'{c2}Epoch', abbreviate(data.epoch))
-    s.add_row(f'{c2}Uptime', duration(uptime))
-    s.add_row(f'{c2}Remaining', remaining)
-
-    p = Table(box=None, expand=True, show_header=False)
-    p.add_column(f"{c1}Performance", justify="left", width=10)
-    p.add_column(f"{c1}Time", justify="right", width=8)
-    p.add_column(f"{c1}%", justify="right", width=4)
-    p.add_row(*fmt_perf('Evaluate', b1, delta, profile.eval))
-    p.add_row(*fmt_perf('  Forward', c2, delta, profile.eval_forward))
-    p.add_row(*fmt_perf('  Env', c2, delta, profile.env))
-    p.add_row(*fmt_perf('  Copy', c2, delta, profile.eval_copy))
-    p.add_row(*fmt_perf('  Misc', c2, delta, profile.eval_misc))
-    p.add_row(*fmt_perf('Train', b1, delta, profile.train))
-    p.add_row(*fmt_perf('  Forward', c2, delta, profile.train_forward))
-    p.add_row(*fmt_perf('  Learn', c2, delta, profile.learn))
-    p.add_row(*fmt_perf('  Copy', c2, delta, profile.train_copy))
-    p.add_row(*fmt_perf('  Misc', c2, delta, profile.train_misc))
-    if 'custom' in profile.profiles:
-        p.add_row(*fmt_perf('  Custom', c2, uptime, profile.custom))
-
-    l = Table(box=None, expand=True, )
-    l.add_column(f'{c1}Losses', justify="left", width=16)
-    l.add_column(f'{c1}Value', justify="right", width=8)
-    for metric, value in data.losses.items():
-        l.add_row(f'{c2}{metric}', f'{b2}{value:.3f}')
-
-    monitor = Table(box=None, expand=True, pad_edge=False)
-    monitor.add_row(s, p, l)
-    dashboard.add_row(monitor)
-
-    table = Table(box=None, expand=True, pad_edge=False)
-    dashboard.add_row(table)
-    left = Table(box=None, expand=True)
-    right = Table(box=None, expand=True)
-    table.add_row(left, right)
-    left.add_column(f"{c1}User Stats", justify="left", width=20)
-    left.add_column(f"{c1}Value", justify="right", width=10)
-    right.add_column(f"{c1}User Stats", justify="left", width=20)
-    right.add_column(f"{c1}Value", justify="right", width=10)
+    return wandb
+
+def init_neptune(args, id=None, resume=True, tag=None, mode="async"):
+    '''Open a Neptune run; retry once in offline mode if the connection is lost (id/resume are unused).'''
+    import neptune
+    import neptune.exceptions
+    try:
+        return neptune.init_run(
+            project=f"{args['neptune_name']}/{args['neptune_project']}",
+            capture_hardware_metrics=False,
+            capture_stdout=False,
+            capture_stderr=False,
+            capture_traceback=False,
+            tags=[tag] if tag is not None else [],
+            mode=mode,
+        )
+    except neptune.exceptions.NeptuneConnectionLostException:
+        if mode == "offline":  # already offline: retrying would recurse forever
+            raise
+        print("couldn't connect to neptune, logging in offline mode")
+        return init_neptune(args, id, resume, tag, mode="offline")
+
+# TODO:  Do we need this?
+def make_policy(env, policy_cls, rnn_cls, args):
+    '''Build policy_cls(env), wrap it in rnn_cls when one is given, and move it to args['train']['device'].'''
+    model = policy_cls(env, **args['policy'])
+    if rnn_cls is not None:
+        model = rnn_cls(env, model, **args['rnn'])
+    return model.to(args['train']['device'])
+
+# TODO: Is there a simpler interp
+def downsample_linear(arr, m):
+    '''Linearly interpolate arr onto m evenly spaced points covering the same normalized index range.'''
+    x_old = np.linspace(0, 1, len(arr))  # Original indices normalized
+    x_new = np.linspace(0, 1, m)  # New indices normalized
+    return np.interp(x_new, x_old, arr)
+
+# TODO: All logs?
+def experiment(vecenv, policy, args):
+    '''Train until total_timesteps, run up to 10 final evaluate() calls for stats, and return all logs.'''
+    train_config = dict(**args['train'], env=args['env_name'], tag=args['tag'])
+    pufferl = CleanPuffeRL(train_config, vecenv, policy, neptune=args['neptune'], wandb=args['wandb'])
+    all_logs = []
+    while pufferl.global_step < train_config['total_timesteps']:
+        pufferl.evaluate()
+        logs = pufferl.train()
+        if logs is not None:
+            all_logs.append(logs)
+
+    vecenv.async_reset(train_config['seed'])
     i = 0
-    for metric, value in data.stats.items():
-        try: # Discard non-numeric values
-            int(value)
+    stats = {}
+    while i < 10 and not stats:
+        stats = pufferl.evaluate()
+        i += 1
+
+    logs = pufferl.mean_and_log()
+    if logs is not None:
+        all_logs.append(logs)
+
+    pufferl.close()
+    return all_logs
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=f':blowfish: PufferLib [bright_cyan]{pufferlib.__version__}[/]'
+        ' demo options. Shows valid args for your env and policy',
+        formatter_class=RichHelpFormatter, add_help=False)
+    parser.add_argument('--env', '--environment', type=str,
+        default='puffer_squared', help='Name of specific environment to run')
+    parser.add_argument('--mode', type=str, default='train',
+        choices='train eval sweep autotune profile'.split())
+    parser.add_argument('--load-model-path', type=str, default=None,
+        help='Path to a pretrained checkpoint')
+    parser.add_argument('--load-id', type=str,
+        default=None, help='Kickstart/eval from a finished Wandb/Neptune run')
+    parser.add_argument('--render-mode', type=str, default='auto',
+        choices=['auto', 'human', 'ansi', 'rgb_array', 'raylib', 'None'])
+    parser.add_argument('--save-frames', type=int, default=0)
+    parser.add_argument('--gif-path', type=str, default='eval.gif')
+    parser.add_argument('--fps', type=float, default=15)
+    parser.add_argument('--max-runs', type=int, default=200, help='Max number of sweep runs')
+    parser.add_argument('--wandb', action='store_true', help='Use wandb for logging')
+    parser.add_argument('--wandb-project', type=str, default='pufferlib')
+    parser.add_argument('--wandb-group', type=str, default='debug')
+    parser.add_argument('--neptune', action='store_true', help='Use neptune for logging')
+    parser.add_argument('--neptune-name', type=str, default='pufferai')
+    parser.add_argument('--neptune-project', type=str, default='ablations')
+    parser.add_argument('--local-rank', type=int, default=0, help='Used by torchrun for DDP')
+    parser.add_argument('--tag', type=str, default=None, help='Tag for experiment')
+    args = parser.parse_known_args()[0]
+
+    # Load defaults and config
+    for path in glob.glob('config/**/*.ini', recursive=True):
+        p = configparser.ConfigParser()
+        p.read(['config/default.ini', path])
+        if args.env in p['base']['env_name'].split(): break
+    else:
+        raise pufferlib.APIUsageError('No config for env_name {}'.format(args.env))
+
+    # Dynamic help menu from config
+    for section in p.sections():
+        for key in p[section]:
+            try:
+                value = ast.literal_eval(p[section][key])
+            except Exception:  # not a Python literal: keep the raw string
+                value = p[section][key]
+
+            fmt = f'--{key}' if section == 'base' else f'--{section}.{key}'
+            parser.add_argument(fmt.replace('_', '-'), default=value)
+
+    parser.add_argument('-h', '--help', default=argparse.SUPPRESS,
+        action='help', help='Show this help message and exit')
+
+    # Unpack to nested dict
+    parsed = vars(parser.parse_args())
+    env_name = parsed.pop('env')
+    args = defaultdict(dict)
+    for key, value in parsed.items():
+        node = args  # named 'node', not 'next', so the builtin is not shadowed at module scope
+        for subkey in key.split('.'):
+            parent = node
+            node = node.setdefault(subkey, {})
+
+        parent[subkey] = value
+
+    # Dynamically import environment and policy
+    import importlib
+    package = args['package']
+    module_name = 'pufferlib.ocean' if package == 'ocean' else f'pufferlib.environments.{package}'
+    env_module = importlib.import_module(module_name)
+    make_env = env_module.env_creator(env_name)
+    policy_cls = getattr(env_module.torch, args['policy_name'])
+    rnn_name = args['rnn_name']
+    rnn_cls = None
+    if rnn_name is not None:
+        rnn_cls = getattr(env_module.torch, args['rnn_name'])
+
+    # Aggressively exit on ctrl+c
+    import signal
+    signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0))
+
+    # Under torchrun (LOCAL_RANK set), take rank and world size from its RANK/WORLD_SIZE env vars
+    if 'LOCAL_RANK' in os.environ:
+        torch.distributed.init_process_group(backend='nccl')
+
+    if args['mode'] == 'autotune':
+        pufferlib.vector.autotune(make_env, batch_size=args['train']['env_batch_size'])
+        exit(0)
+
+    args['train']['use_rnn'] = rnn_cls is not None
+    env_name = args['env_name']
+    device = args['train']['device']
+
+    if args['mode'] == 'sweep':
+        if not args['wandb'] and not args['neptune']:
+            raise pufferlib.APIUsageError('Sweeps require either wandb or neptune')
+
+        method = args['sweep'].pop('method')
+        try:
+            sweep_cls = getattr(pufferlib.sweep, method)
         except:
-            continue
+            raise pufferlib.APIUsageError(f'Invalid sweep method {method}. See pufferlib.sweep')
 
-        u = left if i % 2 == 0 else right
-        u.add_row(f'{c2}{metric}', f'{b2}{value:.3f}')
-        i += 1
-        if i == 30:
-            break
+        sweep = sweep_cls(args['sweep'])
+        target_key = f'environment/{args["sweep"]["metric"]}'
+        total_timesteps = args['train']['total_timesteps']
+        for i in range(args['max_runs']):
+            seed = time.time_ns() & 0xFFFFFFFF
+            random.seed(seed)
+            np.random.seed(seed)
+            torch.manual_seed(seed)
+            sweep.suggest(args)
+
+            vecenv = pufferlib.vector.make(make_env, env_kwargs=args['env'], **args['vec'])
+            policy = make_policy(vecenv.driver_env, policy_cls, rnn_cls, args)
+            all_logs = experiment(vecenv, policy, args)
+
+            scores = downsample_linear([log[target_key] for log in all_logs], 10)
+            costs = downsample_linear([log['uptime'] for log in all_logs], 10)
+            timesteps = downsample_linear([log['agent_steps'] for log in all_logs], 10)
 
-    for i in range(max_stats[0] - i):
-        u = left if i % 2 == 0 else right
-        u.add_row('', '')
+            for score, cost, timestep in zip(scores, costs, timesteps):
+                args['train']['total_timesteps'] = timestep
+                sweep.observe(args, score, cost)
 
-    max_stats[0] = max(max_stats[0], i)
+            # Prevent logging final eval steps as training steps
+            args['train']['total_timesteps'] = total_timesteps
 
-    table = Table(box=None, expand=True, pad_edge=False)
-    dashboard.add_row(table)
-    table.add_row(f' {c1}Message: {c2}{data.msg}')
+        exit(0)
+
+    if args['mode'] == 'eval':
+        args['vec'] = dict(backend='Serial', num_envs=1)
+        
+    vecenv = pufferlib.vector.make(make_env, env_kwargs=args['env'], **args['vec'])
+    policy = make_policy(vecenv.driver_env, policy_cls, rnn_cls, args)
+
+    load_id = args['load_id']
+    if load_id is not None:
+        if args['mode'] not in ('train', 'eval'):
+            raise pufferlib.APIUsageError('load_id requires mode to be train or eval')
+
+        if args['neptune']:
+            import neptune
+            neptune_name = args['neptune_name']
+            neptune_project = args['neptune_project']
+            run = neptune.init_run(
+                project=f"{neptune_name}/{neptune_project}",
+                with_id=load_id, mode="read-only")
+            data_dir = 'artifacts'
+            run["model"].download(destination=data_dir)
+        elif args['wandb']:
+            run = init_wandb(args, load_id, resume='must')
+            artifact = run.use_artifact(f'{load_id}:latest')
+            data_dir = artifact.download()
+            model_file = max(os.listdir(data_dir))
+        else:
+            raise pufferlib.APIUsageError('No run id provided for eval')
+
+        policy.load_state_dict(torch.load(f'{data_dir}/{load_id}.pt', map_location=device))
+
+    if args['load_model_path'] is not None:
+        policy.load_state_dict(torch.load(
+            args['load_model_path'], map_location=args['train']['device']))
+
+    if args['mode'] == 'train':
+        experiment(vecenv, policy, args)
+    elif args['mode'] == 'eval':
+        ob, info = vecenv.reset()
+        driver = vecenv.driver_env
+        num_agents = vecenv.observation_space.shape[0]
+
+        state = {}
+        if args['train']['use_rnn']:
+            state = dict(
+                lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device),
+                lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device),
+            )
+
+        frames = []
+        while True:
+            render = driver.render()
+            if len(frames) < args['save_frames']:
+                frames.append(render)
 
-    with console.capture() as capture:
-        console.print(dashboard)
+            # TODO: Frames from raylib
+            if driver.render_mode == 'ansi':
+                print('\033[0;0H' + render + '\n')
+                time.sleep(1/args['fps'])
+            elif driver.render_mode == 'rgb_array':
+                import cv2
+                render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR)
+                cv2.imshow('frame', render)
+                cv2.waitKey(1)
+                time.sleep(1/args['fps'])
 
-    print('\033[0;0H' + capture.get())
+            with torch.no_grad():
+                ob = torch.as_tensor(ob).to(args['train']['device'])
+                logits, value = policy(ob, state)
+                action, logprob, _ = pufferlib.pytorch.sample_logits(logits)
+                action = action.cpu().numpy().reshape(vecenv.action_space.shape)
+
+            ob = vecenv.step(action)[0]
+
+            if len(frames) > 0 and len(frames) == args['save_frames']:
+                import imageio
+                imageio.mimsave(args['gif_path'], frames, fps=args['fps'], loop=0)
+                frames.append('Done')
+    elif args['mode'] == 'profile':
+        # Build the trainer here; no other branch defines `pufferl` at module scope
+        train_config = dict(**args['train'], env=env_name, tag=args['tag'])
+        pufferl = CleanPuffeRL(train_config, vecenv, policy, neptune=args['neptune'], wandb=args['wandb'])
+        from torch.profiler import profile, record_function, ProfilerActivity
+        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
+            with record_function("model_inference"):
+                for _ in range(10):
+                    pufferl.evaluate()
+                    pufferl.train()
+        print(prof.key_averages().table(sort_by='cuda_time_total', row_limit=10))
+        prof.export_chrome_trace("trace.json")
diff --git a/config/default.ini b/config/default.ini
index aa8efdc02c..2816695409 100644
--- a/config/default.ini
+++ b/config/default.ini
@@ -1,26 +1,33 @@
 [base]
 package = None
 env_name = None
-vec = native
 policy_name = Policy
 rnn_name = None
 max_suggestion_cost = 3600
 
-[workspace]
-name = pufferai 
-project = ablations
+[vec]
+backend = Multiprocessing
+num_envs = 2
+num_workers = auto
+batch_size = auto
+zero_copy = True
+seed = 42
 
 [env]
 [policy]
 [rnn]
 
 [train]
-seed = 0
+name = pufferai 
+project = ablations
+run_id = None
+run_tag = None
+
+seed = 42
 torch_deterministic = True
 cpu_offload = False
 device = cuda
 optimizer = muon
-scheduler = cosine
 anneal_lr = True
 precision = float32
 total_timesteps = 10_000_000
@@ -28,78 +35,47 @@ learning_rate = 0.025
 gamma = 0.995
 gae_lambda = 0.85
 update_epochs = 1
-norm_adv = True
 # Consider raising clip coef to 0.2
 clip_coef = 0.1
-clip_vloss = True
 vf_coef = 2.0
 vf_clip_coef = 0.1
 max_grad_norm = 0.5
 ent_coef = 0.01
-target_kl = None
 adam_beta1 = 0.9
 adam_beta2 = 0.999
 adam_eps = 1e-12
 
-num_envs = 2
-num_workers = 2
-env_batch_size = 1
-zero_copy = True
 data_dir = experiments
 checkpoint_interval = 200
-batch_size = 524288
+batch_size = auto
 minibatch_size = 8192
-replay_factor = 0.0
+
 # Accumulate gradients above this size
 max_minibatch_size = 32768
 bptt_horizon = 64
 compile = False
-compile_mode = reduce-overhead
+compile_mode = max-autotune-no-cudagraphs
 compile_fullgraph = True
 
-use_diayn = False
-diayn_archive = 256
-diayn_loss_coef = 0.000
-diayn_coef = 0.0
-
-use_p3o = False
-p3o_horizon = 128
-puf = 0.0
-
-use_vtrace = False
 vtrace_rho_clip = 1.0
 vtrace_c_clip = 1.0
 
-use_puff_advantage = True
-
 prio_alpha = 0.6
 prio_beta0 = 0.4
 
 [sweep]
-method = protein 
-name = sweep
-
-[sweep.metric]
+method = Protein 
+metric = score
 goal = maximize
-name = score 
-min = 0
-max = 1
 
-[sweep.env.num_envs]
+[sweep.vec.num_envs]
 distribution = uniform_pow2
-min = 64
-max = 4096
-mean = 1024
+min = 1
+max = 8
+mean = 2
 scale = auto
-#scale = 0.5
-
-#[sweep.policy.hidden_size]
-#distribution = uniform_pow2
-#min = 32
-#max = 1024
-#mean = 128
-#scale = auto
 
+# TODO: Elim from base
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 5e7
@@ -107,18 +83,18 @@ max = 1e10
 mean = 1e8
 scale = time
 
-[sweep.train.batch_size]
-distribution = uniform_pow2
-min = 32768
-max = 1048576
-mean = 262144
+[sweep.train.bptt_horizon]
+distribution = int_uniform
+min = 16
+max = 64
+mean = 64
 scale = auto
 
 [sweep.train.minibatch_size]
 distribution = uniform_pow2
-min = 1024
-max = 32768
-mean = 8192
+min = 8192
+max = 131072
+mean = 32768
 scale = auto
 
 [sweep.train.learning_rate]
@@ -141,7 +117,6 @@ min = 0.8
 mean = 0.98
 max = 0.9999
 scale = auto
-#scale = 0.5
 
 [sweep.train.gae_lambda]
 distribution = logit_normal
@@ -149,7 +124,6 @@ min = 0.6
 mean = 0.95
 max = 0.995
 scale = auto
-#scale = 0.5
 
 [sweep.train.update_epochs]
 distribution = int_uniform
@@ -158,6 +132,20 @@ max = 4
 mean = 1
 scale = 1.0
 
+[sweep.train.clip_coef]
+distribution = uniform
+min = 0.01
+max = 1.0
+mean = 0.1
+scale = auto
+
+[sweep.train.vf_clip_coef]
+distribution = uniform
+min = 0.01
+max = 5.0
+mean = 0.1
+scale = auto
+
 [sweep.train.vf_coef]
 distribution = uniform
 min = 0.0
@@ -172,20 +160,6 @@ mean = 1.0
 max = 5.0
 scale = auto
 
-[sweep.train.bptt_horizon]
-distribution = uniform_pow2
-min = 4
-max = 128
-mean = 16
-scale = auto
-
-#[sweep.train.puf]
-#distribution = logit_normal
-#min = 0.01
-#mean = 0.5
-#max = 0.99
-#scale = auto
-
 [sweep.train.adam_beta1]
 distribution = logit_normal
 min = 0.5
@@ -201,36 +175,22 @@ max = 0.99999
 scale = auto
 
 [sweep.train.adam_eps]
-distribution = uniform
-min = 0.00000000000001
-mean = 0.00000001
-max = 0.001
+distribution = log_normal
+min = 1e-14
+mean = 1e-8
+max = 1e-4
 scale = auto
 
-#[sweep.train.horizon]
-#distribution = uniform_pow2
-#min = 4
-#max = 128
-#mean = 32
-#scale = 0.25
-
-#[sweep.train.diayn_archive]
-#distribution = uniform_pow2
-#min = 2
-#max = 64
-#mean = 8
-#scale = auto
-
-#[sweep.train.diayn_loss_coef]
-#distribution = uniform
-#min = 0.0
-#max = 2.0
-#mean = 1.0
-#scale = auto
-
-#[sweep.train.diayn_coef]
-#distribution = log_normal
-#min = 0.0001
-#mean = 0.1
-#max = 0.99
-#scale = auto
+[sweep.train.prio_alpha]
+distribution = logit_normal
+min = 0.1
+mean = 0.6
+max = 0.99
+scale = auto
+
+[sweep.train.prio_beta0]
+distribution = logit_normal
+min = 0.1
+mean = 0.4
+max = 0.99
+scale = auto
diff --git a/config/metta.ini b/config/metta.ini
index df284bf76f..9bfcbd20ce 100644
--- a/config/metta.ini
+++ b/config/metta.ini
@@ -3,17 +3,17 @@ package = metta
 env_name = metta 
 policy_name = Policy
 rnn_name = Recurrent
-vec = multiprocessing
+
+[vec]
+num_envs = 128
+num_workers = 16
+batch_size = 64
 
 [env]
 render_mode = auto
-#num_envs = 128 
 
 [train]
-total_timesteps = 5_000_000_000
-num_envs = 128
-num_workers = 16
-env_batch_size = 64
+total_timesteps = 100_000_000
 learning_rate = 0.0013848535655657842
 gamma = 0.9959746852829785
 gae_lambda = 0.9283720217357007
@@ -45,17 +45,6 @@ adam_eps = 0.000249501214984291
 #minibatch_size = 32768
 #compile = False
 
-[sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = score 
-min = 0
-max = 10
-scale = auto
-
 #[sweep.train.total_timesteps]
 #distribution = log_normal
 #min = 2e7
diff --git a/config/ocean/blastar.ini b/config/ocean/blastar.ini
index 227096a0c9..e545f019b8 100644
--- a/config/ocean/blastar.ini
+++ b/config/ocean/blastar.ini
@@ -14,8 +14,8 @@ gamma = 0.95
 learning_rate = 0.05
 minibatch_size = 32768
 
-[sweep.metric]
-name = environment/enemy_crossed_screen
+[sweep]
+metric = environment/enemy_crossed_screen
 goal = minimize
 
 [sweep.parameters.train.parameters.batch_size]
diff --git a/config/ocean/breakout.ini b/config/ocean/breakout.ini
index ebc0ac1660..25355a3e44 100644
--- a/config/ocean/breakout.ini
+++ b/config/ocean/breakout.ini
@@ -3,7 +3,9 @@ package = ocean
 env_name = puffer_breakout
 policy_name = Policy
 rnn_name = Recurrent
-vec = multiprocessing
+
+[vec]
+num_envs = 2
 
 [env]
 num_envs = 4096
@@ -16,23 +18,35 @@ input_size = 128
 hidden_size = 128
 
 [train]
-total_timesteps = 80_000_000
-learning_rate = 0.05
-minibatch_size = 32768
-
-[sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = score 
-min = 0
-max = 864
-
-#[sweep.train.total_timesteps]
-#distribution = log_normal
-#min = 2e7
-#max = 1e8
-#mean = 5e7
-#scale = auto
+total_timesteps = 75_000_000 
+
+# Highly sensitive
+adam_beta1 = 0.99
+
+adam_beta2 = 0.9999
+adam_eps = 1e-14
+ent_coef = 0.025
+gae_lambda = 0.85
+
+# Highly sensitive
+gamma = 0.975
+
+learning_rate = 0.01
+max_grad_norm = 1.5
+minibatch_size = 16384
+
+prio_alpha = 0.0
+# Doesn't matter
+prio_beta0 = 1.0
+
+# Just can't be low
+vf_coef = 1.3
+
+# TODO: Try tuning clip coefs
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 2e7
+max = 5e8
+mean = 8e7
+scale = auto
diff --git a/config/ocean/cartpole.ini b/config/ocean/cartpole.ini
index 9a5674c6f0..6ecfb7db00 100644
--- a/config/ocean/cartpole.ini
+++ b/config/ocean/cartpole.ini
@@ -1,7 +1,6 @@
 [base]
 package = ocean
 env_name = puffer_cartpole
-vec = multiprocessing
 policy_name = Policy
 rnn_name = Recurrent
 
@@ -16,13 +15,7 @@ minibatch_size = 32768
 
 [sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = episode_length
-min = 0
-max = 205
+method = Protein
+metric = episode_length
 
 [sweep.train.total_timesteps]
 distribution = log_normal
diff --git a/config/ocean/enduro.ini b/config/ocean/enduro.ini
index 4f6455b57a..f67089205f 100644
--- a/config/ocean/enduro.ini
+++ b/config/ocean/enduro.ini
@@ -15,15 +15,7 @@ minibatch_size = 32768
 
 
 [sweep]
-method = protein 
-name = sweep
-max_score = None
-
-[sweep.metric]
-goal = maximize
-name = days_completed
-min = 0
-max = None
+metric = days_completed
 
 [sweep.train.total_timesteps]
 distribution = log_normal
diff --git a/config/ocean/gpudrive.ini b/config/ocean/gpudrive.ini
index 0235fca9c7..4391e1c7f1 100644
--- a/config/ocean/gpudrive.ini
+++ b/config/ocean/gpudrive.ini
@@ -3,41 +3,80 @@ package = ocean
 env_name = puffer_gpudrive
 policy_name = GPUDrive
 rnn_name = Recurrent
-vec = native
+
+[vec]
+num_workers = 16
+num_envs = 16
+batch_size = 8
+#backend = Serial
 
 [policy]
 input_size = 64
-hidden_size = 128
+hidden_size = 512
 
 [rnn]
-input_size = 64
-hidden_size = 128
+input_size = 512
+hidden_size = 512
 
 [env]
-num_envs = 75
-reward_vehicle_collision = 0
-reward_offroad_collision = 0
+num_envs = 72
+reward_vehicle_collision = 0.0
+reward_offroad_collision = 0.0
 
 [train]
 total_timesteps = 150_000_000
-learning_rate = 0.005
-num_workers = 1
-num_envs = 1
-env_batch_size = 1
+#learning_rate = 0.005
 anneal_lr = True
+batch_size = 738192
+minibatch_size = 23296
+max_minibatch_size = 23296
+bptt_horizon = 91
 
+#adam_beta1 = 0.9225899639773112
+#adam_beta2 = 0.9
+#adam_eps = 0.0004030478187254784
+#ent_coef = 0.0020159472963835016
+#gae_lambda = 0.8829440612065992
+#gamma = 0.9872971455373439
+#learning_rate = 0.0003947934701844728
+#max_grad_norm = 0.5296288081133984
+#prio_alpha = 0.99
+#prio_beta0 = 0.48469847315324566
+#update_epochs = 2
+#vf_coef = 3.6777541336880786
+#checkpoint_interval = 1000
 
+adam_beta1 = 0.9852000972032763
+adam_beta2 = 0.9948751690861872
+adam_eps = 0.000002967099767264975
+clip_coef = 0.3153578071651496
+ent_coef = 0.000369784972524992
+gae_lambda = 0.9385892578563558
+gamma = 0.9864999317644947
+learning_rate = 0.0022659903674495338
+max_grad_norm = 1.942292174080673
+prio_alpha = 0.9414003089586056
+prio_beta0 = 0.9429842108374631
+vf_clip_coef = 1.9533056765171148
+vf_coef = 3.2028923035616774
 
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 5e7
+max = 2e8
+mean = 1e8
+scale = time
+ 
 [sweep.env.reward_vehicle_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
+max = 0.0
 mean = -0.5
 scale = auto 
-
+ 
 [sweep.env.reward_offroad_collision]
 distribution = uniform
 min = -1.0
-max = -0.25
+max = 0.0
 mean = -0.5
 scale = auto
diff --git a/config/ocean/grid.ini b/config/ocean/grid.ini
index 511ed0428c..1c0bb9544b 100644
--- a/config/ocean/grid.ini
+++ b/config/ocean/grid.ini
@@ -1,7 +1,6 @@
 [base]
 package = ocean
 env_name = puffer_grid
-vec = multiprocessing
 policy_name = Policy
 rnn_name = Recurrent
 
@@ -13,54 +12,32 @@ input_size = 512
 hidden_size = 512
 
 [env]
-max_size = 31
+max_size = 47
 num_envs = 4096
 num_maps = 8192
 
 [train]
-total_timesteps = 180_000_000
+total_timesteps = 250_000_000
+adam_beta1 = 0.9225899639773112
+adam_beta2 = 0.9
+adam_eps = 0.0004030478187254784
+anneal_lr = true
+batch_size = 524288
+ent_coef = 0.0020159472963835016
+gae_lambda = 0.8829440612065992
+gamma = 0.9872971455373439
+learning_rate = 0.0003947934701844728
+max_grad_norm = 0.5296288081133984
+minibatch_size = 4096
+prio_alpha = 0.99
+prio_beta0 = 0.48469847315324566
+#update_epochs = 2
+vf_coef = 3.6777541336880786
 checkpoint_interval = 1000
-gamma = 0.9944336976183826
-gae_lambda = 0.9474288929489364
-ent_coef = 0.00001
-learning_rate = 0.005
-minibatch_size = 32768
-
-[sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = score 
-min = 0
-max = 1
 
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 5e7
-max = 2e8
+max = 6e8
 mean = 1e8
 scale = auto
-
-[sweep.train.e3b_coef]
-distribution = logit_normal
-min = 0.0001
-max = 0.99
-mean = 0.001
-scale = auto
-
-[sweep.train.e3b_lambda]
-distribution = log_normal
-min = 0.01
-max = 10.0
-mean = 0.1
-scale = auto
-
-[sweep.train.e3b_norm]
-distribution = log_normal
-min = 0.0001
-max = 0.1
-mean = 0.001
-scale = auto
-
diff --git a/config/ocean/impulse_wars.ini b/config/ocean/impulse_wars.ini
index bacbf0b228..9fe5eff614 100644
--- a/config/ocean/impulse_wars.ini
+++ b/config/ocean/impulse_wars.ini
@@ -41,16 +41,6 @@ compile_mode = reduce-overhead
 compile_fullgraph = False
 device = cuda
 
-[sweep]
-method = protein 
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = score
-min = 0.0
-max = 1.0
-
 [sweep.env.num_envs]
 distribution = uniform_pow2
 min = 16
diff --git a/config/ocean/moba.ini b/config/ocean/moba.ini
index bb345d6282..cd9e621771 100644
--- a/config/ocean/moba.ini
+++ b/config/ocean/moba.ini
@@ -18,13 +18,13 @@ num_envs = 8
 num_workers = 4
 env_batch_size = 4
 minibatch_size = 20_480
+max_minibatch_size = 20_480
 batch_size = 409_600
 bptt_horizon = 80
 learning_rate = 0.05
 
 [sweep.metric]
-goal = maximize
-name = radiant_towers_alive
+metric = radiant_towers_alive
 
 [sweep.train.total_timesteps]
 distribution = log_normal
diff --git a/config/ocean/nmmo3.ini b/config/ocean/nmmo3.ini
index 4ebbb77fcc..f5a33a5ee2 100644
--- a/config/ocean/nmmo3.ini
+++ b/config/ocean/nmmo3.ini
@@ -1,40 +1,38 @@
 [base]
 package = ocean
 env_name = puffer_nmmo3
-vec = multiprocessing
 policy_name = NMMO3
 rnn_name = NMMO3LSTM
 
+[vec]
+num_workers = 8
+num_envs = 8
+batch_size = 4
+
 [env]
 reward_combat_level = 1.0
 reward_prof_level = 1.0
 reward_item_level = 1.0
 reward_market = 0.0
 reward_death = -1.0
-num_envs = 4
+num_envs = 1
 
 [train]
 total_timesteps = 107000000000
 checkpoint_interval = 1000
 learning_rate = 0.0004573146765703167
-num_envs = 2
-num_workers = 2
-env_batch_size = 1
-update_epochs = 1
 gamma = 0.7647543366891623
 gae_lambda = 0.996005622445478
 ent_coef = 0.01210084358004069
 max_grad_norm = 0.6075578331947327
 vf_coef = 0.3979089612467003
-# todo: run 500k, 64 horz
-bptt_horizon = 32
-batch_size = 262144
+bptt_horizon = 64
+batch_size = 524288
 minibatch_size = 32768
-compile = False
+max_minibatch_size = 32768
 
-[sweep.metric]
-goal = maximize
-name = min_comb_prof
+[sweep]
+metric = min_comb_prof
 
 [sweep.env.num_envs]
 distribution = uniform_pow2
diff --git a/config/ocean/pong.ini b/config/ocean/pong.ini
index 7ddf13c044..bcacb4f9ef 100644
--- a/config/ocean/pong.ini
+++ b/config/ocean/pong.ini
@@ -3,25 +3,17 @@ package = ocean
 env_name = puffer_pong 
 policy_name = Policy
 rnn_name = Recurrent
-vec = multiprocessing
+
+[vec]
+num_envs = 2
 
 [env]
 num_envs = 4096
 
 [train]
-total_timesteps = 80_000_000
+total_timesteps = 500_000_000
 learning_rate = 0.05
-minibatch_size = 32768
-
-[sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = score 
-min = -21
-max = 21
+batch_size = auto
 
 [sweep.train.total_timesteps]
 distribution = log_normal
diff --git a/config/ocean/snake.ini b/config/ocean/snake.ini
index 2f6e6194ba..001d0df092 100644
--- a/config/ocean/snake.ini
+++ b/config/ocean/snake.ini
@@ -22,17 +22,6 @@ total_timesteps = 300_000_000
 learning_rate = 0.05
 minibatch_size = 32768
 
-[sweep]
-method = protein 
-name = sweep
-max_score = None
-
-[sweep.metric]
-goal = maximize
-name = score 
-min = 0
-max = None
-
 [sweep.train.diayn_archive]
 distribution = uniform_pow2
 min = 2
diff --git a/config/ocean/tower_climb.ini b/config/ocean/tower_climb.ini
index fe7f210ce2..01b2059423 100644
--- a/config/ocean/tower_climb.ini
+++ b/config/ocean/tower_climb.ini
@@ -19,8 +19,7 @@ learning_rate = 0.05
 minibatch_size = 32768
 
 [sweep.metric]
-goal = maximize
-name = environment/levels_completed
+metric = environment/levels_completed
 
 [sweep.parameters.train.parameters.total_timesteps]
 distribution = uniform
diff --git a/config/ocean/tripletriad.ini b/config/ocean/tripletriad.ini
index b0c2392e32..203555ae2a 100644
--- a/config/ocean/tripletriad.ini
+++ b/config/ocean/tripletriad.ini
@@ -14,12 +14,6 @@ gamma = 0.95
 learning_rate = 0.05
 minibatch_size = 32768
 
-[sweep.metric]
-goal = maximize
-name = score 
-min = 0
-max = 9.0
-
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 5e7
diff --git a/config/trade_sim.ini b/config/trade_sim.ini
index f31f37fc5b..2a7f5b4054 100644
--- a/config/trade_sim.ini
+++ b/config/trade_sim.ini
@@ -3,29 +3,24 @@ package = trade_sim
 env_name = trade_sim
 policy_name = Policy
 rnn_name = Recurrent
-vec = multiprocessing
+
+[vec]
+backend = Multiprocessing
+num_envs = 1024
+num_workers = 16
+batch_size = 512
 
 #[env]
 #num_envs = 128 
 
 [train]
 total_timesteps = 100_000_000
-num_envs = 1024
-num_workers = 16
-env_batch_size = 512
 gamma = 0.95
 learning_rate = 0.05
 minibatch_size = 32768
 
 [sweep]
-method = protein
-name = sweep
-
-[sweep.metric]
-goal = maximize
-name = final_capital
-min = 0
-max = 20000
+metric = final_capital
 
 [sweep.train.total_timesteps]
 distribution = log_normal
diff --git a/demo.py b/demo.py
deleted file mode 100644
index 8275d5e9bc..0000000000
--- a/demo.py
+++ /dev/null
@@ -1,381 +0,0 @@
-import configparser
-import argparse
-import shutil
-import glob
-import uuid
-import ast
-import os
-import random
-import time
-
-import numpy as np
-import torch
-
-import pufferlib
-import pufferlib.sweep
-import pufferlib.utils
-import pufferlib.vector
-
-from rich_argparse import RichHelpFormatter
-from rich.console import Console
-from rich.traceback import install
-install(show_locals=False) # Rich tracebacks
-
-import signal # Aggressively exit on ctrl+c
-signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0))
-
-import clean_pufferl
- 
-def init_wandb(args, name, id=None, resume=True, tag=None):
-    import wandb
-    wandb.init(
-        id=id or wandb.util.generate_id(),
-        project=args['wandb_project'],
-        group=args['wandb_group'],
-        allow_val_change=True,
-        save_code=False,
-        resume=resume,
-        config=args,
-        name=name,
-        tags=[tag] if tag is not None else [],
-    )
-    return wandb
-
-def init_neptune(args, name, id=None, resume=True, tag=None, mode="async"):
-    import neptune
-    import neptune.exceptions
-    try:
-        workspace = args['workspace']
-        run = neptune.init_run(
-                project=f"{workspace['name']}/{workspace['project']}",
-                capture_hardware_metrics=False,
-                capture_stdout=False,
-                capture_stderr=False,
-                capture_traceback=False,
-                tags=[tag] if tag is not None else [],
-                mode=mode,
-            )
-    except neptune.exceptions.NeptuneConnectionLostException:
-        print("couldn't connect to neptune, logging in offline mode")
-        return init_neptune(args, name, id, resume, tag, mode="offline")
-    return run
-
-def make_policy(env, policy_cls, rnn_cls, args):
-    policy = policy_cls(env, **args['policy'],
-        #batch_size=args['train']['batch_size'],
-        use_p3o=args['train']['use_p3o'],
-        p3o_horizon=args['train']['p3o_horizon'],
-        use_diayn=args['train']['use_diayn'],
-        diayn_skills=args['train']['diayn_archive'],
-    )
-    args['rnn']['input_size'] = policy.hidden_size
-    args['rnn']['hidden_size'] = policy.hidden_size
-    if rnn_cls is not None:
-        policy = rnn_cls(env, policy, **args['rnn'])
-
-    return policy.to(args['train']['device'])
-
-def sweep(args, env_name, make_env, policy_cls, rnn_cls):
-    method = args['sweep']['method']
-    if method == 'random':
-        sweep = pufferlib.sweep.Random(args['sweep'])
-    elif method == 'pareto_genetic':
-        sweep = pufferlib.sweep.ParetoGenetic(args['sweep'])
-    elif method == 'protein':
-        sweep = pufferlib.sweep.Protein(
-            args['sweep'],
-            resample_frequency=0,
-            num_random_samples=50, # Should be number of params
-            max_suggestion_cost=args['max_suggestion_cost'],
-            min_score = args['sweep']['metric']['min'],
-            max_score = args['sweep']['metric']['max'],
-        )
-    elif method == 'carbs':
-        sweep = pufferlib.sweep.Carbs(
-            args['sweep'],
-            resample_frequency=5,
-            num_random_samples=10, # Should be number of params
-            max_suggestion_cost=args['max_suggestion_cost'],
-        )
-    else:
-        raise ValueError(f'Invalid sweep method {method} (random/pareto_genetic/protein)')
-
-    target_metric = args['sweep']['metric']['name']
-    for i in range(args['max_runs']):
-        seed = time.time_ns() & 0xFFFFFFFF
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-        info = sweep.suggest(args)
-        if args['train']['minibatch_size'] >= args['train']['batch_size']:
-            sweep.observe(args, 0.0, 0.0)
-            continue
-        
-        scores, costs, timesteps, _, _ = train(args, make_env, policy_cls, rnn_cls, target_metric)
-
-        # Hacky patch to prevent increasing total_timesteps when not swept
-        total_timesteps = args['train']['total_timesteps']
-        for score, cost, timestep in zip(scores, costs, timesteps):
-            args['train']['total_timesteps'] = timestep
-            sweep.observe(args, score, cost)
-
-        args['train']['total_timesteps'] = total_timesteps
-
-        print('Score:', score, 'Cost:', cost, 'Timesteps:', timestep)
-
-def train(args, make_env, policy_cls, rnn_cls, target_metric, min_eval_points=100,
-        elos={'model_random.pt': 1000}, vecenv=None, wandb=None, neptune=None):
-    if args['vec'] == 'serial':
-        vec = pufferlib.vector.Serial
-    elif args['vec'] == 'multiprocessing':
-        vec = pufferlib.vector.Multiprocessing
-    elif args['vec'] == 'ray':
-        vec = pufferlib.vector.Ray
-    elif args['vec'] == 'native':
-        vec = pufferlib.environment.PufferEnv
-    else:
-        raise ValueError(f'Invalid --vec (serial/multiprocessing/ray/native).')
-
-    env_name = args['env_name']
-    if vecenv is None:
-        vecenv = pufferlib.vector.make(
-            make_env,
-            env_kwargs=args['env'],
-            num_envs=args['train']['num_envs'],
-            num_workers=args['train']['num_workers'],
-            batch_size=args['train']['env_batch_size'],
-            zero_copy=args['train']['zero_copy'],
-            overwork=args['vec_overwork'],
-            seed=args['train']['seed'],
-            backend=vec,
-        )
-
-    policy = make_policy(vecenv.driver_env, policy_cls, rnn_cls, args)
-
-    if args['ddp']:
-        from torch.nn.parallel import DistributedDataParallel as DDP
-        orig_policy = policy
-        policy = DDP(policy, device_ids=[args['rank']])
-        # TODO: Test this? isinstance?
-        if hasattr(orig_policy, 'lstm'):
-            policy.lstm = orig_policy.lstm
-
-    neptune = None
-    wandb = None
-    if args['neptune']:
-        neptune = init_neptune(args, env_name, id=args['exp_id'], tag=args['tag'])
-        for k, v in pufferlib.utils.unroll_nested_dict(args):
-            neptune[k].append(v)
-    elif args['wandb']:
-        wandb = init_wandb(args, env_name, id=args['exp_id'], tag=args['tag'])
-
-    train_config = pufferlib.namespace(**args['train'], env=env_name,
-        exp_id=args['exp_id'] or env_name + '-' + str(uuid.uuid4())[:8])
-    data = clean_pufferl.create(train_config, vecenv, policy, wandb=wandb, neptune=neptune)
-
-    timesteps = []
-    scores = []
-    costs = []
-    target_key = f'environment/{target_metric}'
-
-    vecenv.async_reset(train_config.seed)
-    while data.global_step < train_config.total_timesteps:
-        clean_pufferl.evaluate(data)
-        logs = clean_pufferl.train(data)
-        if logs is not None and target_key in logs:
-            timesteps.append(logs['agent_steps'])
-            scores.append(logs[target_key])
-            #costs.append(data.profile.uptime)
-
-    steps_evaluated = 0
-    cost = time.time() - data.start_time
-    batch_size = args['train']['batch_size']
-    while len(data.stats[target_metric]) < min_eval_points:
-        stats, _ = clean_pufferl.evaluate(data)
-        steps_evaluated += batch_size
-
-    clean_pufferl.mean_and_log(data)
-    score = stats[target_metric]
-    print(f'Evaluated {steps_evaluated} steps. Score: {score}')
-
-    scores.append(score)
-    costs.append(cost)
-    timesteps.append(data.global_step)
-
-    def downsample_linear(arr, m):
-        n = len(arr)
-        x_old = np.linspace(0, 1, n)  # Original indices normalized
-        x_new = np.linspace(0, 1, m)  # New indices normalized
-        return np.interp(x_new, x_old, arr)
-     
-    scores = downsample_linear(scores, 10)
-    costs = downsample_linear(costs, 10)
-    timesteps = downsample_linear(timesteps, 10)
-
-    if args['neptune']:
-        neptune['score'].append(score)
-        neptune['cost'].append(cost)
-    elif args['wandb']:
-        wandb.log({'score': score, 'cost': cost})
-
-    clean_pufferl.close(data)
-    return scores, costs, timesteps, elos, vecenv
-
-def train_ddp(rank, world_size, args, make_env, policy_cls, rnn_cls, target_metric):
-    import torch.distributed as dist
-    args['rank'] = rank
-    args['train']['device'] = f'cuda:{rank}'
-    dist.init_process_group(backend='nccl', rank=rank, world_size=world_size)
-    train(args, make_env, policy_cls, rnn_cls, target_metric)
-    dist.destroy_process_group()
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description=f':blowfish: PufferLib [bright_cyan]{pufferlib.__version__}[/]'
-        ' demo options. Shows valid args for your env and policy',
-        formatter_class=RichHelpFormatter, add_help=False)
-    parser.add_argument('--env', '--environment', type=str,
-        default='puffer_squared', help='Name of specific environment to run')
-    parser.add_argument('--mode', type=str, default='train',
-        choices='train eval evaluate sweep autotune profile'.split())
-    parser.add_argument('--vec-overwork', action='store_true',
-        help='Allow vectorization to use >1 worker/core. Not recommended.')
-    parser.add_argument('--eval-model-path', type=str, default=None,
-        help='Path to a pretrained checkpoint')
-    parser.add_argument('--baseline', action='store_true',
-        help='Load pretrained model from WandB if available')
-    parser.add_argument('--ddp', action='store_true', help='Distributed data parallel')
-    parser.add_argument('--render-mode', type=str, default='auto',
-        choices=['auto', 'human', 'ansi', 'rgb_array', 'raylib', 'None'])
-    parser.add_argument('--exp-id', '--exp-name', type=str,
-        default=None, help='Resume from experiment')
-    parser.add_argument('--data-path', type=str, default=None,
-        help='Used for testing hparam algorithms')
-    parser.add_argument('--track', action='store_true', help='Track on WandB')
-    parser.add_argument('--max-runs', type=int, default=200, help='Max number of sweep runs')
-    parser.add_argument('--wandb-project', type=str, default='pufferlib')
-    parser.add_argument('--wandb-group', type=str, default='debug')
-    parser.add_argument('--tag', type=str, default=None, help='Tag for experiment')
-    parser.add_argument('--wandb', action='store_true', help='Track on WandB')
-    parser.add_argument('--neptune', action='store_true', help='Track on Neptune')
-    #parser.add_argument('--wandb-project', type=str, default='pufferlib')
-    #parser.add_argument('--wandb-group', type=str, default='debug')
-    args = parser.parse_known_args()[0]
-
-    file_paths = glob.glob('config/**/*.ini', recursive=True)
-    for path in file_paths:
-        p = configparser.ConfigParser()
-        p.read('config/default.ini')
-
-        subconfig = os.path.join(*path.split('/')[:-1] + ['default.ini'])
-        if subconfig in file_paths:
-            p.read(subconfig)
-
-        p.read(path)
-        if args.env in p['base']['env_name'].split():
-            break
-    else:
-        raise Exception('No config for env_name {}'.format(args.env))
-
-    for section in p.sections():
-        for key in p[section]:
-            if section == 'base':
-                argparse_key = f'--{key}'.replace('_', '-')
-            else:
-                argparse_key = f'--{section}.{key}'.replace('_', '-')
-            parser.add_argument(argparse_key, default=p[section][key])
-
-    # Late add help so you get a dynamic menu based on the env
-    parser.add_argument('-h', '--help', default=argparse.SUPPRESS,
-        action='help', help='Show this help message and exit')
-
-    parsed = parser.parse_args().__dict__
-    args = {'env': {}, 'policy': {}, 'rnn': {}}
-    env_name = parsed.pop('env')
-    for key, value in parsed.items():
-        next = args
-        for subkey in key.split('.'):
-            if subkey not in next:
-                next[subkey] = {}
-            prev = next
-            next = next[subkey]
-        try:
-            prev[subkey] = ast.literal_eval(value)
-        except:
-            prev[subkey] = value
-
-    package = args['package']
-    module_name = f'pufferlib.environments.{package}'
-    if package == 'ocean':
-        module_name = 'pufferlib.ocean'
-
-    import importlib
-    env_module = importlib.import_module(module_name)
-
-    make_env = env_module.env_creator(env_name)
-    policy_cls = getattr(env_module.torch, args['policy_name'])
-    
-    rnn_name = args['rnn_name']
-    rnn_cls = None
-    if rnn_name is not None:
-        rnn_cls = getattr(env_module.torch, args['rnn_name'])
-
-    if args['baseline']:
-        assert args['mode'] in ('train', 'eval', 'evaluate')
-        args['track'] = True
-        version = '.'.join(pufferlib.__version__.split('.')[:2])
-        args['exp_id'] = f'puf-{version}-{env_name}'
-        args['wandb_group'] = f'puf-{version}-baseline'
-        shutil.rmtree(f'experiments/{args["exp_id"]}', ignore_errors=True)
-        run = init_wandb(args, args['exp_id'], resume=False)
-        if args['mode'] in ('eval', 'evaluate'):
-            model_name = f'puf-{version}-{env_name}_model:latest'
-            artifact = run.use_artifact(model_name)
-            data_dir = artifact.download()
-            model_file = max(os.listdir(data_dir))
-            args['eval_model_path'] = os.path.join(data_dir, model_file)
-    if args['mode'] == 'train' and args['ddp']:
-        import torch.multiprocessing as mp
-        world_size = 1
-        os.environ["MASTER_ADDR"] = "localhost"
-        os.environ["MASTER_PORT"] = "29500"
-        target_metric = args['sweep']['metric']['name']
-        mp.spawn(train_ddp,
-            args=(world_size, args, make_env, policy_cls, rnn_cls, target_metric),
-            nprocs=world_size,
-            join=True,
-        )
-    elif args['mode'] == 'train':
-        target_metric = args['sweep']['metric']['name']
-        train(args, make_env, policy_cls, rnn_cls, target_metric)
-    elif args['mode'] in ('eval', 'evaluate'):
-        vec = pufferlib.vector.Serial
-        if args['vec'] == 'native': vec = pufferlib.environment.PufferEnv
-        clean_pufferl.rollout(
-            make_env,
-            args['env'],
-            policy_cls=policy_cls,
-            rnn_cls=rnn_cls,
-            agent_creator=make_policy,
-            agent_kwargs=args,
-            backend=vec,
-            model_path=args['eval_model_path'],
-            render_mode=args['render_mode'],
-            device=args['train']['device'],
-        )
-    elif args['mode'] == 'sweep':
-        assert args['wandb'] or args['neptune'], 'Sweeps require either wandb or neptune'
-        sweep(args, env_name, make_env, policy_cls, rnn_cls)
-    elif args['mode'] == 'autotune':
-        pufferlib.vector.autotune(make_env, batch_size=args['train']['env_batch_size'])
-    elif args['mode'] == 'profile':
-        import cProfile
-        target_metric = args['sweep']['metric']['name']
-        cProfile.run('train(args, make_env, policy_cls, rnn_cls, target_metric)', 'stats.profile')
-        import pstats
-        from pstats import SortKey
-        p = pstats.Stats('stats.profile')
-        p.sort_stats(SortKey.TIME).print_stats(10)
-        breakpoint()
-        pass
diff --git a/pufferlib.cpp b/pufferlib.cpp
index 6c8aab7fe5..4260234779 100644
--- a/pufferlib.cpp
+++ b/pufferlib.cpp
@@ -1,80 +1,97 @@
-#include "shared.cpp"
+#include <Python.h>
+#include <ATen/Operators.h>
+#include <torch/all.h>
+#include <torch/library.h>
+#include <vector>
 
-// [num_steps, horizon]
-void gae(float* values, float* rewards, float* dones, float* advantages,
-        float gamma, float gae_lambda, int num_steps, int horizon){
-    for (int offset = 0; offset < num_steps*horizon; offset+=horizon) {
-        gae_row(values + offset, rewards + offset, dones + offset,
-            advantages + offset, gamma, gae_lambda, horizon);
-    }
+extern "C" {
+  /* Creates a dummy empty _C module that can be imported from Python.
+     The import from Python will load the .so consisting of this file
+     in this extension, so that the TORCH_LIBRARY static initializers
+     below are run. */
+  PyObject* PyInit__C(void)
+  {
+      static struct PyModuleDef module_def = {
+          PyModuleDef_HEAD_INIT,
+          "_C",   /* name of module */
+          NULL,   /* module documentation, may be NULL */
+          -1,     /* size of per-interpreter state of the module,
+                     or -1 if the module keeps state in global variables. */
+          NULL,   /* methods */
+      };
+      return PyModule_Create(&module_def);
+  }
 }
 
-torch::Tensor compute_gae(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, float gamma, float gae_lambda) {
-    int num_steps = values.size(0);
-    int horizon = values.size(1);
-    torch::Tensor advantages = gae_check(values, rewards, dones, num_steps, horizon);
-    gae(values.data_ptr<float>(), rewards.data_ptr<float>(),
-        dones.data_ptr<float>(), advantages.data_ptr<float>(),
-        gamma, gae_lambda, num_steps, horizon
-    );
-    return advantages;
+namespace pufferlib {
+
+static const int max_horizon = 256;  // upper bound that vtrace_check asserts on horizon
+void puff_advantage_row(float* values, float* rewards, float* dones,  // fills advantages[0..horizon-2] for one row
+        float* importance, float* advantages, float gamma, float lambda,
+        float rho_clip, float c_clip, int horizon) {
+    float lastpufferlam = 0;  // advantage carried backward from step t+1
+    for (int t = horizon-2; t >= 0; t--) {  // backward pass; advantages[horizon-1] is never written here
+        int t_next = t + 1;
+        float nextnonterminal = 1.0 - dones[t_next];  // zeroes the bootstrap and the carry when dones[t+1] is 1
+        float rho_t = fminf(importance[t], rho_clip);  // importance weight capped at rho_clip; scales delta
+        float c_t = fminf(importance[t], c_clip);  // importance weight capped at c_clip; scales the carried term
+        float delta = rho_t*(rewards[t_next] + gamma*values[t_next]*nextnonterminal - values[t]);
+        lastpufferlam = delta + gamma*lambda*c_t*lastpufferlam*nextnonterminal;
+        advantages[t] = lastpufferlam;
+    }
 }
 
-// [num_steps, horizon]
-void vtrace(float* values, float* rewards, float* dones, float* importance,
-        float* vs, float* advantages, float gamma, float rho_clip, float c_clip,
-        int num_steps, const int horizon){
-    for (int offset = 0; offset < num_steps*horizon; offset+=horizon) {
-        vtrace_row(values + offset, rewards + offset,
-            dones + offset, importance + offset,
-            vs + offset, advantages + offset,
-            gamma, rho_clip, c_clip, horizon
-        );
+// Validate that every tensor is a float32, contiguous [num_steps, horizon]
+// matrix on one device, because the caller reads them through raw pointers.
+void vtrace_check(torch::Tensor values, torch::Tensor rewards,
+        torch::Tensor dones, torch::Tensor importance, torch::Tensor advantages,
+        int num_steps, int horizon) {
+    torch::Device device = values.device();
+    for (const torch::Tensor& t : {values, rewards, dones, importance, advantages}) {
+        TORCH_CHECK(t.dim() == 2, "Tensor must be 2D");
+        TORCH_CHECK(t.device() == device, "All tensors must be on same device");
+        TORCH_CHECK(t.size(0) == num_steps, "First dimension must match num_steps");
+        TORCH_CHECK(t.size(1) == horizon, "Second dimension must match horizon");
+        TORCH_CHECK(t.dtype() == torch::kFloat32, "All tensors must be float32");
+        assert(horizon <= max_horizon);
+        // contiguous() returns a new tensor and cannot fix the caller's buffer
+        // in place, so reject strided input rather than misread it.
+        TORCH_CHECK(t.is_contiguous(), "All tensors must be contiguous");
     }
 }
 
+
 // [num_steps, horizon]
 void puff_advantage(float* values, float* rewards, float* dones, float* importance,
-        float* vs, float* advantages, float gamma, float lambda, float rho_clip, float c_clip,
+        float* advantages, float gamma, float lambda, float rho_clip, float c_clip,
         int num_steps, const int horizon){
     for (int offset = 0; offset < num_steps*horizon; offset+=horizon) {
         puff_advantage_row(values + offset, rewards + offset,
-            dones + offset, importance + offset,
-            vs + offset, advantages + offset,
+            dones + offset, importance + offset, advantages + offset,
             gamma, lambda, rho_clip, c_clip, horizon
         );
     }
 }
 
-void compute_vtrace(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, torch::Tensor importance, torch::Tensor vs, torch::Tensor advantages,
-        float gamma, float rho_clip, float c_clip) {
-    int num_steps = values.size(0);
-    int horizon = values.size(1);
-    vtrace_check(values, rewards, dones, importance, vs, advantages, num_steps, horizon);
-    vtrace(values.data_ptr<float>(), rewards.data_ptr<float>(),
-        dones.data_ptr<float>(), importance.data_ptr<float>(),
-        vs.data_ptr<float>(), advantages.data_ptr<float>(),
-        gamma, rho_clip, c_clip, num_steps, horizon
-    );
-}
 
-void compute_puff_advantage(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, torch::Tensor importance, torch::Tensor vs, torch::Tensor advantages,
-        float gamma, float lambda, float rho_clip, float c_clip) {
+void compute_puff_advantage_cpu(torch::Tensor values, torch::Tensor rewards,
+        torch::Tensor dones, torch::Tensor importance, torch::Tensor advantages,
+        double gamma, double lambda, double rho_clip, double c_clip) {
     int num_steps = values.size(0);
     int horizon = values.size(1);
-    vtrace_check(values, rewards, dones, importance, vs, advantages, num_steps, horizon);
+    vtrace_check(values, rewards, dones, importance, advantages, num_steps, horizon);
     puff_advantage(values.data_ptr<float>(), rewards.data_ptr<float>(),
-        dones.data_ptr<float>(), importance.data_ptr<float>(),
-        vs.data_ptr<float>(), advantages.data_ptr<float>(),
+        dones.data_ptr<float>(), importance.data_ptr<float>(), advantages.data_ptr<float>(),
         gamma, lambda, rho_clip, c_clip, num_steps, horizon
     );
 }
 
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-    m.def("compute_gae", &compute_gae, "Compute GAE with C");
-    m.def("compute_vtrace", &compute_vtrace, "Compute VTrace with C");
-    m.def("compute_puff_advantage", &compute_puff_advantage, "Compute PuffAdvantage with C");
+TORCH_LIBRARY(pufferlib, m) {
+   m.def("compute_puff_advantage(Tensor(a!) values, Tensor(b!) rewards, Tensor(c!) dones, Tensor(d!) importance, Tensor(e!) advantages, float gamma, float lambda, float rho_clip, float c_clip) -> ()");
+ }
+
+TORCH_LIBRARY_IMPL(pufferlib, CPU, m) {
+  m.impl("compute_puff_advantage", &compute_puff_advantage_cpu);
+}
+
 }
diff --git a/pufferlib.cu b/pufferlib.cu
deleted file mode 100644
index 6cf490496c..0000000000
--- a/pufferlib.cu
+++ /dev/null
@@ -1,284 +0,0 @@
-#include "shared.cpp"
-
-__global__ void p3o_kernel(
-    float* reward_block,    // [num_steps, horizon]
-    float* reward_mask,     // [num_steps, horizon]
-    float* values_mean,     // [num_steps, horizon]
-    float* values_std,      // [num_steps, horizon]
-    float* buf,            // [num_steps, horizon]
-    float* dones,          // [num_steps]
-    float* rewards,        // [num_steps]
-    float* advantages,     // [num_steps]
-    int* bounds,          // [num_steps]
-    int num_steps,
-    float r_std,
-    float puf,
-    int horizon
-) {
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
-    if (i >= num_steps) return;
-
-    int k = 0;
-    for (int j = 0; j < horizon-1; j++) {
-        int t = i + j;
-        if (t >= num_steps - 1) {
-            break;
-        }
-        if (dones[t+1]) {
-            k++;
-            break;
-        }
-        k++;
-    }
-
-    float gamma_max = 0.0f;
-    float n = 0.0f;
-    for (int j = k-1; j >= 0; j--) {
-        int idx = i * horizon + j;
-        n++;
-
-        float vstd = values_std[idx];
-        if (vstd == 0.0f) {
-            buf[idx] = 0.0f;
-            continue;
-        }
-
-        float gamma = 1.0f / (vstd*vstd);
-        if (r_std != 0.0f) {
-            gamma -= puf/(r_std*r_std);
-        }
-
-        if (gamma < 0.0f) {
-            gamma = 0.0f;
-        }
-
-        if (gamma > gamma_max) {
-            gamma_max = gamma;
-        }
-        buf[idx] = gamma;
-        reward_mask[idx] = 1.0f;
-    }
-
-    //float bootstrap = 0.0f;
-    //if (k == horizon-1) {
-    //    bootstrap = buf[i*horizon + horizon - 1]*values_mean[i*horizon + horizon - 1];
-    //}
-
-    float R = 0.0f;
-    for (int j = 0; j <= k-1; j++) {
-        int t = i + j;
-        int idx = i * horizon + j;
-        float r = rewards[t+1];
-
-        float gamma = buf[idx];
-        if (gamma_max > 0) {
-            gamma /= gamma_max;
-        }
-
-        if (j >= 16 && values_std[idx] > 0.95*r_std) {
-            break;
-        }
-
-        R += gamma * (r - values_mean[idx]);
-        reward_block[idx] = r;
-        buf[idx] = gamma;
-    }
-
-    advantages[i] = R;
-    bounds[i] = k;
-}
-
-
-void compute_p3o(torch::Tensor reward_block, torch::Tensor reward_mask,
-        torch::Tensor values_mean, torch::Tensor values_std, torch::Tensor buf,
-        torch::Tensor dones, torch::Tensor rewards, torch::Tensor advantages,
-        torch::Tensor bounds, int num_steps, float vstd_max, float puf,
-        int horizon) {
-
-    // TODO: Port from python
-    /*
-    assert all(t.is_cuda for t in [reward_block, reward_mask, values_mean, values_std, 
-                                  buf, dones, rewards, advantages, bounds]), "All tensors must be on GPU"
-    
-    # Ensure contiguous memory
-    tensors = [reward_block, reward_mask, values_mean, values_std, buf, dones, rewards, advantages, bounds]
-    for t in tensors:
-        t.contiguous()
-        assert t.is_cuda
-
-    num_steps = rewards.shape[0]
-    
-    # Precompute vstd_min and vstd_max
-    #vstd_max = values_std.max().item()
-    #vstd_min = values_std.min().item()
-
-    # Launch kernel
-    threads_per_block = 256
-    assert num_steps % threads_per_block == 0
-    blocks = (num_steps + threads_per_block - 1) // threads_per_block
-    */
- 
-    // Launch the kernel
-    int threads_per_block = 256;
-    int blocks = (num_steps + threads_per_block - 1) / threads_per_block;
-
-    p3o_kernel<<<blocks, threads_per_block>>>(
-        reward_block.data_ptr<float>(),
-        reward_mask.data_ptr<float>(),
-        values_mean.data_ptr<float>(),
-        values_std.data_ptr<float>(),
-        buf.data_ptr<float>(),
-        dones.data_ptr<float>(),
-        rewards.data_ptr<float>(),
-        advantages.data_ptr<float>(),
-        bounds.data_ptr<int>(),
-        num_steps,
-        vstd_max, 
-        puf,
-        horizon
-    );
-
-    // Check for CUDA errors
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess) {
-        throw std::runtime_error(cudaGetErrorString(err));
-    }
-    return;
-}
-
-// [num_steps, horizon]
-__global__ void gae_kernel(float* values, float* rewards, float* dones,
-        float* advantages, float gamma, float gae_lambda, int num_steps, int horizon) {
-    int row = blockIdx.x*blockDim.x + threadIdx.x;
-    int offset = row*horizon;
-    gae_row(values + offset, rewards + offset, dones + offset,
-        advantages + offset, gamma, gae_lambda, horizon);
-}
-
-torch::Tensor compute_gae(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, float gamma, float gae_lambda) {
-    int num_steps = values.size(0);
-    int horizon = values.size(1);
-    torch::Tensor advantages = gae_check(values, rewards, dones, num_steps, horizon);
-    TORCH_CHECK(values.is_cuda(), "All tensors must be on GPU");
-
-    int threads_per_block = 256;
-    int blocks = (num_steps + threads_per_block - 1) / threads_per_block;
-    assert(num_steps % threads_per_block == 0);
-
-    gae_kernel<<<blocks, threads_per_block>>>(
-        values.data_ptr<float>(),
-        rewards.data_ptr<float>(),
-        dones.data_ptr<float>(),
-        advantages.data_ptr<float>(),
-        gamma,
-        gae_lambda,
-        num_steps,
-        horizon
-    );
-
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess) {
-        throw std::runtime_error(cudaGetErrorString(err));
-    }
-
-    return advantages;
-}
-
- // [num_steps, horizon]
-__global__ void vtrace_kernel(float* values, float* rewards, float* dones, float* importance,
-        float* vs, float* advantages, float gamma, float rho_clip, float c_clip, int num_steps, int horizon) {
-    int row = blockIdx.x*blockDim.x + threadIdx.x;
-    int offset = row*horizon;
-    vtrace_row(values + offset, rewards + offset, dones + offset,
-        importance + offset, vs + offset, advantages + offset, gamma, rho_clip, c_clip, horizon);
-}
-
-void compute_vtrace(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, torch::Tensor importance, torch::Tensor vs, torch::Tensor advantages,
-        float gamma, float rho_clip, float c_clip) {
-    int num_steps = values.size(0);
-    int horizon = values.size(1);
-    vtrace_check(values, rewards, dones, importance, vs, advantages, num_steps, horizon);
-    TORCH_CHECK(values.is_cuda(), "All tensors must be on GPU");
-    assert(horizon <= max_horizon);
-
-    int threads_per_block = 128;
-    int blocks = (num_steps + threads_per_block - 1) / threads_per_block;
-    assert(num_steps % threads_per_block == 0);
-
-    vtrace_kernel<<<blocks, threads_per_block>>>(
-        values.data_ptr<float>(),
-        rewards.data_ptr<float>(),
-        dones.data_ptr<float>(),
-        importance.data_ptr<float>(),
-        vs.data_ptr<float>(),
-        advantages.data_ptr<float>(),
-        gamma,
-        rho_clip,
-        c_clip,
-        num_steps,
-        horizon
-    );
-
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess) {
-        throw std::runtime_error(cudaGetErrorString(err));
-    }
-}
-
- // [num_steps, horizon]
-__global__ void puff_advantage_kernel(float* values, float* rewards, float* dones, float* importance,
-        float* vs, float* advantages, float gamma, float lambda,
-        float rho_clip, float c_clip, int num_steps, int horizon) {
-    int row = blockIdx.x*blockDim.x + threadIdx.x;
-    int offset = row*horizon;
-    puff_advantage_row(values + offset, rewards + offset, dones + offset,
-        importance + offset, vs + offset, advantages + offset, gamma, lambda, rho_clip, c_clip, horizon);
-}
-
-void compute_puff_advantage(torch::Tensor values, torch::Tensor rewards,
-        torch::Tensor dones, torch::Tensor importance, torch::Tensor vs, torch::Tensor advantages,
-        float gamma, float lambda, float rho_clip, float c_clip) {
-    int num_steps = values.size(0);
-    int horizon = values.size(1);
-    vtrace_check(values, rewards, dones, importance, vs, advantages, num_steps, horizon);
-    TORCH_CHECK(values.is_cuda(), "All tensors must be on GPU");
-    assert(horizon <= max_horizon);
-
-    int threads_per_block = 256;
-    if (threads_per_block > num_steps) {
-        threads_per_block = 2*(num_steps/2);
-    }
-    int blocks = (num_steps + threads_per_block - 1) / threads_per_block;
-    assert(num_steps % threads_per_block == 0);
-
-    puff_advantage_kernel<<<blocks, threads_per_block>>>(
-        values.data_ptr<float>(),
-        rewards.data_ptr<float>(),
-        dones.data_ptr<float>(),
-        importance.data_ptr<float>(),
-        vs.data_ptr<float>(),
-        advantages.data_ptr<float>(),
-        gamma,
-        lambda,
-        rho_clip,
-        c_clip,
-        num_steps,
-        horizon
-    );
-
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess) {
-        throw std::runtime_error(cudaGetErrorString(err));
-    }
-}
-
-
-// Pybind11 module definition
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-    m.def("compute_p3o", &compute_p3o, "Compute p3o advantages with CUDA");
-    m.def("compute_gae", &compute_gae, "Compute GAE with CUDA");
-    m.def("compute_vtrace", &compute_vtrace, "Compute VTrace with CUDA");
-    m.def("compute_puff_advantage", &compute_puff_advantage, "Compute PuffAdvantage with CUDA");
-}
diff --git a/pufferlib/__init__.py b/pufferlib/__init__.py
index c501c01945..cbdc7a463b 100644
--- a/pufferlib/__init__.py
+++ b/pufferlib/__init__.py
@@ -1,5 +1,4 @@
-from pufferlib import version
-__version__ = version.__version__
+__version__ = '2.0.6'
 
 import os
 import sys
@@ -23,6 +22,5 @@
 sys.stdout = original_stdout
 sys.stderr = original_stderr
 
-from pufferlib.namespace import namespace, dataclass, Namespace
+from pufferlib.pufferlib import *
 from pufferlib import environments
-from pufferlib.environment import PufferEnv
diff --git a/pufferlib/cleanrl.py b/pufferlib/cleanrl.py
deleted file mode 100644
index 9292a1d7c4..0000000000
--- a/pufferlib/cleanrl.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from pdb import set_trace as T
-
-class Policy(torch.nn.Module):
-    '''Wrap a non-recurrent PyTorch model for use with CleanRL'''
-    def __init__(self, policy):
-        super().__init__()
-        self.policy = policy
-        self.is_continuous = hasattr(policy, 'is_continuous') and policy.is_continuous
-        self.hidden_size = policy.hidden_size
-
-    def get_value(self, x, state=None):
-        _, value = self.policy(x)
-        return value
-
-    def get_action_and_value(self, x, action=None):
-         logits, value, e3b, intrinsic_reward = self.policy(x, e3b=e3b)
-         action, logprob, entropy = sample_logits(logits, action, self.is_continuous)
-         return action, logprob, entropy, value, e3b, intrinsic_reward
-
-    def forward(self, x, action=None, e3b=None):
-        return self.get_action_and_value(x, action, e3b)
-
-
-class RecurrentPolicy(torch.nn.Module):
-    '''Wrap a recurrent PyTorch model for use with CleanRL'''
-    def __init__(self, policy):
-        super().__init__()
-        self.policy = policy
-        self.is_continuous = hasattr(policy.policy, 'is_continuous') and policy.policy.is_continuous
-        self.hidden_size = policy.hidden_size
-
-    @property
-    def lstm(self):
-        if hasattr(self.policy, 'recurrent'):
-            return self.policy.recurrent
-        elif hasattr(self.policy, 'lstm'):
-            return self.policy.lstm
-        else:
-            raise ValueError('Policy must have a subnetwork named lstm or recurrent')
-
-    def get_value(self, x, state=None):
-        _, value, _ = self.policy(x, state)
-
-    def get_action_and_value(self, x, state=None, action=None, e3b=None):
-        #logits, value, state, e3b, intrinsic_reward = self.policy(x, state, e3b=e3b)
-        logits, value_mean, value_logstd, state = self.policy(x, state, e3b=e3b)
-        action, logprob, entropy = sample_logits(logits, action, self.is_continuous)
-        return action, logprob, entropy, value_mean, value_logstd, state#, e3b, intrinsic_reward
-
-    def forward(self, x, state=None, action=None, e3b=None):
-        return self.get_action_and_value(x, state, action, e3b)
diff --git a/pufferlib/emulation.py b/pufferlib/emulation.py
index a1aba05e98..844104fc68 100644
--- a/pufferlib/emulation.py
+++ b/pufferlib/emulation.py
@@ -8,11 +8,7 @@
 
 import pufferlib
 import pufferlib.spaces
-from pufferlib import utils, exceptions
-from pufferlib.environment import set_buffers
 from pufferlib.spaces import Discrete, Tuple, Dict
-import pufferlib.environment
-
 
 def emulate(struct, sample):
     if isinstance(sample, dict):
@@ -58,6 +54,7 @@ def nativize(arr, space, struct_dtype):
     struct = np.asarray(arr).view(struct_dtype)[0]
     return _nativize(struct, space)
 
+# TODO: Uncomment?
 '''
 try:
     from pufferlib.extensions import emulate, nativize
@@ -65,6 +62,20 @@ def nativize(arr, space, struct_dtype):
     warnings.warn('PufferLib Cython extensions not installed. Using slow Python versions')
 '''
 
+def get_dtype_bounds(dtype):
+    '''Return (min, max) for a numpy dtype; all floats use the float32 range.'''
+    if dtype == bool:
+        return 0, 1
+    elif np.issubdtype(dtype, np.integer):
+        # np.integer is the parent of np.unsignedinteger: covers signed and unsigned
+        return np.iinfo(dtype).min, np.iinfo(dtype).max
+    elif np.issubdtype(dtype, np.floating):
+        # Gym fails on float64
+        return np.finfo(np.float32).min, np.finfo(np.float32).max
+    else:
+        raise ValueError(f"Unsupported dtype: {dtype}")
+
+
 def dtype_from_space(space):
     if isinstance(space, pufferlib.spaces.Tuple):
         dtype = []
@@ -110,7 +121,7 @@ def emulate_observation_space(space):
     else:
         dtype = np.dtype(np.uint8)
 
-    mmin, mmax = utils._get_dtype_bounds(dtype)
+    mmin, mmax = get_dtype_bounds(dtype)
     numel = emulated_dtype.itemsize // dtype.itemsize
     emulated_space = gymnasium.spaces.Box(low=mmin, high=mmax, shape=(numel,), dtype=dtype)
     return emulated_space, emulated_dtype
@@ -128,7 +139,7 @@ def emulate_action_space(space):
 
 
 class GymnasiumPufferEnv(gymnasium.Env):
-    def __init__(self, env=None, env_creator=None, env_args=[], env_kwargs={}, buf=None):
+    def __init__(self, env=None, env_creator=None, env_args=[], env_kwargs={}, buf=None, seed=0):
         self.env = make_object(env, env_creator, env_args, env_kwargs)
 
         self.initialized = False
@@ -147,14 +158,14 @@ def __init__(self, env=None, env_creator=None, env_args=[], env_kwargs={}, buf=N
 
         self.is_obs_emulated = self.single_observation_space is not self.env.observation_space
         self.is_atn_emulated = self.single_action_space is not self.env.action_space
-        self.emulated = pufferlib.namespace(
-            observation_dtype = self.observation_space.dtype,
-            emulated_observation_dtype = self.obs_dtype,
+        self.emulated = dict(
+            observation_dtype=self.observation_space.dtype,
+            emulated_observation_dtype=self.obs_dtype,
         )
 
         self.render_modes = 'human rgb_array'.split()
 
-        set_buffers(self, buf)
+        pufferlib.set_buffers(self, buf)
         if isinstance(self.env.observation_space, pufferlib.spaces.Box):
             self.obs_struct = self.observations
         else:
@@ -191,9 +202,9 @@ def reset(self, seed=None):
     def step(self, action):
         '''Execute an action and return (observation, reward, done, info)'''
         if not self.initialized:
-            raise exceptions.APIUsageError('step() called before reset()')
+            raise pufferlib.APIUsageError('step() called before reset()')
         if self.done:
-            raise exceptions.APIUsageError('step() called after environment is done')
+            raise pufferlib.APIUsageError('step() called after environment is done')
 
         # Unpack actions from multidiscrete into the original action space
         if self.is_atn_emulated:
@@ -249,14 +260,14 @@ def __init__(self, env=None, env_creator=None, env_args=[], buf=None, env_kwargs
             emulate_action_space(self.env_single_action_space))
         self.is_obs_emulated = self.single_observation_space is not self.env_single_observation_space
         self.is_atn_emulated = self.single_action_space is not self.env_single_action_space
-        self.emulated = pufferlib.namespace(
+        self.emulated = dict(
             observation_dtype = self.single_observation_space.dtype,
             emulated_observation_dtype = self.obs_dtype,
         )
 
         self.num_agents = len(self.possible_agents)
 
-        set_buffers(self, buf)
+        pufferlib.set_buffers(self, buf)
         if isinstance(self.env_single_observation_space, pufferlib.spaces.Box):
             self.obs_struct = self.observations
         else:
@@ -281,14 +292,14 @@ def done(self):
     def observation_space(self, agent):
         '''Returns the observation space for a single agent'''
         if agent not in self.possible_agents:
-            raise pufferlib.exceptions.InvalidAgentError(agent, self.possible_agents)
+            raise pufferlib.InvalidAgentError(agent, self.possible_agents)
 
         return self.single_observation_space
 
     def action_space(self, agent):
         '''Returns the action space for a single agent'''
         if agent not in self.possible_agents:
-            raise pufferlib.exceptions.InvalidAgentError(agent, self.possible_agents)
+            raise pufferlib.InvalidAgentError(agent, self.possible_agents)
 
         return self.single_action_space
 
@@ -329,13 +340,13 @@ def reset(self, seed=None):
     def step(self, actions):
         '''Step the environment and return (observations, rewards, dones, infos)'''
         if not self.initialized:
-            raise exceptions.APIUsageError('step() called before reset()')
+            raise pufferlib.APIUsageError('step() called before reset()')
         if self.done:
-            raise exceptions.APIUsageError('step() called after environment is done')
+            raise pufferlib.APIUsageError('step() called after environment is done')
 
         if isinstance(actions, np.ndarray):
             if not self.is_action_checked and len(actions) != self.num_agents:
-                raise exceptions.APIUsageError(
+                raise pufferlib.APIUsageError(
                     f'Actions specified as len {len(actions)} but environment has {self.num_agents} agents')
 
             actions = {agent: actions[i] for i, agent in enumerate(self.possible_agents)}
@@ -344,7 +355,7 @@ def step(self, actions):
         if not self.is_action_checked:
             for agent in actions:
                 if agent not in self.possible_agents:
-                    raise exceptions.InvalidAgentError(agent, self.possible_agents)
+                    raise pufferlib.InvalidAgentError(agent, self.possible_agents)
 
             self.is_action_checked = check_space(
                 next(iter(actions.values())),
@@ -355,7 +366,7 @@ def step(self, actions):
         unpacked_actions = {}
         for agent, atn in actions.items():
             if agent not in self.possible_agents:
-                raise exceptions.InvalidAgentError(agent, self.agents)
+                raise pufferlib.InvalidAgentError(agent, self.agents)
 
             if agent not in self.agents:
                 continue
@@ -435,11 +446,11 @@ def check_space(data, space):
     try:
         contains = space.contains(data)
     except:
-        raise exceptions.APIUsageError(
+        raise pufferlib.APIUsageError(
             f'Error checking space {space} with sample :\n{data}')
 
     if not contains:
-        raise exceptions.APIUsageError(
+        raise pufferlib.APIUsageError(
             f'Data:\n{data}\n not in space:\n{space}')
     
     return True
@@ -462,9 +473,9 @@ def _seed_and_reset(env, seed):
 
     return obs, info
 
-class GymnaxPufferEnv(pufferlib.environment.PufferEnv):
+class GymnaxPufferEnv(pufferlib.PufferEnv):
     def __init__(self, env, env_params, num_envs=1, buf=None):
-        from gymnax.environments.spaces import gymnax_space_to_gym_space
+        from gymnax.spaces import gymnax_space_to_gym_space
 
         gymnax_obs_space = env.observation_space(env_params)
         self.single_observation_space = gymnax_space_to_gym_space(gymnax_obs_space)
diff --git a/pufferlib/environment.py b/pufferlib/environment.py
deleted file mode 100644
index bce092fbc6..0000000000
--- a/pufferlib/environment.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import numpy as np
-
-from pufferlib.exceptions import APIUsageError
-import pufferlib.spaces
-
-ERROR = '''
-Environment missing required attribute {}. The most common cause is
-calling super() before you have assigned the attribute.
-'''
-
-def set_buffers(env, buf=None):
-    if buf is None:
-        obs_space = env.single_observation_space
-        env.observations = np.zeros((env.num_agents, *obs_space.shape), dtype=obs_space.dtype)
-        env.rewards = np.zeros(env.num_agents, dtype=np.float32)
-        env.terminals = np.zeros(env.num_agents, dtype=bool)
-        env.truncations = np.zeros(env.num_agents, dtype=bool)
-        env.masks = np.ones(env.num_agents, dtype=bool)
-
-        # TODO: Major kerfuffle on inferring action space dtype. This needs some asserts?
-        atn_space = env.single_action_space
-        if isinstance(env.single_action_space, pufferlib.spaces.Box):
-            env.actions = np.zeros((env.num_agents, *atn_space.shape), dtype=atn_space.dtype)
-        else:
-            env.actions = np.zeros((env.num_agents, *atn_space.shape), dtype=np.int32)
-    else:
-        env.observations = buf.observations
-        env.rewards = buf.rewards
-        env.terminals = buf.terminals
-        env.truncations = buf.truncations
-        env.masks = buf.masks
-        env.actions = buf.actions
-
-class PufferEnv:
-    def __init__(self, buf=None):
-        if not hasattr(self, 'single_observation_space'):
-            raise APIUsageError(ERROR.format('single_observation_space'))
-        if not hasattr(self, 'single_action_space'):
-            raise APIUsageError(ERROR.format('single_action_space'))
-        if not hasattr(self, 'num_agents'):
-            raise APIUsageError(ERROR.format('num_agents'))
-
-        if hasattr(self, 'observation_space'):
-            raise APIUsageError('PufferEnvs must define single_observation_space, not observation_space')
-        if hasattr(self, 'action_space'):
-            raise APIUsageError('PufferEnvs must define single_action_space, not action_space')
-        if not isinstance(self.single_observation_space, pufferlib.spaces.Box):
-            raise APIUsageError('Native observation_space must be a Box')
-        if (not isinstance(self.single_action_space, pufferlib.spaces.Discrete)
-                and not isinstance(self.single_action_space, pufferlib.spaces.MultiDiscrete)
-                and not isinstance(self.single_action_space, pufferlib.spaces.Box)):
-            raise APIUsageError('Native action_space must be a Discrete, MultiDiscrete, or Box')
-
-        set_buffers(self, buf)
-
-        self.action_space = pufferlib.spaces.joint_space(self.single_action_space, self.num_agents)
-        self.observation_space = pufferlib.spaces.joint_space(self.single_observation_space, self.num_agents)
-        self.agent_ids = np.arange(self.num_agents)
-
-    @property
-    def emulated(self):
-        '''Native envs do not use emulation'''
-        return False
-
-    @property
-    def done(self):
-        '''Native envs handle resets internally'''
-        return False
-
-    @property
-    def driver_env(self):
-        '''For compatibility with Multiprocessing'''
-        return self
-
-    def reset(self, seed=None):
-        raise NotImplementedError
-
-    def step(self, actions):
-        raise NotImplementedError
-
-    def close(self):
-        raise NotImplementedError
-
-    def async_reset(self, seed=None):
-        _, self.infos = self.reset(seed)
-        assert isinstance(self.infos, list), 'PufferEnvs must return info as a list of dicts'
-
-    def send(self, actions):
-        _, _, _, _, self.infos = self.step(actions)
-        assert isinstance(self.infos, list), 'PufferEnvs must return info as a list of dicts'
-
-    def recv(self):
-        return (self.observations, self.rewards, self.terminals,
-            self.truncations, self.infos, self.agent_ids, self.masks)
diff --git a/pufferlib/environments/metta/environment.py b/pufferlib/environments/metta/environment.py
index af57437e72..c4ef967c26 100644
--- a/pufferlib/environments/metta/environment.py
+++ b/pufferlib/environments/metta/environment.py
@@ -15,7 +15,11 @@ class MettaPuff(pufferlib.PufferEnv):
     def __init__(self, config, render_mode='human', buf=None, seed=0):
         self.render_mode = render_mode
         import mettagrid.mettagrid_env
-        self.env = mettagrid.mettagrid_env.make_env_from_cfg(config, render_mode, buf=buf)
+        from omegaconf import OmegaConf
+        cfg = OmegaConf.load(config)
+
+        from mettagrid.mettagrid_env import MettaGridEnv
+        self.env = MettaGridEnv(cfg, render_mode=render_mode, buf=buf)
 
         if render_mode == 'human':
             from mettagrid.gym_wrapper import RaylibRendererWrapper
@@ -26,12 +30,6 @@ def __init__(self, config, render_mode='human', buf=None, seed=0):
         self.num_agents = self.env.num_agents
         super().__init__(buf)
 
-        #cfg = self.env._env_cfg
-        #cfg.eval.env = config_from_path(cfg.eval.env, cfg.eval.env_overrides)
-        #from mettagrid.renderer.raylib.raylib_renderer import MettaGridRaylibRenderer
-        #self.env._renderer =  MettaGridRaylibRenderer(self.env._c_env, self.env._env_cfg['game'])
-
-
     def step(self, actions):
         obs, rew, term, trunc, info = self.env.step(actions)
 
diff --git a/pufferlib/environments/trade_sim/environment.py b/pufferlib/environments/trade_sim/environment.py
index af47073e42..0c245c0b25 100644
--- a/pufferlib/environments/trade_sim/environment.py
+++ b/pufferlib/environments/trade_sim/environment.py
@@ -8,7 +8,7 @@
 def env_creator(name='metta'):
     return functools.partial(make, name)
 
-def make(name, config_path='../nof1-trading-sim/config/experiment_config_3.yaml', render_mode='human', buf=None, seed=1):
+def make(name, config_path='../nof1-trading-sim/config/experiment_cv.yaml', render_mode='human', buf=None, seed=1):
     '''Crafter creation function'''
     from nof1.utils.config_manager import ConfigManager
     from nof1.data_ingestion.historical_data_reader import HistoricalDataReader
diff --git a/pufferlib/exceptions.py b/pufferlib/exceptions.py
deleted file mode 100644
index ec8d2cc844..0000000000
--- a/pufferlib/exceptions.py
+++ /dev/null
@@ -1,20 +0,0 @@
-class EnvironmentSetupError(RuntimeError):
-    def __init__(self, e, package):
-        super().__init__(self.message)
-
-class APIUsageError(RuntimeError):
-    """Exception raised when the API is used incorrectly."""
-
-    def __init__(self, message="API usage error."):
-        self.message = message
-        super().__init__(self.message)
-
-class InvalidAgentError(ValueError):
-    """Exception raised when an invalid agent key is used."""
-
-    def __init__(self, agent_id, agents):
-        message = (
-            f'Invalid agent/team ({agent_id}) specified. '
-            f'Valid values:\n{agents}'
-        )
-        super().__init__(message)
diff --git a/pufferlib/models.py b/pufferlib/models.py
index 067c37c6e5..0b76fe310e 100644
--- a/pufferlib/models.py
+++ b/pufferlib/models.py
@@ -21,7 +21,7 @@ class Default(nn.Module):
     the recurrent cell into encode_observations and put everything after
     into decode_actions.
     '''
-    def __init__(self, env, hidden_size=128, use_p3o=False, p3o_horizon=32, use_diayn=False, diayn_skills=128):
+    def __init__(self, env, hidden_size=128):
         super().__init__()
         self.hidden_size = hidden_size
         self.is_multidiscrete = isinstance(env.single_action_space,
@@ -38,7 +38,6 @@ def __init__(self, env, hidden_size=128, use_p3o=False, p3o_horizon=32, use_diay
             input_size = int(sum(np.prod(v.shape) for v in env.env.observation_space.values()))
             self.encoder = nn.Linear(input_size, self.hidden_size)
         else:
-            #self.encoder = nn.Linear(np.prod(env.single_observation_space.shape), hidden_size)
             self.encoder = torch.nn.Sequential(
                 nn.Linear(np.prod(env.single_observation_space.shape), hidden_size),
                 nn.GELU(),
@@ -58,32 +57,11 @@ def __init__(self, env, hidden_size=128, use_p3o=False, p3o_horizon=32, use_diay
             self.decoder_logstd = nn.Parameter(torch.zeros(
                 1, env.single_action_space.shape[0]))
 
-        if use_diayn:
-            self.diayn_discriminator = nn.Sequential(
-                pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)),
-                nn.ReLU(),
-                pufferlib.pytorch.layer_init(nn.Linear(hidden_size, diayn_skills)),
-            )
-
-        self.use_p3o = use_p3o
-        self.p3o_horizon = p3o_horizon
-        if use_p3o:
-            self.value_mean = pufferlib.pytorch.layer_init(
-                nn.Linear(hidden_size, p3o_horizon), std=1)
-            self.value_logstd = nn.Parameter(torch.zeros(1, p3o_horizon))
-
-            #param = np.log10(np.arange(1, N+1))
-            #param = 1 - np.exp(-np.sqrt(np.arange(N)))
-            #self.value_logstd = nn.Parameter(torch.tensor(param).view(1, N))
-            #self.value_logstd = pufferlib.pytorch.layer_init(
-            #    nn.Linear(hidden_size, 32), std=0.01)
-        else:
-            self.value = pufferlib.pytorch.layer_init(
-                nn.Linear(hidden_size, 1), std=1)
+        self.value = pufferlib.pytorch.layer_init(
+            nn.Linear(hidden_size, 1), std=1)
 
     def forward(self, observations, state=None):
         hidden = self.encode_observations(observations, state=state)
-        state.hidden = hidden
         logits, values = self.decode_actions(hidden)
         return logits, values
 
@@ -114,24 +92,16 @@ def decode_actions(self, hidden):
         else:
             logits = self.decoder(hidden)
 
-        if self.use_p3o:
-            mean=self.value_mean(hidden)
-            values = pufferlib.namespace(
-                mean=mean,
-                std=torch.exp(torch.clamp(self.value_logstd, -10, 10)).expand_as(mean),
-            )
-        else:
-            values = self.value(hidden)
-
+        values = self.value(hidden)
         return logits, values
 
-class LSTMWrapper(nn.LSTM):
+class LSTMWrapper(nn.Module):
     def __init__(self, env, policy, input_size=128, hidden_size=128):
         '''Wraps your policy with an LSTM without letting you shoot yourself in the
         foot with bad transpose and shape operations. This saves much pain.
         Requires that your policy define encode_observations and decode_actions.
         See the Default policy for an example.'''
-        super().__init__(input_size, hidden_size)
+        super().__init__()
         self.obs_shape = env.single_observation_space.shape
 
         self.policy = policy
@@ -147,11 +117,13 @@ def __init__(self, env, policy, input_size=128, hidden_size=128):
             elif "weight" in name:
                 nn.init.orthogonal_(param, 1.0)
 
+        self.lstm = nn.LSTM(input_size, hidden_size)
+
         self.cell = torch.nn.LSTMCell(input_size, hidden_size)
-        self.cell.weight_ih = self.weight_ih_l0
-        self.cell.weight_hh = self.weight_hh_l0
-        self.cell.bias_ih = self.bias_ih_l0
-        self.cell.bias_hh = self.bias_hh_l0
+        self.cell.weight_ih = self.lstm.weight_ih_l0
+        self.cell.weight_hh = self.lstm.weight_hh_l0
+        self.cell.bias_ih = self.lstm.bias_ih_l0
+        self.cell.bias_hh = self.lstm.bias_hh_l0
 
         #self.pre_layernorm = nn.LayerNorm(hidden_size)
         #self.post_layernorm = nn.LayerNorm(hidden_size)
@@ -159,8 +131,8 @@ def __init__(self, env, policy, input_size=128, hidden_size=128):
     def forward(self, observations, state):
         '''Forward function for inference. 3x faster than using LSTM directly'''
         hidden = self.policy.encode_observations(observations, state=state)
-        h = state.lstm_h
-        c = state.lstm_c
+        h = state['lstm_h']
+        c = state['lstm_c']
 
         # TODO: Don't break compile
         if h is not None:
@@ -172,17 +144,17 @@ def forward(self, observations, state):
         #hidden = self.pre_layernorm(hidden)
         hidden, c = self.cell(hidden, lstm_state)
         #hidden = self.post_layernorm(hidden)
-        state.hidden = hidden
-        state.lstm_h = hidden
-        state.lstm_c = c
+        state['hidden'] = hidden
+        state['lstm_h'] = hidden
+        state['lstm_c'] = c
         logits, values = self.policy.decode_actions(hidden)
         return logits, values
 
     def forward_train(self, observations, state):
         '''Forward function for training. Uses LSTM for fast time-batching'''
         x = observations
-        lstm_h = state.lstm_h
-        lstm_c = state.lstm_c
+        lstm_h = state['lstm_h']
+        lstm_c = state['lstm_c']
 
         x_shape, space_shape = x.shape, self.obs_shape
         x_n, space_n = len(x_shape), len(space_shape)
@@ -210,7 +182,7 @@ def forward_train(self, observations, state):
 
         hidden = hidden.transpose(0, 1)
         #hidden = self.pre_layernorm(hidden)
-        hidden, (lstm_h, lstm_c) = super().forward(hidden, lstm_state)
+        hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state)
         #hidden = self.post_layernorm(hidden)
         hidden = hidden.transpose(0, 1)
 
@@ -218,9 +190,9 @@ def forward_train(self, observations, state):
         logits, values = self.policy.decode_actions(flat_hidden)
         values = values.reshape(B, TT)
         #state.batch_logits = logits.reshape(B, TT, -1)
-        state.hidden = hidden
-        state.lstm_h = lstm_h.detach()
-        state.lstm_c = lstm_c.detach()
+        state['hidden'] = hidden
+        state['lstm_h'] = lstm_h.detach()
+        state['lstm_c'] = lstm_c.detach()
         return logits, values
 
 class Convolutional(nn.Module):
diff --git a/pufferlib/namespace.py b/pufferlib/namespace.py
deleted file mode 100644
index a6ecfe3529..0000000000
--- a/pufferlib/namespace.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from pdb import set_trace as T
-from types import SimpleNamespace
-from collections.abc import Mapping
-
-def __getitem__(self, key):
-    return self.__dict__[key]
-
-def __setitem__(self, key, value):
-    self.__dict__[key] = value
-
-def keys(self):
-    return self.__dict__.keys()
-
-def values(self):
-    return self.__dict__.values()
-
-def items(self):
-    return self.__dict__.items()
-
-def __iter__(self):
-    return iter(self.__dict__)
-
-def __len__(self):
-    return len(self.__dict__)
-
-class Namespace(SimpleNamespace, Mapping):
-    __getitem__ = __getitem__
-    __setitem__ = __setitem__
-    __iter__ = __iter__
-    __len__ = __len__
-    keys = keys
-    values = values
-    items = items
-
-def dataclass(cls):
-    # Safely get annotations
-    annotations = getattr(cls, '__annotations__', {})
-
-    # Combine both annotated and non-annotated fields
-    all_fields = {**{k: None for k in annotations.keys()}, **cls.__dict__}
-    all_fields = {k: v for k, v in all_fields.items() if not callable(v) and not k.startswith('__')}
-
-    def __init__(self, **kwargs):
-        for field, default_value in all_fields.items():
-            setattr(self, field, kwargs.get(field, default_value))
-
-    cls.__init__ = __init__
-    setattr(cls, "__getitem__", __getitem__)
-    setattr(cls, "__setitem__", __setitem__)
-    setattr(cls, "__iter__", __iter__)
-    setattr(cls, "__len__", __len__)
-    setattr(cls, "keys", keys)
-    setattr(cls, "values", values)
-    setattr(cls, "items", items)
-    return cls
-
-def namespace(self=None, **kwargs):
-    if self is None:
-        return Namespace(**kwargs)
-    self.__dict__.update(kwargs)
diff --git a/pufferlib/ocean/breakout/breakout.c b/pufferlib/ocean/breakout/breakout.c
index 1785ebd215..a7c79ada53 100644
--- a/pufferlib/ocean/breakout/breakout.c
+++ b/pufferlib/ocean/breakout/breakout.c
@@ -2,7 +2,7 @@
 #include "breakout.h"
 #include "puffernet.h"
 
-int main() {
+void demo() {
     Weights* weights = load_weights("resources/breakout_weights.bin", 147972);
     LinearLSTM* net = make_linearlstm(weights, 1, 119, 3);
 
@@ -51,3 +51,38 @@ int main() {
     free_allocated(&env);
     close_client(env.client);
 }
+
+void test_performance(int timeout) {  // step with random actions for `timeout` seconds, print steps/sec
+    Breakout env = {
+        .width = 512,
+        .height = 512,
+        .paddle_width = 20,
+        .paddle_height = 70,
+        .ball_width = 10,
+        .ball_height = 15,
+        .brick_width = 10,
+        .brick_height = 10,
+        .brick_rows = 5,
+        .brick_cols = 10,
+        .continuous = 0,
+    };
+    allocate(&env);
+    c_reset(&env);
+
+    int start = time(NULL);
+    int num_steps = 0;
+    while (time(NULL) - start < timeout) {
+        env.actions[0] = rand() % 3;
+        c_step(&env);
+        num_steps++;
+    }
+    // Cast before dividing (int / int truncated the rate); guard zero elapsed for timeout <= 0
+    int end = time(NULL);
+    float sps = (end > start) ? (float)num_steps / (float)(end - start) : 0.0f;
+    printf("Test Environment SPS: %f\n", sps);
+    free_allocated(&env);
+}
+
+int main() {
+    test_performance(10);
+}
diff --git a/pufferlib/ocean/breakout/breakout.h b/pufferlib/ocean/breakout/breakout.h
index 6b89a1ea6e..c85e5564a0 100644
--- a/pufferlib/ocean/breakout/breakout.h
+++ b/pufferlib/ocean/breakout/breakout.h
@@ -19,18 +19,25 @@
 #define BRICK_INDEX_BACKWALL_COLLISION -2
 #define BRICK_INDEX_PADDLE_COLLISION -1
 
-typedef struct Log Log;
-struct Log {
+typedef struct Log {
     float perf;
     float score;
     float episode_return;
     float episode_length;
     float n;
-};
+} Log;
+
+typedef struct Client {
+    float width;
+    float height;
+    float paddle_width;
+    float paddle_height;
+    float ball_width;
+    float ball_height;    
+    Texture2D ball;
+} Client;
 
-typedef struct Client Client;
-typedef struct Breakout Breakout;
-struct Breakout {
+typedef struct Breakout {
     Client* client;
     Log log;
     float* observations;
@@ -68,7 +75,7 @@ struct Breakout {
     int frameskip;
     unsigned char hit_brick;
     int continuous;
-};
+} Breakout;
 
 typedef struct CollisionInfo CollisionInfo;
 struct CollisionInfo {
@@ -470,17 +477,6 @@ void c_step(Breakout* env) {
 
 Color BRICK_COLORS[6] = {RED, ORANGE, YELLOW, GREEN, SKYBLUE, BLUE};
 
-typedef struct Client Client;
-struct Client {
-    float width;
-    float height;
-    float paddle_width;
-    float paddle_height;
-    float ball_width;
-    float ball_height;    
-    Texture2D ball;
-};
-
 static inline bool file_exists(const char* path) {
     return access(path, F_OK) != -1;
 }
diff --git a/pufferlib/ocean/breakout/breakout.py b/pufferlib/ocean/breakout/breakout.py
index 59300888c9..f482701a8c 100644
--- a/pufferlib/ocean/breakout/breakout.py
+++ b/pufferlib/ocean/breakout/breakout.py
@@ -46,7 +46,7 @@ def __init__(self, num_envs=1, render_mode=None,
             brick_cols=brick_cols, continuous=continuous
         )
 
-    def reset(self, seed=None):
+    def reset(self, seed=0):
         binding.vec_reset(self.c_envs, seed)
         self.tick = 0
         return self.observations, []
diff --git a/pufferlib/ocean/cartpole/cartpole.py b/pufferlib/ocean/cartpole/cartpole.py
index 9b3eecca3c..62e3ba6d3d 100644
--- a/pufferlib/ocean/cartpole/cartpole.py
+++ b/pufferlib/ocean/cartpole/cartpole.py
@@ -4,7 +4,7 @@
 from pufferlib.ocean.cartpole import binding
 
 class Cartpole(pufferlib.PufferEnv):
-    def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuous=False, buf=None, seed=0):
+    def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuous=True, buf=None, seed=0):
         self.render_mode = render_mode
         self.num_agents = num_envs
         self.report_interval = report_interval
@@ -18,17 +18,14 @@ def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuou
         )
         if self.continuous:
             self.single_action_space = gymnasium.spaces.Box(
-                low=-1.0, high=1.0, shape=(1,), dtype=np.float32
+                low=-1.0, high=1.0, shape=(1,)
             )
             
         else:
             self.single_action_space = gymnasium.spaces.Discrete(2)
 
         super().__init__(buf)
-
-        self.actions = np.zeros(self.num_agents, dtype=np.float32)
-        self.terminals = np.zeros(self.num_agents, dtype=np.uint8)
-        self.truncations = np.zeros(self.num_agents, dtype=np.uint8)
+        self.actions = np.zeros(num_envs, dtype=np.float32)
 
         self.c_envs = binding.vec_init(
             self.observations,
@@ -37,7 +34,8 @@ def __init__(self, num_envs=1, render_mode='human', report_interval=1, continuou
             self.terminals,
             self.truncations,
             num_envs,
-            int(self.continuous),
+            seed,
+            continuous=int(self.continuous),
         )
    
     def reset(self, seed=None):
@@ -98,4 +96,4 @@ def test_performance(timeout=10, atn_cache=8192, continuous=True):
 
 if __name__ == '__main__':
     test_performance()
-    
\ No newline at end of file
+    
diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h
index 2230c53692..12f54d4990 100644
--- a/pufferlib/ocean/env_binding.h
+++ b/pufferlib/ocean/env_binding.h
@@ -12,6 +12,13 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
 }
 #endif
 
+static PyObject* my_get(PyObject* dict, Env* env);
+#ifndef MY_GET
+static PyObject* my_get(PyObject* dict, Env* env) {
+    return NULL;
+}
+#endif
+
 static Env* unpack_env(PyObject* args) {
     PyObject* handle_obj = PyTuple_GetItem(args, 0);
     if (!PyObject_TypeCheck(handle_obj, &PyLong_Type)) {
@@ -64,6 +71,10 @@ static PyObject* env_init(PyObject* self, PyObject* args, PyObject* kwargs) {
         return NULL;
     }
     env->actions = PyArray_DATA(actions);
+    if (PyArray_ITEMSIZE(actions) == sizeof(double)) { // element size; the dim-0 stride is also 8 for (N, 2) float32/int32
+        PyErr_SetString(PyExc_ValueError, "Action tensor passed as float64 (pass np.float32 buffer)");
+        return NULL;
+    }
 
     PyObject* rew = PyTuple_GetItem(args, 2);
     if (!PyObject_TypeCheck(rew, &PyArray_Type)) {
@@ -142,18 +153,22 @@ static PyObject* env_init(PyObject* self, PyObject* args, PyObject* kwargs) {
     Py_DECREF(py_seed);
 
     PyObject* empty_args = PyTuple_New(0);
-    if (my_init(env, empty_args, kwargs)) {
-        //PyErr_SetString(PyExc_TypeError, "env_init failed");
-        Py_DECREF(kwargs);
+    my_init(env, empty_args, kwargs);
+    Py_DECREF(kwargs);
+    if (PyErr_Occurred()) {
         return NULL;
     }
 
-    Py_DECREF(kwargs);
     return PyLong_FromVoidPtr(env);
 }
 
 // Python function to reset the environment
 static PyObject* env_reset(PyObject* self, PyObject* args) {
+    if (PyTuple_Size(args) != 2) {
+        PyErr_SetString(PyExc_TypeError, "env_reset requires 2 arguments");
+        return NULL;
+    }
+
     Env* env = unpack_env(args);
     if (!env){
         return NULL;
@@ -162,9 +177,14 @@ static PyObject* env_reset(PyObject* self, PyObject* args) {
     Py_RETURN_NONE;
 }
 
-
 // Python function to step the environment
 static PyObject* env_step(PyObject* self, PyObject* args) {
+    int num_args = PyTuple_Size(args);
+    if (num_args != 1) {
+        PyErr_SetString(PyExc_TypeError, "env_step requires 1 argument");
+        return NULL;
+    }
+
     Env* env = unpack_env(args);
     if (!env){
         return NULL;
@@ -194,6 +214,19 @@ static PyObject* env_close(PyObject* self, PyObject* args) {
     Py_RETURN_NONE;
 }
 
+static PyObject* env_get(PyObject* self, PyObject* args) {
+    Env* env = unpack_env(args);
+    PyObject* dict = env ? PyDict_New() : NULL; // allocate the result only for a valid handle
+    if (!dict) {
+        return NULL; // unpack_env or PyDict_New failed; propagate the failure
+    }
+    my_get(dict, env);
+    if (PyErr_Occurred()) {
+        Py_CLEAR(dict); // release our reference so the dict is not leaked; dict is now NULL
+    }
+    return dict;
+}
+
 typedef struct {
     Env** envs;
     int num_envs;
@@ -208,7 +241,12 @@ static VecEnv* unpack_vecenv(PyObject* args) {
 
     VecEnv* vec = (VecEnv*)PyLong_AsVoidPtr(handle_obj);
     if (!vec) {
-        PyErr_SetString(PyExc_ValueError, "Invalid vec env handle");
+        PyErr_SetString(PyExc_ValueError, "Missing or invalid vec env handle");
+        return NULL;
+    }
+
+    if (vec->num_envs <= 0) {
+        PyErr_SetString(PyExc_ValueError, "Missing or invalid vec env handle");
         return NULL;
     }
 
@@ -275,6 +313,10 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) {
         PyErr_SetString(PyExc_ValueError, "Actions must be contiguous");
         return NULL;
     }
+    if (PyArray_ITEMSIZE(actions) == sizeof(double)) { // element size; the dim-0 stride is also 8 for (N, 2) float32/int32
+        PyErr_SetString(PyExc_ValueError, "Action tensor passed as float64 (pass np.float32 buffer)");
+        return NULL;
+    }
 
     PyObject* rew = PyTuple_GetItem(args, 2);
     if (!PyObject_TypeCheck(rew, &PyArray_Type)) {
@@ -361,9 +403,9 @@ static PyObject* vec_init(PyObject* self, PyObject* args, PyObject* kwargs) {
         Py_DECREF(py_seed);
 
         PyObject* empty_args = PyTuple_New(0);
-        if (my_init(env, empty_args, kwargs)) {
-            PyErr_SetString(PyExc_TypeError, "env_init failed");
-            Py_DECREF(kwargs);
+        my_init(env, empty_args, kwargs);
+        Py_DECREF(kwargs);
+        if (PyErr_Occurred()) {
             return NULL;
         }
     }
@@ -407,6 +449,11 @@ static PyObject* vectorize(PyObject* self, PyObject* args) {
 }
 
 static PyObject* vec_reset(PyObject* self, PyObject* args) {
+    if (PyTuple_Size(args) != 2) {
+        PyErr_SetString(PyExc_TypeError, "vec_reset requires 2 arguments");
+        return NULL;
+    }
+
     VecEnv* vec = unpack_vecenv(args);
     if (!vec) {
         return NULL;
@@ -428,6 +475,12 @@ static PyObject* vec_reset(PyObject* self, PyObject* args) {
 }
 
 static PyObject* vec_step(PyObject* self, PyObject* arg) {
+    int num_args = PyTuple_Size(arg);
+    if (num_args != 1) {
+        PyErr_SetString(PyExc_TypeError, "vec_step requires 1 argument");
+        return NULL;
+    }
+
     VecEnv* vec = unpack_vecenv(arg);
     if (!vec) {
         return NULL;
@@ -530,9 +583,10 @@ static PyObject* vec_close(PyObject* self, PyObject* args) {
 static double unpack(PyObject* kwargs, char* key) {
     PyObject* val = PyDict_GetItemString(kwargs, key);
     if (val == NULL) {
-        // If the key doesn't exist, don't set an error - this allows optional parameters
-        // Just return a default value that the caller can check for
-        return 0.0;
+        char error_msg[100];
+        snprintf(error_msg, sizeof(error_msg), "Missing required keyword argument '%s'", key);
+        PyErr_SetString(PyExc_TypeError, error_msg);
+        return 1;
     }
     if (PyLong_Check(val)) {
         long out = PyLong_AsLong(val);
@@ -561,6 +615,7 @@ static PyMethodDef methods[] = {
     {"env_step", env_step, METH_VARARGS, "Step the environment"},
     {"env_render", env_render, METH_VARARGS, "Render the environment"},
     {"env_close", env_close, METH_VARARGS, "Close the environment"},
+    {"env_get", env_get, METH_VARARGS, "Get the environment state"},
     {"vectorize", vectorize, METH_VARARGS, "Make a vector of environment handles"},
     {"vec_init", (PyCFunction)vec_init, METH_VARARGS | METH_KEYWORDS, "Initialize a vector of environments"},
     {"vec_reset", (PyCFunction)vec_reset, METH_VARARGS, "Reset the vector of environments"},
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 34728c3087..3e2404aafe 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -1,5 +1,5 @@
+import importlib
 import pufferlib.emulation
-import pufferlib.postprocess
 
 def lazy_import(module_path, attr):
     """
@@ -57,110 +57,101 @@ def make_continuous(discretize=False, buf=None, **kwargs):
     from . import sanity
     env = sanity.Continuous(discretize=discretize)
     if not discretize:
-        env = pufferlib.postprocess.ClipAction(env)
-    env = pufferlib.postprocess.EpisodeStats(env)
+        env = pufferlib.ClipAction(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_squared(distance_to_target=3, num_targets=1, buf=None, **kwargs):
     from . import sanity
     env = sanity.Squared(distance_to_target=distance_to_target, num_targets=num_targets, **kwargs)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf, **kwargs)
 
 def make_bandit(num_actions=10, reward_scale=1, reward_noise=1, buf=None):
     from . import sanity
     env = sanity.Bandit(num_actions=num_actions, reward_scale=reward_scale,
         reward_noise=reward_noise)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_memory(mem_length=2, mem_delay=2, buf=None, **kwargs):
     from . import sanity
     env = sanity.Memory(mem_length=mem_length, mem_delay=mem_delay)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_password(password_length=5, buf=None, **kwargs):
     from . import sanity
     env = sanity.Password(password_length=password_length)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_performance(delay_mean=0, delay_std=0, bandwidth=1, buf=None, **kwargs):
     from . import sanity
     env = sanity.Performance(delay_mean=delay_mean, delay_std=delay_std, bandwidth=bandwidth)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_performance_empiric(count_n=0, count_std=0, bandwidth=1, buf=None, **kwargs):
     from . import sanity
     env = sanity.PerformanceEmpiric(count_n=count_n, count_std=count_std, bandwidth=bandwidth)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_stochastic(p=0.7, horizon=100, buf=None, **kwargs):
     from . import sanity
     env = sanity.Stochastic(p=p, horizon=100)
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
 
 def make_spaces(buf=None, **kwargs):
     from . import sanity
     env = sanity.Spaces()
-    env = pufferlib.postprocess.EpisodeStats(env)
+    env = pufferlib.EpisodeStats(env)
     return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf, **kwargs)
 
 def make_multiagent(buf=None, **kwargs):
     from . import sanity
     env = sanity.Multiagent()
-    env = pufferlib.postprocess.MultiagentEpisodeStats(env)
+    env = pufferlib.MultiagentEpisodeStats(env)
     return pufferlib.emulation.PettingZooPufferEnv(env=env, buf=buf)
 
-MAKE_FNS = {
-    'breakout':      lambda: lazy_import('pufferlib.ocean.breakout.breakout', 'Breakout'),
-    'blastar':       lambda: lazy_import('pufferlib.ocean.blastar.blastar', 'Blastar'),
-    'pong':          lambda: lazy_import('pufferlib.ocean.pong.pong', 'Pong'),
-    'enduro':        lambda: lazy_import('pufferlib.ocean.enduro.enduro', 'Enduro'),
-    'cartpole':      lambda: lazy_import('pufferlib.ocean.cartpole.cartpole', 'Cartpole'),
-    'moba':          lambda: lazy_import('pufferlib.ocean.moba.moba', 'Moba'),
-    'nmmo3':         lambda: lazy_import('pufferlib.ocean.nmmo3.nmmo3', 'NMMO3'),
-    'snake':         lambda: lazy_import('pufferlib.ocean.snake.snake', 'Snake'),
-    'squared':       lambda: lazy_import('pufferlib.ocean.squared.squared', 'Squared'),
-    'pysquared':     lambda: lazy_import('pufferlib.ocean.squared.pysquared', 'PySquared'),
-    'connect4':      lambda: lazy_import('pufferlib.ocean.connect4.connect4', 'Connect4'),
-    'tripletriad':   lambda: lazy_import('pufferlib.ocean.tripletriad.tripletriad', 'TripleTriad'),
-    'tactical':      lambda: lazy_import('pufferlib.ocean.tactical.tactical', 'Tactical'),
-    'go':            lambda: lazy_import('pufferlib.ocean.go.go', 'Go'),
-    'rware':         lambda: lazy_import('pufferlib.ocean.rware.rware', 'Rware'),
-    'trash_pickup':  lambda: lazy_import('pufferlib.ocean.trash_pickup.trash_pickup', 'TrashPickupEnv'),
-    'tower_climb':   lambda: lazy_import('pufferlib.ocean.tower_climb.tower_climb', 'TowerClimb'),
-    'grid':          lambda: lazy_import('pufferlib.ocean.grid.grid', 'Grid'),
-    'cpr':           lambda: lazy_import('pufferlib.ocean.cpr.cpr', 'PyCPR'),
-    'impulse_wars':  lambda: lazy_import('pufferlib.ocean.impulse_wars.impulse_wars', 'ImpulseWars'),
-    'gpudrive':      lambda: lazy_import('pufferlib.ocean.gpudrive.gpudrive', 'GPUDrive'),
-    #'rocket_lander': rocket_lander.RocketLander,
-    'foraging': make_foraging,
-    'predator_prey': make_predator_prey,
-    'group': make_group,
-    'puffer': make_puffer,
-    'continuous': make_continuous,
-    'bandit': make_bandit,
-    'memory': make_memory,
-    'password': make_password,
-    'stochastic': make_stochastic,
-    'multiagent': make_multiagent,
+MAKE_FUNCTIONS = {
+    'breakout': 'Breakout',
+    'blastar': 'Blastar',
+    'pong': 'Pong',
+    'enduro': 'Enduro',
+    'cartpole': 'Cartpole',
+    'moba': 'Moba',
+    'nmmo3': 'NMMO3',
+    'snake': 'Snake',
+    'squared': 'Squared',
+    'pysquared': 'PySquared',
+    'connect4': 'Connect4',
+    'tripletriad': 'TripleTriad',
+    'tactical': 'Tactical',
+    'go': 'Go',
+    'rware': 'Rware',
+    'trash_pickup': 'TrashPickupEnv',
+    'tower_climb': 'TowerClimb',
+    'grid': 'Grid',
+    'cpr': 'PyCPR',
+    'impulse_wars': 'ImpulseWars',
+    'gpudrive': 'GPUDrive',
     'spaces': make_spaces,
-    'performance': make_performance,
-    'performance_empiric': make_performance_empiric,
+    'multiagent': make_multiagent,
 }
 
-# Alias puffer_ to all names
-MAKE_FNS = {**MAKE_FNS, **{'puffer_' + k: v for k, v in MAKE_FNS.items()}}
-
 def env_creator(name='squared', *args, **kwargs):
-    if name in MAKE_FNS:
-        return MAKE_FNS[name](*args, **kwargs)
-    else:
-        raise ValueError(f'Invalid environment name: {name}')
+    # Resolve 'puffer_<env>' to its env class or factory; reject unknown names up front
+    key = name[len('puffer_'):] if name.startswith('puffer_') else None
+    if key not in MAKE_FUNCTIONS:
+        raise pufferlib.exceptions.APIUsageError(f'Invalid environment name: {name}')
+
+    target = MAKE_FUNCTIONS[key]
+    if callable(target):  # sanity envs are registered as factory functions
+        return target
+    module = importlib.import_module(f'pufferlib.ocean.{key}.{key}')  # import errors propagate
+    return getattr(module, target)
 
 
diff --git a/pufferlib/ocean/gpudrive/binding.c b/pufferlib/ocean/gpudrive/binding.c
new file mode 100644
index 0000000000..a8999d90f3
--- /dev/null
+++ b/pufferlib/ocean/gpudrive/binding.c
@@ -0,0 +1,62 @@
+#include "gpudrive.h"
+#define Env GPUDrive
+#define MY_SHARED
+#include "../env_binding.h"
+
+static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) {
+    // Returns a list of num_envs+1 cumulative agent offsets: entry i is the first agent
+    // index of env i, and the final entry is the total active agent count over all maps.
+    int num_envs = unpack(kwargs, "num_envs");
+    if (PyErr_Occurred()) return NULL; // "num_envs" was missing; unpack already set the error
+    if (num_envs <= 0) return PyErr_Format(PyExc_ValueError, "num_envs must be positive, got %d", num_envs);
+    GPUDrive* temp_envs = calloc(num_envs, sizeof(GPUDrive));
+    PyObject* agent_offsets = PyList_New(num_envs+1);
+    if (temp_envs == NULL || agent_offsets == NULL) {
+        free(temp_envs);
+        Py_XDECREF(agent_offsets);
+        return PyErr_NoMemory();
+    }
+    int total_count = 0;
+    for(int i = 0;i< num_envs;i++) {
+        char map_file[100];
+        snprintf(map_file, sizeof(map_file), "resources/gpudrive/binaries/map_%03d.bin", i);
+        temp_envs[i].entities = load_map_binary(map_file, &temp_envs[i]);
+        if (temp_envs[i].entities == NULL) {
+            Py_DECREF(agent_offsets);
+            return PyErr_Format(PyExc_OSError, "Could not load map file '%s'", map_file);
+        }
+        set_active_agents(&temp_envs[i]);
+        // PyList_SetItem steals the new reference, so no Py_DECREF is needed afterwards
+        PyList_SetItem(agent_offsets, i, PyLong_FromLong(total_count));
+        total_count += temp_envs[i].active_agent_count;
+    }
+    PyList_SetItem(agent_offsets, num_envs, PyLong_FromLong(total_count));
+    // TODO: temp_envs and the map data loaded into it are never freed, so every call
+    // leaks them; release them here once a safe per-env teardown is available.
+    return agent_offsets;
+}
+
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    env->human_agent_idx = unpack(kwargs, "human_agent_idx");
+    env->reward_vehicle_collision = unpack(kwargs, "reward_vehicle_collision");
+    env->reward_offroad_collision = unpack(kwargs, "reward_offroad_collision");
+    char map_file[100];
+    snprintf(map_file, sizeof(map_file), "resources/gpudrive/binaries/map_%03d.bin", (int)unpack(kwargs, "env_id"));
+    if (PyErr_Occurred()) return -1; // a required kwarg was missing; do not init a half-configured env
+    env->map_name = strdup(map_file); // heap copy: map_file is a stack buffer that dies on return (copy is not freed here)
+    if (env->map_name == NULL) { PyErr_NoMemory(); return -1; }
+    init(env);
+    return 0;
+}
+
+static int my_log(PyObject* dict, Log* log) {
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    assign_to_dict(dict, "offroad_rate", log->offroad_rate);
+    assign_to_dict(dict, "collision_rate", log->collision_rate);
+    assign_to_dict(dict, "dnf_rate", log->dnf_rate);
+    assign_to_dict(dict, "n", log->n);
+    return 0;
+}
diff --git a/pufferlib/ocean/gpudrive/gpudrive.c b/pufferlib/ocean/gpudrive/gpudrive.c
index 78c5731bf7..123074e677 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.c
+++ b/pufferlib/ocean/gpudrive/gpudrive.c
@@ -102,17 +102,18 @@ void demo() {
         .human_agent_idx = 0,
         .reward_vehicle_collision = -0.1f,
         .reward_offroad_collision = -0.1f,
-	    .map_name = "resources/gpudrive/binaries/map_063.bin"
+	    .map_name = "resources/gpudrive/binaries/map_000.bin"
     };
     allocate(&env);
     c_reset(&env);
-    Client* client = make_client(&env);
+    c_render(&env);
+    //Client* client = make_client(&env);
     printf("Human controlling agent index: %d\n", env.active_agent_indices[env.human_agent_idx]);
     int accel_delta = 1;
     int steer_delta = 1;
     while (!WindowShouldClose()) {
         // Handle camera controls
-        handle_camera_controls(client);
+        handle_camera_controls(env.client);
         int (*actions)[2] = (int(*)[2])env.actions;
         // // Reset all agent actions at the beginning of each frame
         // for(int i = 0; i < env.active_agent_count; i++) {
@@ -160,10 +161,10 @@ void demo() {
         // Handle human input for the controlled agent
         // handle_human_input(&env);
         c_step(&env);
-        c_render(client, &env);
+        c_render(&env);
     }
 
-    close_client(client);
+    close_client(env.client);
     free_allocated(&env);
 }
 
diff --git a/pufferlib/ocean/gpudrive/gpudrive.h b/pufferlib/ocean/gpudrive/gpudrive.h
index e01445a15b..bb0153a1bd 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.h
+++ b/pufferlib/ocean/gpudrive/gpudrive.h
@@ -81,65 +81,22 @@ static const int collision_offsets[25][2] = {
     {-2,  1}, {-1,  1}, {0,  1}, {1,  1}, {2,  1},  // Fourth row
     {-2,  2}, {-1,  2}, {0,  2}, {1,  2}, {2,  2}   // Bottom row
 };
-#define LOG_BUFFER_SIZE 1024
 
+typedef struct GPUDrive GPUDrive;
+typedef struct Client Client;
 typedef struct Log Log;
+
 struct Log {
     float episode_return;
     float episode_length;
+    float perf;
     float score;
     float offroad_rate;
     float collision_rate;
     float dnf_rate;
+    float n;
 };
 
-
-typedef struct LogBuffer LogBuffer;
-struct LogBuffer {
-    Log* logs;
-    int length;
-    int idx;
-};
-
-LogBuffer* allocate_logbuffer(int size) {
-    LogBuffer* logs = (LogBuffer*)calloc(1, sizeof(LogBuffer));
-    logs->logs = (Log*)calloc(size, sizeof(Log));
-    logs->length = size;
-    logs->idx = 0;
-    return logs;
-}
-
-void free_logbuffer(LogBuffer* buffer) {
-    free(buffer->logs);
-    free(buffer);
-}
-
-void add_log(LogBuffer* logs, Log* log) {
-    if (logs->idx == logs->length) {
-        return;
-    }
-    logs->logs[logs->idx] = *log;
-    logs->idx += 1;
-    //printf("Log: %f, %f,\n", log->episode_return, log->episode_length);
-}
-
-Log aggregate_and_clear(LogBuffer* logs) {
-    Log log = {0};
-    if (logs->idx == 0) {
-        return log;
-    }
-    for (int i = 0; i < logs->idx; i++) {
-        log.episode_return += logs->logs[i].episode_return / logs->idx;
-        log.episode_length += logs->logs[i].episode_length / logs->idx;
-	    log.score += logs->logs[i].score / logs->idx;
-	    log.offroad_rate += logs->logs[i].offroad_rate / logs->idx;
-	    log.collision_rate += logs->logs[i].collision_rate / logs->idx;
-	log.dnf_rate += logs->logs[i].dnf_rate / logs->idx;
-    }
-    logs->idx = 0;
-    return log;
-}
-
 typedef struct Entity Entity;
 struct Entity {
     int type;
@@ -196,14 +153,13 @@ float relative_distance_2d(float x1, float y1, float x2, float y2){
     return distance;
 }
 
-typedef struct GPUDrive GPUDrive;
 struct GPUDrive {
+    Client* client;
     float* observations;
     int* actions;
     float* rewards;
-    unsigned char* masks;
-    unsigned char* dones;
-    LogBuffer* log_buffer;
+    unsigned char* terminals;
+    Log log;
     Log* logs;
     int num_agents;
     int active_agent_count;
@@ -233,14 +189,33 @@ struct GPUDrive {
     float reward_vehicle_collision;
     float reward_offroad_collision;
     char* map_name;
-    char* reached_goal_this_turn;
+    char* reached_goal_this_episode;
     float world_mean_x;
     float world_mean_y;
 };
 
+void add_log(GPUDrive* env) {
+    for(int i = 0; i < env->active_agent_count; i++){
+        if(env->reached_goal_this_episode[i]) {
+            env->log.score += 1.0f;
+            env->log.perf += 1.0f;
+        }
+        int offroad = env->logs[i].offroad_rate;
+        env->log.offroad_rate += offroad;
+        int collided = env->logs[i].collision_rate;
+        env->log.collision_rate += collided;
+        if(!offroad && !collided && !env->reached_goal_this_episode[i]){
+            env->log.dnf_rate += 1.0f;
+        }
+        env->log.episode_length += env->logs[i].episode_length;
+        env->log.episode_return += env->logs[i].episode_return;
+        env->log.n += 1;
+    }
+}
+
 Entity* load_map_binary(const char* filename, GPUDrive* env) {
     FILE* file = fopen(filename, "rb");
-    printf("fileanme: %s\n", filename);
+    //printf("fileanme: %s\n", filename);
     if (!file) return NULL;
     fread(&env->num_objects, sizeof(int), 1, file);
     fread(&env->num_roads, sizeof(int), 1, file);
@@ -304,6 +283,8 @@ Entity* load_map_binary(const char* filename, GPUDrive* env) {
 }
 
 void set_start_position(GPUDrive* env){
+    //InitWindow(800, 600, "GPU Drive");
+    //BeginDrawing();
     for(int i = 0; i < env->num_entities; i++){
         int is_active = 0;
         for(int j = 0; j < env->active_agent_count; j++){
@@ -316,6 +297,10 @@ void set_start_position(GPUDrive* env){
         e->x = e->traj_x[0];
         e->y = e->traj_y[0];
         e->z = e->traj_z[0];
+        //printf("Entity %d is at (%f, %f, %f)\n", i, e->x, e->y, e->z);
+        //if (e->type < 4) {
+        //    DrawRectangle(200+2*e->x, 200+2*e->y, 2.0, 2.0, RED);
+        //}    
         if(e->type >3 || e->type == 0){
             continue;
         }
@@ -331,6 +316,10 @@ void set_start_position(GPUDrive* env){
         e->heading = e->traj_heading[0];
         e->valid = e->traj_valid[0];
     }
+    //EndDrawing();
+    int x = 0;
+
+
 }
 
 void set_active_agents(GPUDrive* env){
@@ -342,7 +331,7 @@ void set_active_agents(GPUDrive* env){
     int expert_static_car_indices[MAX_CARS];
     env->active_agent_count = 1;
     active_agent_indices[0] = env->num_objects-1;
-    for(int i = 0; i < env->num_objects && env->num_cars < MAX_CARS; i++){
+    for(int i = 0; i < env->num_objects-1 && env->num_cars < MAX_CARS; i++){
         if(env->entities[i].type != 1) continue;
         if(env->entities[i].traj_valid[0] != 1) continue;
         env->num_cars++;
@@ -444,8 +437,6 @@ void init_grid_map(GPUDrive* env){
             }
         }
     }
-    printf("top left: %f, %f\n", top_left_x, top_left_y);
-    printf("bottom right: %f, %f\n", bottom_right_x, bottom_right_y);
 
     env->map_corners = (float*)calloc(4, sizeof(float));
     env->map_corners[0] = top_left_x;
@@ -623,13 +614,12 @@ void init(GPUDrive* env){
     // printf("num entities: %d\n", env->num_entities);
     env->dynamics_model = CLASSIC;
     set_means(env);
-    printf("world mean: %f, %f\n", env->world_mean_x, env->world_mean_y);
     set_active_agents(env);
     set_start_position(env);
     // printf("Active agents: %d\n", env->active_agent_count);
     env->logs = (Log*)calloc(env->active_agent_count, sizeof(Log));
     env->goal_reached = (char*)calloc(env->active_agent_count, sizeof(char));
-    env->reached_goal_this_turn = (char*)calloc(env->active_agent_count, sizeof(char));
+    env->reached_goal_this_episode = (char*)calloc(env->active_agent_count, sizeof(char));
     init_grid_map(env);
     env->vision_range = 21;
     init_neighbor_offsets(env);
@@ -646,7 +640,7 @@ void free_initialized(GPUDrive* env){
     free(env->logs);
     free(env->fake_data);
     free(env->goal_reached);
-    free(env->reached_goal_this_turn);
+    free(env->reached_goal_this_episode);
     free(env->map_corners);
     free(env->grid_cells);
     free(env->neighbor_offsets);
@@ -667,9 +661,7 @@ void allocate(GPUDrive* env){
     env->observations = (float*)calloc(env->active_agent_count*max_obs, sizeof(float));
     env->actions = (int*)calloc(env->active_agent_count*2, sizeof(int));
     env->rewards = (float*)calloc(env->active_agent_count, sizeof(float));
-    env->masks = (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char));
-    env->dones = (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char));
-    env->log_buffer = allocate_logbuffer(LOG_BUFFER_SIZE);
+    env->terminals= (unsigned char*)calloc(env->active_agent_count, sizeof(unsigned char));
     // printf("allocated\n");
 }
 
@@ -677,9 +669,7 @@ void free_allocated(GPUDrive* env){
     free(env->observations);
     free(env->actions);
     free(env->rewards);
-    free(env->masks);
-    free(env->dones);
-    free_logbuffer(env->log_buffer);
+    free(env->terminals);
     free_initialized(env);
 }
 
@@ -921,9 +911,6 @@ void compute_observations(GPUDrive* env) {
     memset(env->observations, 0, max_obs*env->active_agent_count*sizeof(float));
     float (*observations)[max_obs] = (float(*)[max_obs])env->observations; 
     for(int i = 0; i < env->active_agent_count; i++) {
-        if(env->goal_reached[i] && !env->reached_goal_this_turn[i]){
-            continue;
-        }
         float* obs = &observations[i][0];
         Entity* ego_entity = &env->entities[env->active_agent_indices[i]];
         if(ego_entity->type > 3) break;
@@ -937,9 +924,12 @@ void compute_observations(GPUDrive* env) {
         // Rotate to ego vehicle's frame
         float rel_goal_x = goal_x*cos_heading + goal_y*sin_heading;
         float rel_goal_y = -goal_x*sin_heading + goal_y*cos_heading;
-        obs[0] = normalize_value(rel_goal_x, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
-        obs[1] = normalize_value(rel_goal_y, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
-        obs[2] = ego_speed / MAX_SPEED;
+        //obs[0] = normalize_value(rel_goal_x, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
+        //obs[1] = normalize_value(rel_goal_y, MIN_REL_GOAL_COORD, MAX_REL_GOAL_COORD);
+        obs[0] = rel_goal_x/20.0f;
+        obs[1] = rel_goal_y/20.0f;
+        //obs[2] = ego_speed / MAX_SPEED;
+        obs[2] = ego_speed / 5.0f;
         obs[3] = ego_entity->width / MAX_VEH_WIDTH;
         obs[4] = ego_entity->length / MAX_VEH_LEN;
         obs[5] = (ego_entity->collision_state > 0) ? 1 : 0;
@@ -967,8 +957,8 @@ void compute_observations(GPUDrive* env) {
             float rel_x = dx*cos_heading + dy*sin_heading;
             float rel_y = -dx*sin_heading + dy*cos_heading;
             // Store observations with correct indexing
-            obs[obs_idx] = normalize_value(rel_x, MIN_REL_AGENT_POS, MAX_REL_AGENT_POS);
-            obs[obs_idx + 1] = normalize_value(rel_y, MIN_REL_AGENT_POS, MAX_REL_AGENT_POS);
+            obs[obs_idx] = rel_x / 20.0f;
+            obs[obs_idx + 1] = rel_y / 20.0f;
             obs[obs_idx + 2] = other_entity->width / MAX_VEH_WIDTH;
             obs[obs_idx + 3] = other_entity->length / MAX_VEH_LEN;
             // relative heading
@@ -1017,8 +1007,8 @@ void compute_observations(GPUDrive* env) {
             // Compute sin and cos of relative angle directly without atan2f
             float cos_angle = dx_norm*cos_heading + dy_norm*sin_heading;
             float sin_angle = -dx_norm*sin_heading + dy_norm*cos_heading;
-            obs[obs_idx] = normalize_value(x_obs, MIN_RG_COORD, MAX_RG_COORD);
-            obs[obs_idx + 1] = normalize_value(y_obs, MIN_RG_COORD, MAX_RG_COORD);
+            obs[obs_idx] = x_obs / 20.0f;
+            obs[obs_idx + 1] = y_obs / 20.0f;
             obs[obs_idx + 2] = length / MAX_ROAD_SEGMENT_LENGTH;
             obs[obs_idx + 3] = width / MAX_ROAD_SCALE;
             obs[obs_idx + 4] = cos_angle / MAX_ORIENTATION_RAD;
@@ -1041,33 +1031,23 @@ void c_reset(GPUDrive* env){
         collision_check(env, agent_idx);
     }
     memset(env->goal_reached, 0, env->active_agent_count*sizeof(char));
-    memset(env->masks, 1, env->active_agent_count*sizeof(char));  
-    memset(env->dones, 0, env->active_agent_count*sizeof(char));
+    memset(env->reached_goal_this_episode, 0, env->active_agent_count*sizeof(char));
     compute_observations(env);
 }
 
+void respawn_agent(GPUDrive* env, int agent_idx){
+    env->entities[agent_idx].x = env->entities[agent_idx].traj_x[0];
+    env->entities[agent_idx].y = env->entities[agent_idx].traj_y[0];
+    env->entities[agent_idx].heading = env->entities[agent_idx].traj_heading[0];
+    env->entities[agent_idx].vx = env->entities[agent_idx].traj_vx[0];
+    env->entities[agent_idx].vy = env->entities[agent_idx].traj_vy[0];
+}
+
 void c_step(GPUDrive* env){
     memset(env->rewards, 0, env->active_agent_count * sizeof(float));
-    memset(env->reached_goal_this_turn, 0, env->active_agent_count * sizeof(char));
     env->timestep++;
     if(env->timestep == 91){
-	    for(int i = 0; i < env->active_agent_count; i++){
-            if(env->goal_reached[i] == 0){
-                env->logs[i].score = 0.0f;
-            } 
-	        else {
-                env->logs[i].score = 1.0f;
-		        env->logs[i].dnf_rate = 0.0f;
-            }
-            int offroad = env->logs[i].offroad_rate;
-            int collided = env->logs[i].collision_rate;
-            int goal_reached = env->goal_reached[i];
-            if(!offroad && !collided && !goal_reached){
-                env->logs[i].dnf_rate = 1.0f;
-            }
-
-            add_log(env->log_buffer, &env->logs[i]);
-	    }
+        add_log(env);
 	    c_reset(env);
     }
     // Move statix experts
@@ -1081,13 +1061,11 @@ void c_step(GPUDrive* env){
         env->logs[i].score = 0.0f;
 	    env->logs[i].episode_length += 1;
         int agent_idx = env->active_agent_indices[i];
+        if(env->goal_reached[i] || env->entities[agent_idx].collision_state > 0){
+            respawn_agent(env, agent_idx);
+            env->goal_reached[i] = 0;
+        }
         env->entities[agent_idx].collision_state = 0;
-        if(env->goal_reached[i]){
-            env->masks[i] = 0;
-            env->entities[agent_idx].x = -10000;
-            env->entities[agent_idx].y = -10000;
-            continue;
-	    }
         move_dynamics(env, i, agent_idx);
         // move_expert(env, env->actions, agent_idx);
         collision_check(env, agent_idx);
@@ -1113,10 +1100,8 @@ void c_step(GPUDrive* env){
         if(reached_goal && env->goal_reached[i] == 0){            
             env->rewards[i] += 1.0f;
 	        env->goal_reached[i] = 1;
-		    env->reached_goal_this_turn[i] = 1;
 	        env->logs[i].episode_return += 1.0f;
-            env->dones[i] = 1;
-            continue;
+            env->reached_goal_this_episode[i] = 1;
 	    }
     }
     compute_observations(env);
@@ -1382,8 +1372,16 @@ void draw_road_edge(GPUDrive* env, float start_x, float start_y, float end_x, fl
     DrawTriangle3D(b4, t4, b1, CURB_SIDE);
     DrawTriangle3D(t4, t1, b1, CURB_SIDE);
 }
+<<<<<<< HEAD
+
+void c_render(GPUDrive* env) {
+    if (env->client == NULL) {
+        env->client = make_client(env);
+    }
+    Client* client = env->client;
+=======
+>>>>>>> cf2b09c9d525bf784c8dd5c03818450cabeae914
 
-void c_render(Client* client, GPUDrive* env) {
     BeginDrawing();
     Color road = (Color){35, 35, 37, 255};
     ClearBackground(road);
diff --git a/pufferlib/ocean/gpudrive/gpudrive.py b/pufferlib/ocean/gpudrive/gpudrive.py
index 36f87e716b..528496663e 100644
--- a/pufferlib/ocean/gpudrive/gpudrive.py
+++ b/pufferlib/ocean/gpudrive/gpudrive.py
@@ -4,7 +4,7 @@
 import struct
 
 import pufferlib
-from pufferlib.ocean.gpudrive.cy_gpudrive import CyGPUDrive, entity_dtype
+from pufferlib.ocean.gpudrive import binding
 
 class GPUDrive(pufferlib.PufferEnv):
     def __init__(self, num_envs=1, render_mode=None, report_interval=1,
@@ -19,46 +19,59 @@ def __init__(self, num_envs=1, render_mode=None, report_interval=1,
         self.num_agents = num_envs
         self.render_mode = render_mode
         self.report_interval = report_interval
-        print("Num envs: ", num_envs)
         
         self.num_obs = 6 + 63*7 + 200*7
         self.single_observation_space = gymnasium.spaces.Box(low=-1, high=1,
             shape=(self.num_obs,), dtype=np.float32)
         self.single_action_space = gymnasium.spaces.MultiDiscrete([7, 13])
-        
-        total_agents, agent_offsets =CyGPUDrive.get_total_agent_count(
-            num_envs, human_agent_idx, reward_vehicle_collision, reward_offroad_collision)
-        
-        self.num_agents = total_agents * 8
-        print("Num agents: ", self.num_agents)
+        agent_offsets = binding.shared(num_envs=num_envs)
+        total_agents = agent_offsets[-1]
+        self.num_agents = total_agents
         super().__init__(buf=buf)
-        self.c_envs = CyGPUDrive(self.observations, self.actions, self.rewards, self.masks,
-            self.terminals, num_envs, human_agent_idx, reward_vehicle_collision, reward_offroad_collision, offsets = agent_offsets)
+        env_ids = []
+        for i in range(num_envs):
+            cur = agent_offsets[i]
+            nxt = agent_offsets[i+1]
+            env_id = binding.env_init(
+                self.observations[cur:nxt],
+                self.actions[cur:nxt],
+                self.rewards[cur:nxt],
+                self.terminals[cur:nxt],
+                self.truncations[cur:nxt],
+                seed,
+                human_agent_idx=human_agent_idx,
+                reward_vehicle_collision=reward_vehicle_collision,
+                reward_offroad_collision=reward_offroad_collision,
+                env_id=i
+            )
+            env_ids.append(env_id)
 
+        self.c_envs = binding.vectorize(*env_ids)
 
-    def reset(self, seed=None):
-        self.c_envs.reset()
+    def reset(self, seed=0):
+        binding.vec_reset(self.c_envs, seed)
         self.tick = 0
         return self.observations, []
 
     def step(self, actions):
         self.actions[:] = actions
-        self.c_envs.step()
+        binding.vec_step(self.c_envs)
         self.tick+=1
         info = []
         if self.tick % self.report_interval == 0:
-            log = self.c_envs.log()
-            if log['episode_length'] > 0:
+            log = binding.vec_log(self.c_envs)
+            if log:
                 info.append(log)
-                info.append({'total_agents': self.num_agents}) 
+
         return (self.observations, self.rewards,
             self.terminals, self.truncations, info)
 
     def render(self):
-        self.c_envs.render()
+        binding.vec_render(self.c_envs, 63)
         
     def close(self):
-        self.c_envs.close() 
+        binding.vec_close(self.c_envs)
+
 def calculate_area(p1, p2, p3):
     # Calculate the area of the triangle using the determinant method
     return 0.5 * abs((p1['x'] - p3['x']) * (p2['y'] - p1['y']) - (p1['x'] - p2['x']) * (p3['y'] - p1['y']))
@@ -204,6 +217,7 @@ def save_map_binary(map_data, output_file):
             f.write(struct.pack('f', float(goal_pos.get('y', 0.0))))  # Get y value
             f.write(struct.pack('f', float(goal_pos.get('z', 0.0))))  # Get z value
             f.write(struct.pack('i', road.get('mark_as_expert', 0)))
+
 def load_map(map_name, binary_output=None):
     """Loads a JSON map and optionally saves it as binary"""
     with open(map_name, 'r') as f:
@@ -211,9 +225,6 @@ def load_map(map_name, binary_output=None):
     
     if binary_output:
         save_map_binary(map_data, binary_output)
-    
-    entities = np.zeros(1, dtype=entity_dtype())
-    return entities
 
 def process_all_maps():
     """Process all maps and save them as binaries"""
diff --git a/pufferlib/ocean/grid/grid.h b/pufferlib/ocean/grid/grid.h
index 161e725d0b..6c22b69406 100644
--- a/pufferlib/ocean/grid/grid.h
+++ b/pufferlib/ocean/grid/grid.h
@@ -496,7 +496,7 @@ void c_render(Grid* env) {
     float frac = 0.0;
     float overlay = 0.0;
     if (env->renderer == NULL) {
-        env->renderer = init_renderer(16, env->width, env->height);
+        env->renderer = init_renderer(16, env->max_size, env->max_size);
     }
     Renderer* renderer = env->renderer;
  
diff --git a/pufferlib/ocean/pong/pong.c b/pufferlib/ocean/pong/pong.c
index e3c104ab0c..fa1c6f5981 100644
--- a/pufferlib/ocean/pong/pong.c
+++ b/pufferlib/ocean/pong/pong.c
@@ -1,7 +1,8 @@
+#include <time.h>
 #include "pong.h"
 #include "puffernet.h"
 
-int main() {
+void demo() {
     Weights* weights = load_weights("resources/pong_weights.bin", 133764);
     LinearLSTM* net = make_linearlstm(weights, 1, 8, 3);
 
@@ -56,3 +57,40 @@ int main() {
     close_client(env.client);
 }
 
+void test_performance(int timeout) {  // Step Pong with random actions for ~timeout seconds, then print steps/sec
+    Pong env = {
+        .width = 500,
+        .height = 640,
+        .paddle_width = 20,
+        .paddle_height = 70,
+        .ball_width = 32,
+        .ball_height = 32,
+        .paddle_speed = 8,
+        .ball_initial_speed_x = 10,
+        .ball_initial_speed_y = 1,
+        .ball_speed_y_increment = 3,
+        .ball_max_speed_y = 13,
+        .max_score = 21,
+        .frameskip = 1,
+        .continuous = 0,
+    };
+    allocate(&env);
+    c_reset(&env);
+    // time() has one-second resolution, so keep timestamps as time_t instead of narrowing to int
+    time_t start = time(NULL);
+    long num_steps = 0;
+    while (time(NULL) - start < timeout) {
+        env.actions[0] = rand() % 3;
+        c_step(&env);
+        num_steps++;
+    }
+    // Divide in floating point (int / int truncated the rate) and guard a zero-length interval
+    double elapsed = difftime(time(NULL), start);
+    double sps = elapsed > 0 ? num_steps / elapsed : 0.0;
+    printf("Test Environment SPS: %f\n", sps);
+    free_allocated(&env);
+}
+
+int main() {
+    test_performance(10);  // run the throughput benchmark for 10 seconds; demo() above is not called from here
+}
diff --git a/pufferlib/ocean/pong/pong.py b/pufferlib/ocean/pong/pong.py
index affcd86cd2..1a37a693be 100644
--- a/pufferlib/ocean/pong/pong.py
+++ b/pufferlib/ocean/pong/pong.py
@@ -53,7 +53,7 @@ def __init__(self, num_envs=1, render_mode=None,
             max_score=max_score, frameskip=frameskip, continuous=continuous
         )
 
-    def reset(self, seed=None):
+    def reset(self, seed=0):
         binding.vec_reset(self.c_envs, seed)
         self.tick = 0
         return self.observations, []
@@ -80,7 +80,6 @@ def render(self):
     def close(self):
         binding.vec_close(self.c_envs)
 
-from pufferlib.ocean.pong.cy_pong import CyPong
 #from cy_pong import CyPong
 class CythonPong(pufferlib.PufferEnv):
     def __init__(self, num_envs=1, render_mode=None,
@@ -158,4 +157,3 @@ def test_performance(cls, timeout=10, atn_cache=1024):
 
 if __name__ == '__main__':
     test_performance(Pong)
-    test_performance(CythonPong)
diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index feeedecf32..ef05533b69 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -29,16 +29,17 @@ def __init__(self, env, hidden_size=512, output_size=512, **kwargs):
         #self.dtype = pufferlib.pytorch.nativize_dtype(env.emulated)
         self.num_actions = env.single_action_space.n
         self.factors = np.array([4, 4, 17, 5, 3, 5, 5, 5, 7, 4])
-        self.offsets = torch.tensor([0] + list(np.cumsum(self.factors)[:-1])).cuda().view(1, -1, 1, 1)
+        offsets = torch.tensor([0] + list(np.cumsum(self.factors)[:-1])).view(1, -1, 1, 1)
+        self.register_buffer('offsets', offsets)
         self.cum_facs = np.cumsum(self.factors)
 
         self.multihot_dim = self.factors.sum()
         self.is_continuous = False
 
         self.map_2d = nn.Sequential(
-            pufferlib.pytorch.layer_init(nn.Conv2d(self.multihot_dim, 256, 5, stride=3)),
+            pufferlib.pytorch.layer_init(nn.Conv2d(self.multihot_dim, 128, 5, stride=3)),
             nn.ReLU(),
-            pufferlib.pytorch.layer_init(nn.Conv2d(256, 256, 3, stride=1)),
+            pufferlib.pytorch.layer_init(nn.Conv2d(128, 128, 3, stride=1)),
             nn.Flatten(),
         )
 
@@ -47,7 +48,7 @@ def __init__(self, env, hidden_size=512, output_size=512, **kwargs):
             nn.Flatten(),
         )
         self.proj = nn.Sequential(
-            pufferlib.pytorch.layer_init(nn.Linear(2073, hidden_size)),
+            pufferlib.pytorch.layer_init(nn.Linear(1817, hidden_size)),
             nn.ReLU(),
         )
 
@@ -56,10 +57,6 @@ def __init__(self, env, hidden_size=512, output_size=512, **kwargs):
             nn.Linear(output_size, self.num_actions), std=0.01)
         self.value_fn = pufferlib.pytorch.layer_init(nn.Linear(output_size, 1), std=1)
 
-        # Pre-allocate allows compilation
-        map_buf = torch.zeros(32768, self.multihot_dim, 11, 15, dtype=torch.float32)
-        self.register_buffer('map_buf', map_buf)
-
     def forward(self, x, state=None):
         hidden = self.encode_observations(x)
         actions, value = self.decode_actions(hidden)
@@ -75,15 +72,14 @@ def encode_observations(self, observations, state=None):
         ob_reward = observations[:, -10:]
 
         batch = ob_map.shape[0]
-        map_buf = self.map_buf[:batch]
-        map_buf.zero_()
+        map_buf = torch.zeros(batch, 59, 11, 15, dtype=torch.float32, device=observations.device)
         codes = ob_map.permute(0, 3, 1, 2) + self.offsets
         map_buf.scatter_(1, codes, 1)
         ob_map = self.map_2d(map_buf)
 
         player_discrete = self.player_discrete_encoder(ob_player.int())
 
-        obs = torch.cat([ob_map, player_discrete, ob_player.float(), ob_reward], dim=1)
+        obs = torch.cat([ob_map, player_discrete, ob_player.to(ob_map.dtype), ob_reward], dim=1)
         obs = self.proj(obs)
         return obs
 
@@ -344,10 +340,6 @@ def forward_train(self, x, state=None):
 
     def encode_observations(self, observations, state=None):
         cnn_features = observations[:, :-26].view(-1, 11, 11, 4).long()
-        if cnn_features[:, :, :, 0].max() > 15:
-            print('Invalid map value:', cnn_features[:, :, :, 0].max())
-            breakpoint()
-            exit(1)
         map_features = F.one_hot(cnn_features[:, :, :, 0], 16).permute(0, 3, 1, 2).float()
         extra_map_features = (cnn_features[:, :, :, -3:].float() / 255).permute(0, 3, 1, 2)
         cnn_features = torch.cat([map_features, extra_map_features], dim=1)
diff --git a/pufferlib/policy_ranker.py b/pufferlib/policy_ranker.py
deleted file mode 100644
index 8282f9fc9e..0000000000
--- a/pufferlib/policy_ranker.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from pdb import set_trace as T
-import numpy as np
-
-import sqlite3
-
-ANCHOR_ELO = 1000.0
-
-
-def win_prob(elo1, elo2):
-    '''Calculate win probability such that a difference of
-    50/100/150 elo corresponds to win probabilitit 68/95/99.7%'''
-    return 1 / (1 + 10 ** ((elo2 - elo1) / 400))
-
-def update_elos(elos: np.ndarray, scores: np.ndarray, k: float = 4.0):
-    '''Update elos based on the result of a game
-
-    The parameter k controls the magnitude of the update.
-    A higher k means that the elo will change more after a game.
-    This means that elos will converge faster but less precisely.
-    In particular, low k cannot distinguish between players of
-    similar skill, while a high k will just take longer to converge.
-
-    The default is tuned for normally distributed player skill
-    You should lower it if you have very similar players.
-    Raise it if you are evaluating a diverse skill pool.
-    '''
-    num_players = len(elos)
-    assert num_players == len(scores)
-
-    elo_update = [[] for _ in range(num_players)]
-    for i in range(num_players):
-        for j in range(i+1, num_players):
-            delta = scores[i] - scores[j]
-
-            # Convert to elo scoring format
-            if delta > 0:
-                score_i = 1
-            elif delta == 0:
-                score_i = 0.5
-            else:
-                score_i = 0
-
-            # Calculate elo update for pairs
-            expected_i = win_prob(elos[i], elos[j])
-            expected_j = 1 - expected_i
-            score_j = 1 - score_i
-
-            elo_update[i].append(k * (score_i - expected_i))
-            elo_update[j].append(k * (score_j - expected_j))
-
-    elo_update = [np.mean(e) for e in elo_update]
-    return [elo + update for elo, update in zip(elos, elo_update)]
-
-class Ranker:
-    def __init__(self, db_path):
-        self.conn = sqlite3.connect(db_path)
-        with self.conn:
-            self.conn.execute("""
-                CREATE TABLE IF NOT EXISTS ratings (
-                    policy TEXT PRIMARY KEY,
-                    elo REAL
-                );
-            """)
-
-    def __repr__(self):
-        if len(self.ratings) == 0:
-            return ''
-
-        sorted_dict = sorted(self.ratings.items(), key=lambda x: x[1], reverse=True)
-        return '\n'.join([
-            f' - Policy: {name}, Elo: {elo:.3f}'
-            for name, elo in sorted_dict
-        ])
-
-    @property
-    def ratings(self):
-        with self.conn:
-            cursor = self.conn.execute("SELECT * FROM ratings;")
-
-        return {row[0]: row[1] for row in cursor.fetchall()}
-
-    def update(self, scores: dict):
-        if len(scores) < 2:
-            return
-
-        # Load all elos from DB
-        elos = self.ratings
-
-        flat_scores = []
-        flat_elos = []
-        for policy in scores.keys():
-            flat_scores.append(scores[policy])
-            if policy in elos:
-                flat_elos.append(elos[policy])
-            else:
-                flat_elos.append(ANCHOR_ELO)
-
-        flat_elos = update_elos(flat_elos, flat_scores)
-        elos = zip(scores.keys(), flat_elos)
-        with self.conn:
-            self.conn.executemany("""
-                INSERT OR REPLACE INTO ratings (policy, elo)
-                VALUES (?, ?);
-            """, elos)
diff --git a/pufferlib/policy_store.py b/pufferlib/policy_store.py
deleted file mode 100644
index 7bbd96ad1a..0000000000
--- a/pufferlib/policy_store.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from pdb import set_trace as T
-import os
-import torch
-
-
-def get_policy_names(path: str) -> list:
-    # Assumeing that all pt files other than trainer_state.pt in the path are policy files
-    names = []
-    for file in os.listdir(path):
-        if file.endswith(".pt") and file != 'trainer_state.pt':
-            names.append(file[:-3])
-    return sorted(names)
-
-class PolicyStore:
-    def __init__(self, path: str):
-        self.path = path
-
-    def policy_names(self) -> list:
-        return get_policy_names(self.path)
-
-    def get_policy(self, name: str) -> torch.nn.Module:
-        path = os.path.join(self.path, name + '.pt')
-        try:
-            return torch.load(path)
-        except:
-            return torch.load(path, map_location=torch.device('cpu'))
diff --git a/pufferlib/postprocess.py b/pufferlib/postprocess.py
deleted file mode 100644
index cb311cc145..0000000000
--- a/pufferlib/postprocess.py
+++ /dev/null
@@ -1,219 +0,0 @@
-from pdb import set_trace as T
-import numpy as np
-import gymnasium
-
-import pufferlib.utils
-
-class ResizeObservation(gymnasium.Wrapper):
-    '''Fixed downscaling wrapper. Do NOT use gym.wrappers.ResizeObservation
-    It uses a laughably slow OpenCV resize. -50% on Atari just from that.'''
-    def __init__(self, env, downscale=2):
-        super().__init__(env)
-        self.downscale = downscale
-        y_size, x_size = env.observation_space.shape
-        assert y_size % downscale == 0 and x_size % downscale == 0
-        y_size = env.observation_space.shape[0] // downscale
-        x_size = env.observation_space.shape[1] // downscale
-        self.observation_space = gymnasium.spaces.Box(
-            low=0, high=255, shape=(y_size, x_size), dtype=np.uint8)
-
-    def reset(self, seed=None, options=None):
-        obs, info = self.env.reset(seed=seed, options=options)
-        return obs[::self.downscale, ::self.downscale], info
-
-    def step(self, action):
-        obs, reward, terminal, truncated, info = self.env.step(action)
-        return obs[::self.downscale, ::self.downscale], reward, terminal, truncated, info
-
-class ClipAction(gymnasium.Wrapper):
-    '''Wrapper for Gymnasium environments that clips actions'''
-    def __init__(self, env):
-        self.env = env
-        assert isinstance(env.action_space, gymnasium.spaces.Box)
-        dtype_info = np.finfo(env.action_space.dtype)
-        self.action_space = gymnasium.spaces.Box(
-            low=dtype_info.min,
-            high=dtype_info.max,
-            shape=env.action_space.shape,
-            dtype=env.action_space.dtype,
-        )
-
-    def step(self, action):
-        action = np.clip(action, self.env.action_space.low, self.env.action_space.high)
-        return self.env.step(action)
-
-
-class EpisodeStats(gymnasium.Wrapper):
-    '''Wrapper for Gymnasium environments that stores
-    episodic returns and lengths in infos'''
-    def __init__(self, env):
-        self.env = env
-        self.observation_space = env.observation_space
-        self.action_space = env.action_space
-        self.reset()
-
-    def reset(self, seed=None, options=None):
-        self.info = dict(episode_return=[], episode_length=0)
-        # TODO: options
-        return self.env.reset(seed=seed)#, options=options)
-
-    def step(self, action):
-        observation, reward, terminated, truncated, info = super().step(action)
-
-        for k, v in pufferlib.utils.unroll_nested_dict(info):
-            if k not in self.info:
-                self.info[k] = []
-
-            self.info[k].append(v)
-
-        self.info['episode_return'].append(reward)
-        self.info['episode_length'] += 1
-
-        info = {}
-        if terminated or truncated:
-            for k, v in self.info.items():
-                try:
-                    info[k] = sum(v)
-                    continue
-                except TypeError:
-                    pass
-
-                if isinstance(v, str):
-                    info[k] = v
-                    continue
-
-                try:
-                    x = int(v) # probably a value
-                    info[k] = v
-                    continue
-                except TypeError:
-                    pass
-
-        return observation, reward, terminated, truncated, info
-
-class PettingZooWrapper:
-    '''PettingZoo does not provide a ParallelEnv wrapper. This code is adapted from
-    their AEC wrapper, to prevent unneeded conversions to/from AEC'''
-    def __init__(self, env):
-        self.env = env
-
-    def __getattr__(self, name):
-        '''Returns an attribute with ``name``, unless ``name`` starts with an underscore.'''
-        if name.startswith('_') and name != '_cumulative_rewards':
-            raise AttributeError(f'accessing private attribute "{name}" is prohibited')
-        return getattr(self.env, name)
-
-    @property
-    def unwrapped(self):
-        return self.env.unwrapped
-
-    def close(self):
-        self.env.close()
-
-    def render(self):
-        return self.env.render()
-
-    def reset(self, seed=None, options=None):
-        try:
-            return self.env.reset(seed=seed, options=options)
-        except TypeError:
-            return self.env.reset(seed=seed)
-
-    def observe(self, agent):
-        return self.env.observe(agent)
-
-    def state(self):
-        return self.env.state()
-
-    def step(self, action):
-        return self.env.step(action)
-
-    def observation_space(self, agent):
-        return self.env.observation_space(agent)
-
-    def action_space(self, agent):
-        return self.env.action_space(agent)
-
-    def __str__(self) -> str:
-        '''Returns a name which looks like: "max_observation<space_invaders_v1>".'''
-        return f'{type(self).__name__}<{str(self.env)}>'
-
-class MeanOverAgents(PettingZooWrapper):
-    '''Averages over agent infos'''
-    def _mean(self, infos):
-        list_infos = {}
-        for agent, info in infos.items():
-            for k, v in info.items():
-                if k not in list_infos:
-                    list_infos[k] = []
-
-                list_infos[k].append(v)
-
-        mean_infos = {}
-        for k, v in list_infos.items():
-            try:
-                mean_infos[k] = np.mean(v)
-            except:
-                pass
-
-        return mean_infos
-
-    def reset(self, seed=None, options=None):
-        observations, infos = super().reset(seed, options)
-        infos = self._mean(infos)
-        return observations, infos
-
-    def step(self, actions):
-        observations, rewards, terminations, truncations, infos = super().step(actions)
-        infos = self._mean(infos)
-        return observations, rewards, terminations, truncations, infos
-
-class MultiagentEpisodeStats(PettingZooWrapper):
-    '''Wrapper for PettingZoo environments that stores
-    episodic returns and lengths in infos'''
-    def reset(self, seed=None, options=None):
-        observations, infos = super().reset(seed=seed, options=options)
-        self.infos = {
-            agent: dict(episode_return=[], episode_length=0)
-            for agent in self.possible_agents
-        }
-        return observations, infos
-
-    def step(self, actions):
-        observations, rewards, terminations, truncations, infos = super().step(actions)
-
-        all_infos = {}
-        for agent in infos:
-            agent_info = self.infos[agent]
-            for k, v in pufferlib.utils.unroll_nested_dict(infos[agent]):
-                if k not in agent_info:
-                    agent_info[k] = []
-
-                agent_info[k].append(v)
-
-            # Saved to self. TODO: Clean up
-            agent_info['episode_return'].append(rewards[agent])
-            agent_info['episode_length'] += 1
-
-            agent_info = {}
-            all_infos[agent] = agent_info
-            if terminations[agent] or truncations[agent]:
-                for k, v in self.infos[agent].items():
-                    try:
-                        agent_info[k] = sum(v)
-                        continue
-                    except TypeError:
-                        pass
-
-                    if isinstance(v, str):
-                        agent_info[k] = v
-                        continue
-
-                    try:
-                        x = int(v) # probably a value
-                        agent_info[k] = v
-                        continue
-                    except TypeError:
-                        pass
-
-        return observations, rewards, terminations, truncations, all_infos
diff --git a/pufferlib/pufferlib.cu b/pufferlib/pufferlib.cu
new file mode 100644
index 0000000000..c979fcf790
--- /dev/null
+++ b/pufferlib/pufferlib.cu
@@ -0,0 +1,94 @@
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+namespace pufferlib {
+
+static const int max_horizon = 256;
+__host__ __device__ void puff_advantage_row_cuda(float* values, float* rewards, float* dones,
+        float* importance, float* advantages, float gamma, float lambda,
+        float rho_clip, float c_clip, int horizon) {
+    float lastpufferlam = 0;  // accumulator carried from step t+1 to step t
+    for (int t = horizon-2; t >= 0; t--) {  // walks the row backward; advantages[horizon-1] is never written
+        int t_next = t + 1;
+        float nextnonterminal = 1.0 - dones[t_next];  // 0 when dones[t_next] == 1, which drops the bootstrap and the carry
+        float rho_t = fminf(importance[t], rho_clip);  // importance[t] capped at rho_clip; scales the TD error below
+        float c_t = fminf(importance[t], c_clip);  // importance[t] capped at c_clip; scales the carried accumulator
+        // TODO: t_next works and t doesn't. Check original formula
+        float delta = rho_t*(rewards[t_next] + gamma*values[t_next]*nextnonterminal - values[t]);
+        lastpufferlam = delta + gamma*lambda*c_t*lastpufferlam*nextnonterminal;
+        advantages[t] = lastpufferlam;
+    }
+}
+
+// Checks that all five tensors are 2D float32 [num_steps, horizon], share one device, and are contiguous.
+void vtrace_check_cuda(torch::Tensor values, torch::Tensor rewards,
+        torch::Tensor dones, torch::Tensor importance, torch::Tensor advantages,
+        int num_steps, int horizon) {
+    torch::Device device = values.device();
+    for (const torch::Tensor& t : {values, rewards, dones, importance, advantages}) {
+        TORCH_CHECK(t.dim() == 2, "Tensor must be 2D");
+        TORCH_CHECK(t.device() == device, "All tensors must be on same device");
+        TORCH_CHECK(t.size(0) == num_steps, "First dimension must match num_steps");
+        TORCH_CHECK(t.size(1) == horizon, "Second dimension must match horizon");
+        TORCH_CHECK(t.dtype() == torch::kFloat32, "All tensors must be float32");
+        assert(horizon <= max_horizon);
+        // The caller hands raw data_ptr memory to a kernel that indexes it as row*horizon.
+        // t.contiguous() returns a new tensor and cannot fix the caller's copy, so a strided
+        // input is rejected here instead of being read with the wrong layout.
+        TORCH_CHECK(t.is_contiguous(), "All tensors must be contiguous");
+    }
+}
+
+
+ // One thread per row of the [num_steps, horizon] inputs
+__global__ void puff_advantage_kernel(float* values, float* rewards,
+        float* dones, float* importance, float* advantages, float gamma,
+        float lambda, float rho_clip, float c_clip, int num_steps, int horizon) {
+    int row = blockIdx.x*blockDim.x + threadIdx.x;
+    if (row >= num_steps) return;  // the last block may hold threads past the final row
+    int offset = row*horizon;
+    puff_advantage_row_cuda(values + offset, rewards + offset, dones + offset,
+        importance + offset, advantages + offset, gamma, lambda, rho_clip, c_clip, horizon);
+}
+
+// Host entry point: validates the tensors, then launches the kernel that fills `advantages` in place.
+void compute_puff_advantage_cuda(torch::Tensor values, torch::Tensor rewards,
+        torch::Tensor dones, torch::Tensor importance, torch::Tensor advantages,
+        double gamma, double lambda, double rho_clip, double c_clip) {
+    int num_steps = values.size(0);
+    int horizon = values.size(1);
+    vtrace_check_cuda(values, rewards, dones, importance, advantages, num_steps, horizon);
+    TORCH_CHECK(values.is_cuda(), "All tensors must be on GPU");
+    assert(horizon <= max_horizon);
+    if (num_steps == 0) return;  // no rows to process; an empty grid is not a valid launch
+
+    // The kernel skips rows >= num_steps, so any positive row count works with a ceil-divided grid.
+    int threads_per_block = num_steps < 256 ? num_steps : 256;
+    int blocks = (num_steps + threads_per_block - 1) / threads_per_block;
+
+    puff_advantage_kernel<<<blocks, threads_per_block>>>(
+        values.data_ptr<float>(),
+        rewards.data_ptr<float>(),
+        dones.data_ptr<float>(),
+        importance.data_ptr<float>(),
+        advantages.data_ptr<float>(),
+        gamma,
+        lambda,
+        rho_clip,
+        c_clip,
+        num_steps,
+        horizon
+    );
+
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) {
+        throw std::runtime_error(cudaGetErrorString(err));
+    }
+}
+
+TORCH_LIBRARY_IMPL(pufferlib, CUDA, m) {  // implementations for the CUDA dispatch key of the pufferlib op library
+  m.impl("compute_puff_advantage", &compute_puff_advantage_cuda);
+}
+
+}
diff --git a/pufferlib/pufferlib.py b/pufferlib/pufferlib.py
new file mode 100644
index 0000000000..e3f95643ba
--- /dev/null
+++ b/pufferlib/pufferlib.py
@@ -0,0 +1,455 @@
+import os
+import sys
+import warnings
+
+from contextlib import redirect_stdout, redirect_stderr, contextmanager
+from types import SimpleNamespace
+from collections.abc import Mapping
+from io import StringIO
+from functools import wraps
+
+import numpy as np
+import gymnasium
+
+import pufferlib.spaces
+
+ENV_ERROR = '''
+Environment missing required attribute {}. The most common cause is
+calling super() before you have assigned the attribute.
+'''
+
+
+def set_buffers(env, buf=None): # attaches observation/reward/terminal/truncation/mask/action arrays to env
+    if buf is None: # no buffers supplied: allocate fresh arrays with env.num_agents rows
+        obs_space = env.single_observation_space
+        env.observations = np.zeros((env.num_agents, *obs_space.shape), dtype=obs_space.dtype)
+        env.rewards = np.zeros(env.num_agents, dtype=np.float32)
+        env.terminals = np.zeros(env.num_agents, dtype=bool)
+        env.truncations = np.zeros(env.num_agents, dtype=bool)
+        env.masks = np.ones(env.num_agents, dtype=bool) # masks start out all True
+
+        # TODO: Major kerfuffle on inferring action space dtype. This needs some asserts?
+        atn_space = pufferlib.spaces.joint_space(env.single_action_space, env.num_agents)
+        if isinstance(env.single_action_space, pufferlib.spaces.Box):
+            env.actions = np.zeros(atn_space.shape, dtype=atn_space.dtype)
+        else: # non-Box action spaces get int32 actions regardless of the space's own dtype
+            env.actions = np.zeros(atn_space.shape, dtype=np.int32)
+    else: # use the caller's arrays as-is; buf must provide all six keys read below
+        env.observations = buf['observations']
+        env.rewards = buf['rewards']
+        env.terminals = buf['terminals']
+        env.truncations = buf['truncations']
+        env.masks = buf['masks']
+        env.actions = buf['actions']
+
+class PufferEnv: # base class for native envs; subclasses provide reset/step/close
+    def __init__(self, buf=None): # buf is forwarded unchanged to set_buffers
+        if not hasattr(self, 'single_observation_space'): # must be assigned by the subclass before this runs
+            raise APIUsageError(ENV_ERROR.format('single_observation_space'))
+        if not hasattr(self, 'single_action_space'):
+            raise APIUsageError(ENV_ERROR.format('single_action_space'))
+        if not hasattr(self, 'num_agents'):
+            raise APIUsageError(ENV_ERROR.format('num_agents'))
+        if self.num_agents < 1:
+            raise APIUsageError('num_agents must be >= 1')
+
+        if hasattr(self, 'observation_space'): # the joint spaces are derived below, never supplied by the subclass
+            raise APIUsageError('PufferEnvs must define single_observation_space, not observation_space')
+        if hasattr(self, 'action_space'):
+            raise APIUsageError('PufferEnvs must define single_action_space, not action_space')
+        if not isinstance(self.single_observation_space, pufferlib.spaces.Box):
+            raise APIUsageError('Native observation_space must be a Box')
+        if (not isinstance(self.single_action_space, pufferlib.spaces.Discrete)
+                and not isinstance(self.single_action_space, pufferlib.spaces.MultiDiscrete)
+                and not isinstance(self.single_action_space, pufferlib.spaces.Box)):
+            raise APIUsageError('Native action_space must be a Discrete, MultiDiscrete, or Box')
+
+        set_buffers(self, buf) # sets observations/rewards/terminals/truncations/masks/actions on self
+
+        self.action_space = pufferlib.spaces.joint_space(self.single_action_space, self.num_agents)
+        self.observation_space = pufferlib.spaces.joint_space(self.single_observation_space, self.num_agents)
+        self.agent_ids = np.arange(self.num_agents)
+
+    @property
+    def agent_per_batch(self): # same value as num_agents
+        return self.num_agents
+
+    @property
+    def emulated(self):
+        '''Native envs do not use emulation'''
+        return False
+
+    @property
+    def done(self):
+        '''Native envs handle resets internally'''
+        return False
+
+    @property
+    def driver_env(self):
+        '''For compatibility with Multiprocessing'''
+        return self
+
+    def reset(self, seed=None): # async_reset unpacks the result as a 2-tuple ending in infos
+        raise NotImplementedError
+
+    def step(self, actions): # send unpacks the result as a 5-tuple ending in infos
+        raise NotImplementedError
+
+    def close(self):
+        raise NotImplementedError
+
+    def async_reset(self, seed=None): # keeps only infos from reset's return value
+        _, self.infos = self.reset(seed)
+        assert isinstance(self.infos, list), 'PufferEnvs must return info as a list of dicts'
+
+    def send(self, actions): # keeps only infos from step's return value
+        _, _, _, _, self.infos = self.step(actions)
+        assert isinstance(self.infos, list), 'PufferEnvs must return info as a list of dicts'
+
+    def recv(self): # returns the arrays stored on self plus the infos saved by async_reset/send
+        return (self.observations, self.rewards, self.terminals,
+            self.truncations, self.infos, self.agent_ids, self.masks)
+### Postprocessing
+class ResizeObservation(gymnasium.Wrapper):
+    '''Downscales 2D observations by strided slicing. Do NOT use gym.wrappers.ResizeObservation:
+    it uses a laughably slow OpenCV resize. -50% on Atari just from that.'''
+    def __init__(self, env, downscale=2):
+        super().__init__(env)
+        self.downscale = downscale
+        y_size, x_size = env.observation_space.shape # unpacking requires exactly two dimensions
+        assert y_size % downscale == 0 and x_size % downscale == 0
+        y_size = env.observation_space.shape[0] // downscale
+        x_size = env.observation_space.shape[1] // downscale
+        self.observation_space = gymnasium.spaces.Box( # declared as uint8 in [0, 255] whatever the wrapped space says
+            low=0, high=255, shape=(y_size, x_size), dtype=np.uint8)
+
+    def reset(self, seed=None, options=None):
+        obs, info = self.env.reset(seed=seed, options=options)
+        return obs[::self.downscale, ::self.downscale], info # keep every downscale-th row and column
+
+    def step(self, action):
+        obs, reward, terminal, truncated, info = self.env.step(action)
+        return obs[::self.downscale, ::self.downscale], reward, terminal, truncated, info
+
+class ClipAction(gymnasium.Wrapper):
+    '''Wrapper for Gymnasium environments that clips actions'''
+    def __init__(self, env): # note: gymnasium.Wrapper.__init__ is not called here
+        self.env = env
+        assert isinstance(env.action_space, gymnasium.spaces.Box)
+        dtype_info = np.finfo(env.action_space.dtype) # np.finfo only accepts floating-point dtypes
+        self.action_space = gymnasium.spaces.Box( # advertised bounds span the dtype's full finite range
+            low=dtype_info.min,
+            high=dtype_info.max,
+            shape=env.action_space.shape,
+            dtype=env.action_space.dtype,
+        )
+
+    def step(self, action): # clip to the wrapped env's own bounds before stepping it
+        action = np.clip(action, self.env.action_space.low, self.env.action_space.high)
+        return self.env.step(action)
+
+
+class EpisodeStats(gymnasium.Wrapper):
+    '''Wrapper for Gymnasium environments that stores
+    episodic returns and lengths in infos'''
+    def __init__(self, env): # note: gymnasium.Wrapper.__init__ is not called here
+        self.env = env
+        self.observation_space = env.observation_space
+        self.action_space = env.action_space
+        self.reset() # this also resets the wrapped env once at construction time
+
+    def reset(self, seed=None, options=None):
+        self.info = dict(episode_return=[], episode_length=0) # fresh accumulators
+        # TODO: options
+        return self.env.reset(seed=seed)#, options=options)
+
+    def step(self, action):
+        observation, reward, terminated, truncated, info = super().step(action)
+
+        for k, v in unroll_nested_dict(info): # nested info keys arrive flattened as "outer/inner"
+            if k not in self.info:
+                self.info[k] = []
+
+            self.info[k].append(v)
+
+        self.info['episode_return'].append(reward)
+        self.info['episode_length'] += 1
+
+        info = {} # the returned info stays empty unless this step ends the episode
+        if terminated or truncated:
+            for k, v in self.info.items():
+                try:
+                    info[k] = sum(v) # summable lists collapse to their total
+                    continue
+                except TypeError:
+                    pass
+
+                if isinstance(v, str):
+                    info[k] = v
+                    continue
+
+                try:
+                    x = int(v) # probably a value
+                    info[k] = v
+                    continue
+                except TypeError: # entries that fit none of the cases are left out
+                    pass
+
+        return observation, reward, terminated, truncated, info
+
+class PettingZooWrapper:
+    '''PettingZoo does not provide a ParallelEnv wrapper. This code is adapted from
+    their AEC wrapper, to prevent unneeded conversions to/from AEC'''
+    def __init__(self, env):
+        self.env = env
+
+    def __getattr__(self, name): # only invoked for attributes not found on the wrapper itself
+        '''Returns an attribute with ``name``, unless ``name`` starts with an underscore.'''
+        if name.startswith('_') and name != '_cumulative_rewards': # the one private name that is forwarded
+            raise AttributeError(f'accessing private attribute "{name}" is prohibited')
+        return getattr(self.env, name)
+
+    @property
+    def unwrapped(self):
+        return self.env.unwrapped
+
+    def close(self):
+        self.env.close()
+
+    def render(self):
+        return self.env.render()
+
+    def reset(self, seed=None, options=None):
+        try:
+            return self.env.reset(seed=seed, options=options)
+        except TypeError: # retried without options; note a TypeError raised inside reset also lands here
+            return self.env.reset(seed=seed)
+
+    def observe(self, agent):
+        return self.env.observe(agent)
+
+    def state(self):
+        return self.env.state()
+
+    def step(self, action):
+        return self.env.step(action)
+
+    def observation_space(self, agent):
+        return self.env.observation_space(agent)
+
+    def action_space(self, agent):
+        return self.env.action_space(agent)
+
+    def __str__(self) -> str:
+        '''Returns a name which looks like: "max_observation<space_invaders_v1>".'''
+        return f'{type(self).__name__}<{str(self.env)}>'
+
+class MeanOverAgents(PettingZooWrapper):
+    '''Averages over agent infos'''
+    def _mean(self, infos): # infos: dict of per-agent info dicts -> one dict of per-key means
+        list_infos = {}
+        for agent, info in infos.items():
+            for k, v in info.items():
+                if k not in list_infos:
+                    list_infos[k] = []
+
+                list_infos[k].append(v)
+
+        mean_infos = {}
+        for k, v in list_infos.items():
+            try:
+                mean_infos[k] = np.mean(v)
+            except Exception: # best effort: skip keys np.mean cannot average, but let Ctrl-C through
+                pass
+
+        return mean_infos
+
+    def reset(self, seed=None, options=None):
+        observations, infos = super().reset(seed, options)
+        infos = self._mean(infos)
+        return observations, infos
+
+    def step(self, actions):
+        observations, rewards, terminations, truncations, infos = super().step(actions)
+        infos = self._mean(infos)
+        return observations, rewards, terminations, truncations, infos
+
+class MultiagentEpisodeStats(PettingZooWrapper):
+    '''Wrapper for PettingZoo environments that stores
+    episodic returns and lengths in infos'''
+    def reset(self, seed=None, options=None):
+        observations, infos = super().reset(seed=seed, options=options)
+        self.infos = { # fresh accumulators for every possible agent
+            agent: dict(episode_return=[], episode_length=0)
+            for agent in self.possible_agents
+        }
+        return observations, infos
+
+    def step(self, actions):
+        observations, rewards, terminations, truncations, infos = super().step(actions)
+
+        all_infos = {}
+        for agent in infos: # only agents present in this step's infos are processed
+            agent_info = self.infos[agent]
+            for k, v in unroll_nested_dict(infos[agent]): # nested keys arrive flattened as "outer/inner"
+                if k not in agent_info:
+                    agent_info[k] = []
+
+                agent_info[k].append(v)
+
+            # Saved to self. TODO: Clean up
+            agent_info['episode_return'].append(rewards[agent])
+            agent_info['episode_length'] += 1
+
+            agent_info = {} # rebound: from here on this is the dict returned for the agent
+            all_infos[agent] = agent_info
+            if terminations[agent] or truncations[agent]: # stays empty unless the agent just finished
+                for k, v in self.infos[agent].items():
+                    try:
+                        agent_info[k] = sum(v) # summable lists collapse to their total
+                        continue
+                    except TypeError:
+                        pass
+
+                    if isinstance(v, str):
+                        agent_info[k] = v
+                        continue
+
+                    try:
+                        x = int(v) # probably a value
+                        agent_info[k] = v
+                        continue
+                    except TypeError: # entries that fit none of the cases are left out
+                        pass
+
+        return observations, rewards, terminations, truncations, all_infos
+### Exceptions
+class EnvironmentSetupError(RuntimeError): # e: underlying error, package: name of what failed to set up
+    def __init__(self, e, package):
+        super().__init__(f'Failed to set up {package}: {e}') # self.message was read before ever being set
+
+class APIUsageError(RuntimeError):
+    """Raised on incorrect use of the API; the text is also kept on `.message`."""
+
+    def __init__(self, message="API usage error."):
+        super().__init__(message)
+        self.message = message
+
+class InvalidAgentError(ValueError):
+    """Raised for an agent/team key that is not valid.
+
+    The message interpolates the rejected agent_id and the valid agents."""
+
+    def __init__(self, agent_id, agents):
+        rejected = f'Invalid agent/team ({agent_id}) specified. '
+        allowed = f'Valid values:\n{agents}'
+        super().__init__(rejected + allowed)
+
+class GymToGymnasium: # adapts 1-value reset / 4-value step to 2-value reset / 5-value step
+    def __init__(self, env):
+        self.env = env
+        self.observation_space = env.observation_space
+        self.action_space = env.action_space
+        self.render = env.render
+        self.metadata = env.metadata
+
+    def reset(self, seed=None, options=None): # options is accepted but never forwarded
+        if seed is not None:
+            ob = self.env.reset(seed=seed)
+        else:
+            ob = self.env.reset()
+        return ob, {} # info is always an empty dict
+
+    def step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return observation, reward, done, False, info # done is passed on as terminated; truncated is always False
+
+    def close(self):
+        self.env.close()
+
+### Wrappers
+class PettingZooTruncatedWrapper: # forwards to env; reset swaps the env's infos for empty per-key dicts
+    def __init__(self, env):
+        self.env = env
+        self.observation_space = env.observation_space
+        self.action_space = env.action_space
+        self.render = env.render
+
+    @property
+    def render_mode(self):
+        return self.env.render_mode
+
+    @property
+    def possible_agents(self):
+        return self.env.possible_agents
+
+    @property
+    def agents(self):
+        return self.env.agents
+
+    def reset(self, seed=None):
+        if seed is not None:
+            ob, info = self.env.reset(seed=seed)
+        else:
+            ob, info = self.env.reset()
+        info = {k: {} for k in ob} # the env's info is discarded: one empty dict per observation key
+        return ob, info
+
+    def step(self, actions): # plain pass-through of the env's 5-tuple
+        observations, rewards, terminals, truncations, infos = self.env.step(actions)
+        return observations, rewards, terminals, truncations, infos
+
+    def close(self):
+        self.env.close()
+
+### Misc
+def unroll_nested_dict(d):
+    '''Yield (key, value) leaf pairs of a nested dict, joining nested keys with "/"'''
+    if not isinstance(d, dict):
+        return d # inside a generator this just ends iteration; nothing is yielded
+    for key, value in d.items():
+        if not isinstance(value, dict):
+            yield key, value
+            continue
+        for sub_key, leaf in unroll_nested_dict(value):
+            yield f"{key}/{sub_key}", leaf
+
+def silence_warnings(original_func, category=DeprecationWarning):
+    '''Decorator: call original_func with warnings of `category` ignored'''
+    def silenced(*args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=category)
+            return original_func(*args, **kwargs)
+    return wraps(original_func)(silenced)
+
+class Suppress():
+    '''Context manager that silences file descriptors 1/2 and Python-level stdout/stderr'''
+    def __init__(self):
+        self.f = StringIO() # sink for anything written via sys.stdout/sys.stderr
+
+    def __enter__(self):
+        # Suppress C library outputs. devnull is opened per entry so an instance can be reused
+        self.null_1 = os.open(os.devnull, os.O_WRONLY | os.O_TRUNC | os.O_CREAT)
+        self.null_2 = os.open(os.devnull, os.O_WRONLY | os.O_TRUNC | os.O_CREAT)
+        self.orig_stdout = os.dup(1)
+        self.orig_stderr = os.dup(2)
+        os.dup2(self.null_1, 1)
+        os.dup2(self.null_2, 2)
+
+        # Suppress Python outputs
+        self._stdout_redirector = redirect_stdout(self.f)
+        self._stderr_redirector = redirect_stderr(self.f)
+        self._stdout_redirector.__enter__()
+        self._stderr_redirector.__enter__()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Enable C library outputs
+        os.dup2(self.orig_stdout, 1)
+        os.dup2(self.orig_stderr, 2)
+        os.close(self.orig_stdout)
+        os.close(self.orig_stderr)
+        os.close(self.null_1)
+        os.close(self.null_2)
+        # Enable Python outputs
+        self._stdout_redirector.__exit__(exc_type, exc_val, exc_tb)
+        self._stderr_redirector.__exit__(exc_type, exc_val, exc_tb)
diff --git a/pufferlib/pytorch.py b/pufferlib/pytorch.py
index 4d4581cd70..fd95fd714d 100644
--- a/pufferlib/pytorch.py
+++ b/pufferlib/pytorch.py
@@ -47,7 +47,7 @@
 
 # TODO: handle discrete obs
 # Spend some time trying to break this fn with differnt obs
-def nativize_dtype(emulated: pufferlib.namespace) -> NativeDType:
+def nativize_dtype(emulated) -> NativeDType:
     # sample dtype - the dtype of what we obtain from the environment (usually bytes)
     sample_dtype: np.dtype = emulated.observation_dtype
     # structured dtype - the gym.Space converted numpy dtype
@@ -100,10 +100,7 @@ def _nativize_dtype(sample_dtype: np.dtype,
         return subviews, dtype, shape, start_offset, all_delta
 
 
-def nativize_tensor(
-    observation: torch.Tensor,
-    native_dtype: NativeDType,
-) -> torch.Tensor | dict[str, torch.Tensor]:
+def nativize_tensor(observation: torch.Tensor, native_dtype: NativeDType):
     return _nativize_tensor(observation, native_dtype)
 
 
@@ -124,9 +121,7 @@ def compilable_cast(u8, dtype):
     return u8.view(dtype)  # breaking cast
 
 
-def _nativize_tensor(
-    observation: torch.Tensor, native_dtype: NativeDType
-) -> torch.Tensor | dict[str, torch.Tensor]:
+def _nativize_tensor(observation: torch.Tensor, native_dtype: NativeDType):
     if isinstance(native_dtype, tuple):
         dtype, shape, offset, delta = native_dtype
         torch._check_is_size(offset)
@@ -157,13 +152,11 @@ def nativize_observation(observation, emulated):
     )
 
 
-def flattened_tensor_size(native_dtype: tuple[torch.dtype, tuple[int], int, int]):
+def flattened_tensor_size(native_dtype):
     return _flattened_tensor_size(native_dtype)
 
 
-def _flattened_tensor_size(
-    native_dtype: tuple[torch.dtype, tuple[int], int, int],
-) -> int:
+def _flattened_tensor_size(native_dtype):
     if isinstance(native_dtype, tuple):
         return np.prod(native_dtype[1])  # shape
     else:
@@ -277,11 +270,9 @@ def entropy_probs(logits, probs):
     p_log_p = logits * probs
     return -p_log_p.sum(-1)
 
-
-def sample_logits(logits: Union[torch.Tensor, List[torch.Tensor]],
-        action=None, is_continuous=False):
+def sample_logits(logits, action=None):
     is_discrete = isinstance(logits, torch.Tensor)
-    if is_continuous:
+    if isinstance(logits, torch.distributions.Normal):
         batch = logits.loc.shape[0]
         if action is None:
             action = logits.sample().view(batch, -1)
@@ -291,6 +282,7 @@ def sample_logits(logits: Union[torch.Tensor, List[torch.Tensor]],
         return action, log_probs, logits_entropy
     elif is_discrete:
         logits = logits.unsqueeze(0)
+    # TODO: Double check this
     else: #multi-discrete
         logits = torch.nn.utils.rnn.pad_sequence(
             [l.transpose(0,1) for l in logits], 
@@ -299,15 +291,15 @@ def sample_logits(logits: Union[torch.Tensor, List[torch.Tensor]],
         ).permute(1,2,0)
 
     normalized_logits = logits - logits.logsumexp(dim=-1, keepdim=True)
-    probs = logits_to_probs(normalized_logits)
+    probs = logits_to_probs(logits)
 
     if action is None:
+        probs = torch.nan_to_num(probs, 1e-8, 1e-8, 1e-8)
         action = torch.multinomial(probs.reshape(-1, probs.shape[-1]), 1, replacement=True)
         action = action.reshape(probs.shape[:-1])
     else:
         batch = logits[0].shape[0]
         action = action.view(batch, -1).T
-        probs = logits_to_probs(normalized_logits)
 
     assert len(logits) == len(action)
     logprob = log_prob(normalized_logits, action)
@@ -317,6 +309,3 @@ def sample_logits(logits: Union[torch.Tensor, List[torch.Tensor]],
         return action.squeeze(0), logprob.squeeze(0), logits_entropy.squeeze(0)
 
     return action.T, logprob.sum(0), logits_entropy
-
-
-
diff --git a/shared.cpp b/pufferlib/shared.cpp
similarity index 99%
rename from shared.cpp
rename to pufferlib/shared.cpp
index 1216564779..791215b0af 100644
--- a/shared.cpp
+++ b/pufferlib/shared.cpp
@@ -6,7 +6,9 @@
 #define __device__
 #endif
 
+const int max_horizon = 256;
 // [horizon]
+/*
 __host__ __device__ void gae_row(float* values, float* rewards, float* dones, float* advantages,
         float gamma, float gae_lambda, int horizon) {
     float lastgaelam = 0;
@@ -46,7 +48,6 @@ torch::Tensor gae_check(torch::Tensor values, torch::Tensor rewards,
 }
 
 // [horizon]
-const int max_horizon = 256;
 __host__ __device__ void vtrace_row(float* values, float* rewards, float* dones,
         float* importance, float* vs, float* advantages, float gamma, float rho_clip, float c_clip, int horizon) {
     float accum = 0.0;//values[horizon-1]; // Is this correct?
@@ -62,6 +63,7 @@ __host__ __device__ void vtrace_row(float* values, float* rewards, float* dones,
         vs[t] = accum + values[t];
     }
 }
+*/
 
 __host__ __device__ void puff_advantage_row(float* values, float* rewards, float* dones,
         float* importance, float* vs, float* advantages, float gamma, float lambda,
diff --git a/pufferlib/spaces.py b/pufferlib/spaces.py
index b5bab9e6cc..178513c02c 100644
--- a/pufferlib/spaces.py
+++ b/pufferlib/spaces.py
@@ -17,9 +17,10 @@ def joint_space(space, n):
             high=np.repeat(space.nvec[None] - 1, n, axis=0),
             shape=(n, len(space)), dtype=space.dtype)
     elif isinstance(space, Box):
-        return gymnasium.spaces.Box(
-            low=np.repeat(space.low[None], n, axis=0),
-            high=np.repeat(space.high[None], n, axis=0),
-            shape=(n, *space.shape), dtype=space.dtype)
+        shape = [n, *[e for e in space.shape if e != 1]] # drop size-1 dims of the single space only
+        low = np.repeat(space.low[None], n, axis=0).reshape(shape) # squeeze() also dropped axis 0 when n == 1
+        high = np.repeat(space.high[None], n, axis=0).reshape(shape)
+        return gymnasium.spaces.Box(low=low, high=high,
+            shape=shape, dtype=space.dtype)
     else:
         raise ValueError(f'Unsupported space: {space}')
diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index 49c2494a8d..63e06b186d 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -6,7 +6,6 @@
 from copy import deepcopy
 
 import pufferlib
-import scipy.stats
 
 import torch
 import pyro
@@ -115,7 +114,7 @@ def unnormalize(self, value):
 def _params_from_puffer_sweep(sweep_config):
     param_spaces = {}
     for name, param in sweep_config.items():
-        if name in ('method', 'name', 'metric', 'max_score'):
+        if name in ('method', 'metric', 'goal'):
             continue
 
         assert isinstance(param, dict)
@@ -152,12 +151,13 @@ def _params_from_puffer_sweep(sweep_config):
 class Hyperparameters:
     def __init__(self, config, verbose=True):
         self.spaces = _params_from_puffer_sweep(config)
-        self.flat_spaces = dict(pufferlib.utils.unroll_nested_dict(self.spaces))
+        self.flat_spaces = dict(pufferlib.unroll_nested_dict(self.spaces))
         self.num = len(self.flat_spaces)
 
         self.metric = config['metric']
-        assert self.metric['goal'] in ['maximize', 'minimize']
-        self.optimize_direction = 1 if self.metric['goal'] == 'maximize' else -1
+        goal = config['goal']
+        assert goal in ('maximize', 'minimize')
+        self.optimize_direction = 1 if goal == 'maximize' else -1
 
         self.search_centers = np.array([
             e.norm_mean for e in self.flat_spaces.values()])
@@ -191,7 +191,7 @@ def sample(self, n, mu=None, scale=1):
         return np.clip(samples, self.min_bounds, self.max_bounds)
 
     def from_dict(self, params):
-        flat_params = dict(pufferlib.utils.unroll_nested_dict(params))
+        flat_params = dict(pufferlib.unroll_nested_dict(params))
         values = []
         for key, space in self.flat_spaces.items():
             assert key in flat_params, f'Missing hyperparameter {key}'
@@ -325,15 +325,17 @@ def create_gp(x_dim, scale_length=1.0):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
     return model, optimizer
 
+# TODO: Eval defaults
 class Protein:
     def __init__(self,
             sweep_config,
-            max_suggestion_cost = None,
-            resample_frequency = 5,
-            num_random_samples = 10,
+            max_suggestion_cost = 3600,
+            resample_frequency = 0,
+            num_random_samples = 50,
             global_search_scale = 1,
             random_suggestions = 1024,
             suggestions_per_pareto = 256,
+            seed_with_search_center = True,
             min_score = None,
             max_score = None,
         ):
@@ -350,6 +352,7 @@ def __init__(self,
         self.global_search_scale = global_search_scale
         self.random_suggestions = random_suggestions
         self.suggestions_per_pareto = suggestions_per_pareto
+        self.seed_with_search_center = seed_with_search_center
         self.resample_frequency = resample_frequency
         self.max_suggestion_cost = max_suggestion_cost
 
@@ -363,15 +366,11 @@ def __init__(self,
     def suggest(self, fill):
         # TODO: Clip random samples to bounds so we don't get bad high cost samples
         info = {}
-        #if self.suggestion_idx <= self.num_random_samples:
-        #    suggestions = self.hyperparameters.sample(self.random_suggestions)
-        #    best_idx = np.random.randint(0, self.random_suggestions)
-        #    best = suggestions[best_idx]
         self.suggestion_idx += 1
-        if len(self.success_observations) == 0:
+        if len(self.success_observations) == 0 and self.seed_with_search_center:
             best = self.hyperparameters.search_centers
             return self.hyperparameters.to_dict(best, fill), info
-        elif len(self.success_observations) < self.num_random_samples:
+        elif not self.seed_with_search_center and len(self.success_observations) < self.num_random_samples:
             suggestions = self.hyperparameters.sample(self.random_suggestions)
             self.suggestion = random.choice(suggestions)
             return self.hyperparameters.to_dict(self.suggestion, fill), info
@@ -392,26 +391,21 @@ def suggest(self, fill):
         # Transformed scores
         min_score = self.min_score
         if min_score is None:
-            min_score = np.min(y) - np.min(np.abs(y))
+            min_score = np.min(y)
 
         if np.min(y) < min_score - 1e-6:
             raise ValueError(f'Min score {min_score} is less than min score in data {np.min(y)}')
 
         max_score = self.max_score
         if max_score is None:
-            max_score = np.max(y) + np.max(np.abs(y))
+            max_score = np.max(y)
 
         if np.max(y) > max_score + 1e-6:
             raise ValueError(f'Max score {max_score} is greater than max score in data {np.max(y)}')
 
-        # Linearize, exp transform, linearize
-        y_norm = (y - min_score) / (max_score - min_score)
-        #yt = -np.log(1 - y_norm + eps)
-        #yt_min = np.min(yt)
-        #yt_max = np.max(yt)
-        #yt_norm = (yt - yt_min) / (yt_max - yt_min)
+        # Linearize
+        y_norm = (y - min_score) / (np.abs(max_score - min_score) + 1e-6)
 
-        #self.gp_score.set_data(params, torch.from_numpy(yt_norm))
         self.gp_score.set_data(params, torch.from_numpy(y_norm))
         self.gp_score.train()
         gp.util.train(self.gp_score, self.score_opt)
@@ -425,23 +419,20 @@ def suggest(self, fill):
         # Linear input norm creates clean 1 mean fn
         log_c_min = np.min(log_c)
         log_c_max = np.max(log_c)
-        log_c_norm = (log_c - log_c_min) / (log_c_max - log_c_min)
+        log_c_norm = (log_c - log_c_min) / (log_c_max - log_c_min + 1e-6)
 
         self.gp_cost.mean_function = lambda x: 1
         self.gp_cost.set_data(params, torch.from_numpy(log_c_norm))
         self.gp_cost.train()
-        gp.util.train(self.gp_cost, self.cost_opt)
+        try:
+            gp.util.train(self.gp_cost, self.cost_opt)
+        except Exception as e: # was bare except + breakpoint(), which hangs unattended runs
+            raise RuntimeError('Failed to train the cost GP') from e
         self.gp_cost.eval()
 
         candidates, pareto_idxs = pareto_points(self.success_observations)
         pareto_costs = np.array([e['cost'] for e in candidates])
 
-        #cost_dists = np.abs(np.log(pareto_costs[:, None]) - np.log(pareto_costs[None, :]))
-        ###cost_dists = np.abs(pareto_costs[:, None] - pareto_costs[None, :])
-        #cost_dists += (np.max(pareto_costs) + 1)*np.eye(len(pareto_costs)) # mask self-distance
-        #idx = np.argmax(np.min(cost_dists, axis=1))
-        #search_centers = candidates[idx]['input']
-
         ### Sample suggestions
         search_centers = np.stack([e['input'] for e in candidates])
         suggestions = self.hyperparameters.sample(
@@ -456,10 +447,7 @@ def suggest(self, fill):
         gp_y_norm = gp_y_norm.numpy()
         gp_log_c_norm = gp_log_c_norm.numpy()
 
-        # Unlinearize, inverse exp transform, unlinearize
-        #gp_yt = gp_yt_norm*(yt_max - yt_min) + yt_min
-        #gp_y_norm = -(np.exp(-gp_yt) - 1 - eps)
-        #gp_y = gp_y_norm*(max_score - min_score) + min_score
+        # Unlinearize
         gp_y = gp_y_norm*(max_score - min_score) + min_score
 
         gp_log_c = gp_log_c_norm*(log_c_max - log_c_min) + log_c_min
@@ -467,94 +455,31 @@ def suggest(self, fill):
 
         gp_c_min = np.min(gp_c)
         gp_c_max = np.max(gp_c)
-        gp_c_norm = (gp_c - gp_c_min) / (gp_c_max - gp_c_min)
+        gp_c_norm = (gp_c - gp_c_min) / (gp_c_max - gp_c_min + 1e-6)
 
         pareto_y = y[pareto_idxs]
-        #pareto_yt = yt[pareto_idxs]
-        #pareto_yt_norm = yt_norm[pareto_idxs]
         pareto_c = c[pareto_idxs]
         pareto_log_c_norm = log_c_norm[pareto_idxs]
 
         max_c = np.max(c)
         min_c = np.min(c)
 
-        c_right = abs(pareto_log_c_norm[None, :] - gp_log_c_norm[:, None])
-
-        #pareto_c_norm = (pareto_c - min_c) / (max_c - min_c)
-        #gp_c_norm = (gp_c - min_c) / (max_c - min_c)
-        #c_right = np.abs(pareto_c_norm[None, :] - gp_c_norm[:, None])
-
-        #pareto_log_c_norm = (np.log(pareto_c) - log_c_min) / (log_c_max - log_c_min)
-        #c_right = np.abs(pareto_log_c_norm[None, :] - gp_log_c_norm[:, None])
-
-        sorted_dist = np.sort(c_right, axis=1)
-        #top_k = sorted_dist[:, :5]
-        #pareto_dist_weight = np.sum(top_k, axis=1) / top_k.shape[1]
-
-        nearest_idx = np.argmin(c_right, axis=1)
-        nearest_pareto_dist = np.min(c_right, axis=1)
-        nearest_pareto_y = pareto_y[nearest_idx]
-
-        #c_left = np.abs(gp_c[:, None] - pareto_c[None, :])
-        #c_left[c_left < 0] = np.inf
-        #nearest_idx = np.argmin(c_left, axis=1)
-        #nearest_pareto_yt_norm = pareto_yt_norm[nearest_idx]
-
         max_c_mask = gp_c < self.max_suggestion_cost
-        #suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-        #        gp_yt_norm - nearest_pareto_yt_norm) * nearest_pareto_dist
-
-        #suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-        #        gp_yt_norm - nearest_pareto_yt_norm)# / gp_c
-
-        #np.argwhere(gp_c > c)
-        cumsum_mask = c[None, :] <= np.clip(gp_c[:, None], min_c, max_c)
-        cumsum_mask = cumsum_mask * c[None, :]
-        cumsum = np.sum(cumsum_mask, axis=1) / np.sum(c)
-        target = gp_c_norm 
-        weight = target - cumsum
-
-        #if np.random.rand() < 0.5:
-        #    score = gp_y_norm
-        #else:
-        #    score = gp_y_norm * weight
-        #suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-        #        score)# / gp_c
-
 
         target = 1.25*np.random.rand()
         weight = 1 - abs(target - gp_log_c_norm)
 
         suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-                gp_y_norm*weight)# / gp_c
-
-        #suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-        #        gp_y_norm*nearest_pareto_dist)# / gp_c
-
-        #exp_scores = np.exp(suggestion_scores)
-        #sum_exp_scores = np.sum(exp_scores)
-        #softmax_scores = exp_scores / sum_exp_scores
-        #idxs = np.arange(len(softmax_scores))
-        #best_idx = np.random.choice(idxs, p=softmax_scores)
-
-        # This works and uncovers approximate binary search when the GP is perfect
-        # Can't include cost in denom because it biases this case
-        # Instead, use conservative score and/or cost estimates
-        # Just need to figure out why the GP is overconfident
+                gp_y_norm*weight)
 
         best_idx = np.argmax(suggestion_scores)
-        #best_idx = np.argmax(gp_y_norm)
         info = dict(
             cost = gp_c[best_idx].item(),
             score = gp_y[best_idx].item(),
-            nearby = nearest_pareto_y[best_idx].item(),
-            dist = nearest_pareto_dist[best_idx].item(),
             rating = suggestion_scores[best_idx].item(),
         )
         print('Predicted -- ',
             f'Score: {info["score"]:.3f}',
-            f'Nearby: {info["nearby"]:.3f}',
-            f'Dist: {info["dist"]:.3f}',
             f'Cost: {info["cost"]:.3f}',
             f'Rating: {info["rating"]:.3f}',
         )
@@ -699,13 +624,13 @@ def _carbs_params_from_puffer_sweep(sweep_config):
 class Carbs:
     def __init__(self,
             sweep_config: dict,
-            max_suggestion_cost: float = None,
+            max_suggestion_cost: float = 3600,
             resample_frequency: int = 5,
             num_random_samples: int = 10,
         ):
 
         param_spaces = _carbs_params_from_puffer_sweep(sweep_config)
-        flat_spaces = [e[1] for e in pufferlib.utils.unroll_nested_dict(param_spaces)]
+        flat_spaces = [e[1] for e in pufferlib.unroll_nested_dict(param_spaces)]
         for e in flat_spaces:
             print(e.name, e.space)
 
diff --git a/pufferlib/utils.py b/pufferlib/utils.py
deleted file mode 100644
index 1150c76311..0000000000
--- a/pufferlib/utils.py
+++ /dev/null
@@ -1,410 +0,0 @@
-from pdb import set_trace as T
-
-from collections import OrderedDict
-from contextlib import nullcontext
-
-import numpy as np
-
-import time
-import os
-import sys
-import pickle
-import subprocess
-from contextlib import redirect_stdout, redirect_stderr, contextmanager
-from io import StringIO
-import psutil
-
-import warnings
-from functools import wraps
-
-import functools
-import inspect
-import importlib
-
-def validate_args(fn, kwargs):
-    fn_kwargs = get_init_args(fn)
-    for param, val in kwargs.items():
-        if param not in fn_kwargs:
-            raise ValueError(
-                f'Invalid argument\n{param}\nto\n{fn}\n'
-                f'which takes \n{fn_kwargs}\n'
-                f'Double check your config'
-            )
-
-def get_init_args(fn):
-    if fn is None:
-        return {}
-
-    if isinstance(fn, functools.partial):
-        return fn.keywords
-
-    sig = inspect.signature(fn)
-    kwargs = {}
-    for name, param in sig.parameters.items():
-        if name in ['env', 'policy']:
-            # Hack to avoid duplicate kwargs
-            continue
-        if param.kind == inspect.Parameter.VAR_POSITIONAL:
-            continue
-        elif param.kind == inspect.Parameter.VAR_KEYWORD:
-            continue
-        else:
-            kwargs[name] = param.default if param.default is not inspect.Parameter.empty else None
-    return kwargs
-
-
-def unroll_nested_dict(d):
-    if not isinstance(d, dict):
-        return d
-
-    for k, v in d.items():
-        if isinstance(v, dict):
-            for k2, v2 in unroll_nested_dict(v):
-                yield f"{k}/{k2}", v2
-        else:
-            yield k, v
-
-def install_requirements(env):
-    '''Pip install dependencies for specified environment'''
-    pip_install_cmd = [sys.executable, "-m", "pip", "install", "-e" f".[{env}]"]
-    proc = subprocess.run(pip_install_cmd, capture_output=True, text=True)
-    if proc.returncode != 0:
-        raise RuntimeError(f"Error installing requirements: {proc.stderr}")
-
-def install_and_import(package):
-    '''Install and import a package'''
-    try:
-        module = importlib.import_module(package)
-    except ImportError:
-        install_requirements(package)
-        module = importlib.import_module(package)
-
-    return module
-
-def silence_warnings(original_func, category=DeprecationWarning):
-    @wraps(original_func)
-    def wrapper(*args, **kwargs):
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", category=category)
-            return original_func(*args, **kwargs)
-    return wrapper
-
-def check_env(env):
-    #assert issubclass(env_cls, gym.Env), "Not a gymnasium env (are you on old gym?)"
-    assert hasattr(env, 'possible_agents')
-    assert len(env.possible_agents)
-    obs_space = env.observation_space(env.possible_agents[0])
-    atn_space = env.action_space(env.possible_agents[0])
-    for e in env.possible_agents:
-        assert env.observation_space(e) == obs_space, 'All agents must have same obs space'
-        assert env.action_space(e) == atn_space, 'All agents must have same atn space'
-
-def make_zeros_like(data):
-    if isinstance(data, dict):
-        return {k: make_zeros_like(v) for k, v in data.items()}
-    elif isinstance(data, (list, tuple)):
-        return [make_zeros_like(v) for v in data]
-    elif isinstance(data, np.ndarray):
-        return np.zeros_like(data)
-    elif isinstance(data, (int, float)):
-        return 0
-    else:
-        raise ValueError(f'Unsupported type: {type(data)}')
-
-def compare_arrays(array_1, array_2):
-    assert isinstance(array_1, np.ndarray)
-    assert isinstance(array_2, np.ndarray)
-    assert array_1.shape == array_2.shape
-    return np.allclose(array_1, array_2)
-
-def compare_dicts(dict_1, dict_2, idx):
-    assert isinstance(dict_1, (dict, OrderedDict))
-    assert isinstance(dict_2, (dict, OrderedDict))
-
-    if not all(k in dict_2 for k in dict_1):
-        raise ValueError("Keys do not match between dictionaries.")
-
-    for k, v in dict_1.items():
-        if not compare_space_samples(v, dict_2[k], idx):
-            return False
-
-    return True
-
-def compare_lists(list_1, list_2, idx):
-    assert isinstance(list_1, (list, tuple))
-    assert isinstance(list_2, (list, tuple))
-
-    if len(list_1) != len(list_2):
-        raise ValueError("Lengths do not match between lists/tuples.")
-
-    for v1, v2 in zip(list_1, list_2):
-        if not compare_space_samples(v1, v2, idx):
-            return False
-        
-    return True
-    
-def compare_space_samples(sample_1, sample_2, sample_2_batch_idx=None):
-    '''Compare two samples from the same space
-    
-    Optionally, sample_2 may be a batch of samples from the same space
-    concatenated along the first dimension of the leaves. In this case,
-    sample_2_batch_idx specifies which sample to compare.
-    '''
-    if isinstance(sample_1, (dict, OrderedDict)):
-        return compare_dicts(sample_1, sample_2, sample_2_batch_idx)
-    elif isinstance(sample_1, (list, tuple)):
-        return compare_lists(sample_1, sample_2, sample_2_batch_idx)
-    elif isinstance(sample_1, np.ndarray):
-        assert isinstance(sample_2, np.ndarray)
-        if sample_2_batch_idx is not None:
-            sample_2 = sample_2[sample_2_batch_idx]
-        return compare_arrays(sample_1, sample_2)
-    elif isinstance(sample_1, (int, float)):
-        if sample_2_batch_idx is not None:
-            sample_2 = sample_2[sample_2_batch_idx]
-        if isinstance(sample_2, np.ndarray):
-            assert sample_2.size == 1, "Cannot compare scalar to non-scalar."
-            sample_2 = sample_2[0]
-        return sample_1 == sample_2
-    else:
-        raise ValueError(f"Unsupported type: {type(sample_1)}")
-
-def _get_dtype_bounds(dtype):
-    if dtype == bool:
-        return 0, 1
-    elif np.issubdtype(dtype, np.integer):
-        return np.iinfo(dtype).min, np.iinfo(dtype).max
-    elif np.issubdtype(dtype, np.unsignedinteger):
-        return np.iinfo(dtype).min, np.iinfo(dtype).max
-    elif np.issubdtype(dtype, np.floating):
-        # Gym fails on float64
-        return np.finfo(np.float32).min, np.finfo(np.float32).max
-    else:
-        raise ValueError(f"Unsupported dtype: {dtype}")
-
-def is_dict_space(space):
-    # Compatible with gym/gymnasium
-    return type(space).__name__ == 'Dict'
-
-def is_multiagent(env):
-    import pettingzoo
-    import gym
-    if inspect.isclass(env):
-        env_cls = env
-    else:
-        env_cls = type(env)
-
-    if not issubclass(env_cls, pettingzoo.AECEnv) and not issubclass(env_cls, pettingzoo.ParallelEnv):
-        assert issubclass(env_cls, gym.Env), 'Environment must subclass pettingzoo.AECEnv/ParallelEnv or gym.Env'
-        return False
-    return True
-
-def current_datetime():
-    return time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime())
-
-def myprint(d):
-    stack = d.items()
-    while stack:
-        k, v = stack.pop()
-        if isinstance(v, dict):
-            stack.extend(v.iteritems())
-        else:
-            print("%s: %s" % (k, v))
-
-class RandomState:
-    def __init__(self, seed):
-        self.rng = np.random.RandomState(seed)
-
-    def random(self):
-        return self.rng.random()
-
-    def probabilistic_round(self, n):
-            frac, integer = np.modf(n)
-            if self.random() < frac:
-                return int(integer) + 1
-            else:
-                return int(integer)
-
-    def sample(self, ary, n):
-        n_rounded = self.probabilistic_round(n)
-        return self.rng.choice(ary, n_rounded, replace=False).tolist()
-
-    def choice(self, ary):
-        return self.sample(ary, 1)[0]
-
-def format_bytes(size):
-    if size >= 1024 ** 4:
-        return f'{size / (1024 ** 4):.2f} TB'
-    elif size >= 1024 ** 3:
-        return f'{size / (1024 ** 3):.2f} GB'
-    elif size >= 1024 ** 2:
-        return f'{size / (1024 ** 2):.2f} MB'
-    elif size >= 1024:
-        return f'{size / 1024:.2f} KB'
-    else:
-        return f'{size} B'
-
-# TODO: 5% perf gain by doing cuda sync less frequently
-class Profiler:
-    def __init__(self, elapsed=True, calls=True, memory=False,
-            pytorch_memory=False, sync_cuda=True, frequency=10, amp_context=nullcontext()):
-        self.elapsed = 0 if elapsed else None
-        self.calls = 0 if calls else None
-        self.memory = None
-        self.pytorch_memory = None
-        self.prev = 0
-        self.delta = 0
-        
-        self.track_elapsed = elapsed
-        self.track_calls = calls
-        self.track_memory = memory
-        self.track_pytorch_memory = pytorch_memory
-        self.sync_cuda = sync_cuda
-        self.frequency = frequency
-        self.epoch = 0
-        
-        if memory:
-            self.process = psutil.Process()
-
-        if pytorch_memory or sync_cuda:
-            import torch
-            self.torch = torch
-
-        self.amp_context = amp_context
-
-    '''
-    @property
-    def serial(self):
-        return {
-            'elapsed': self.elapsed,
-            'calls': self.calls,
-            'memory': self.memory,
-            'pytorch_memory': self.pytorch_memory,
-            'delta': self.delta
-        }
-
-    @property
-    def delta(self):
-        ret = self.elapsed - self.prev if self.elapsed is not None else None
-        self.prev = self.elapsed
-        return ret
-    '''
-
-    def __call__(self, epoch):
-        self.epoch = epoch
-        return self
-
-    def __enter__(self):
-        if self.epoch % self.frequency != 0:
-            return self
-
-        if self.sync_cuda:
-            self.torch.cuda.synchronize()
-        self.amp_context.__enter__()
-        if self.track_elapsed:
-            self.start_time = time.perf_counter()
-        if self.track_memory:
-            self.start_mem = self.process.memory_info().rss
-        if self.track_pytorch_memory:
-            self.start_torch_mem = self.torch.cuda.memory_allocated()
-        return self
-
-    def __exit__(self, *args):
-        if self.epoch % self.frequency != 0:
-            return self
-
-        self.amp_context.__exit__(None, None, None)
-        if self.sync_cuda:
-            self.torch.cuda.synchronize()
-        if self.track_elapsed:
-            self.end_time = time.perf_counter()
-            self.delta += self.end_time - self.start_time
-            self.elapsed += self.delta
-        if self.track_calls:
-            self.calls += 1
-        if self.track_memory:
-            self.end_mem = self.process.memory_info().rss
-            self.memory = self.end_mem - self.start_mem
-        if self.track_pytorch_memory:
-            self.end_torch_mem = self.torch.cuda.memory_allocated()
-            self.pytorch_memory = self.end_torch_mem - self.start_torch_mem
-
-    def __repr__(self):
-        parts = []
-        if self.track_elapsed:
-            parts.append(f'Elapsed: {self.elapsed:.4f} s')
-        if self.track_calls:
-            parts.append(f'Calls: {self.calls}')
-        if self.track_memory:
-            parts.append(f'Memory: {format_bytes(self.memory)}')
-        if self.track_pytorch_memory:
-            parts.append(f'PyTorch Memory: {format_bytes(self.pytorch_memory)}')
-        return ", ".join(parts)
-
-    # Aliases for use without context manager
-    start = __enter__
-    stop = __exit__
-
-def profile(func):
-    name = func.__name__
-
-    def wrapper(*args, **kwargs):
-        self = args[0]
-
-        if not hasattr(self, '_timers'):
-            self._timers = {}
-
-        if name not in self._timers:
-            self._timers[name] = Profiler()
-
-        timer = self._timers[name]
-
-        with timer:
-            result = func(*args, **kwargs)
-
-        return result
-
-    return wrapper
-
-def aggregate_profilers(profiler_dicts):
-    merged = {}
-
-    for key in list(profiler_dicts[0].keys()):
-        merged[key] = Profiler()
-        for prof_dict in profiler_dicts:
-            merged[key].elapsed += prof_dict[key].elapsed
-            merged[key].calls += prof_dict[key].calls
-
-    return merged
-
-class Suppress():
-    def __init__(self):
-        self.f = StringIO()
-        self.null_1 = os.open(os.devnull, os.O_WRONLY | os.O_TRUNC | os.O_CREAT)
-        self.null_2 = os.open(os.devnull, os.O_WRONLY | os.O_TRUNC | os.O_CREAT)
-
-    def __enter__(self):
-        # Suppress C library outputs
-        self.orig_stdout = os.dup(1)
-        self.orig_stderr = os.dup(2)
-        os.dup2(self.null_1, 1)
-        os.dup2(self.null_2, 2)
-
-        # Suppress Python outputs
-        self._stdout_redirector = redirect_stdout(self.f)
-        self._stderr_redirector = redirect_stderr(self.f)
-        self._stdout_redirector.__enter__()
-        self._stderr_redirector.__enter__()
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        # Enable C library outputs
-        os.dup2(self.orig_stdout, 1)
-        os.dup2(self.orig_stderr, 2)
-        os.close(self.orig_stdout)
-        os.close(self.orig_stderr)
-        os.close(self.null_1)
-        os.close(self.null_2)
-
-        # Enable Python outputs
-        self._stdout_redirector.__exit__(exc_type, exc_val, exc_tb)
-        self._stderr_redirector.__exit__(exc_type, exc_val, exc_tb)
diff --git a/pufferlib/vector.py b/pufferlib/vector.py
index 7b5d008f88..b5d2a1a704 100644
--- a/pufferlib/vector.py
+++ b/pufferlib/vector.py
@@ -6,11 +6,8 @@
 import time
 import psutil
 
-from pufferlib import namespace
 from pufferlib.emulation import GymnasiumPufferEnv, PettingZooPufferEnv
-from pufferlib.environment import PufferEnv, set_buffers
-from pufferlib.exceptions import APIUsageError
-from pufferlib.namespace import Namespace
+from pufferlib import PufferEnv, set_buffers
 import pufferlib.spaces
 import gymnasium
 
@@ -24,19 +21,19 @@
 
 def recv_precheck(vecenv):
     if vecenv.flag != RECV:
-        raise APIUsageError('Call reset before stepping')
+        raise pufferlib.APIUsageError('Call reset before stepping')
 
     vecenv.flag = SEND
 
 def send_precheck(vecenv, actions):
     if vecenv.flag != SEND:
-        raise APIUsageError('Call (async) reset + recv before sending')
+        raise pufferlib.APIUsageError('Call (async) reset + recv before sending')
 
     actions = np.asarray(actions)
     if not vecenv.initialized:
         vecenv.initialized = True
         if not vecenv.action_space.contains(actions):
-            raise APIUsageError('Actions do not match action space')
+            raise pufferlib.APIUsageError('Actions do not match action space')
 
     vecenv.flag = RECV
     return actions
@@ -77,7 +74,7 @@ def __init__(self, env_creators, env_args, env_kwargs, num_envs, buf=None, seed=
         ptr = 0
         for i in range(num_envs):
             end = ptr + self.driver_env.num_agents
-            buf_i = namespace(
+            buf_i = dict(
                 observations=self.observations[ptr:end],
                 rewards=self.rewards[ptr:end],
                 terminals=self.terminals[ptr:end],
@@ -102,7 +99,7 @@ def __init__(self, env_creators, env_args, env_kwargs, num_envs, buf=None, seed=
     def _avg_infos(self):
         infos = {}
         for e in self.infos:
-            for k, v in pufferlib.utils.unroll_nested_dict(e):
+            for k, v in pufferlib.unroll_nested_dict(e):
                 if k not in infos:
                     infos[k] = []
 
@@ -178,25 +175,25 @@ def _worker_process(env_creators, env_args, env_kwargs, obs_shape, obs_dtype, at
     # Environments read and write directly to shared memory
     shape = (num_workers, num_envs*num_agents)
     atn_arr = np.ndarray((*shape, *atn_shape),
-        dtype=atn_dtype, buffer=shm.actions)[worker_idx]
-    buf = namespace(
+        dtype=atn_dtype, buffer=shm['actions'])[worker_idx]
+    buf = dict(
         observations=np.ndarray((*shape, *obs_shape),
-            dtype=obs_dtype, buffer=shm.observations)[worker_idx],
-        rewards=np.ndarray(shape, dtype=np.float32, buffer=shm.rewards)[worker_idx],
-        terminals=np.ndarray(shape, dtype=bool, buffer=shm.terminals)[worker_idx],
-        truncations=np.ndarray(shape, dtype=bool, buffer=shm.truncateds)[worker_idx],
-        masks=np.ndarray(shape, dtype=bool, buffer=shm.masks)[worker_idx],
+            dtype=obs_dtype, buffer=shm['observations'])[worker_idx],
+        rewards=np.ndarray(shape, dtype=np.float32, buffer=shm['rewards'])[worker_idx],
+        terminals=np.ndarray(shape, dtype=bool, buffer=shm['terminals'])[worker_idx],
+        truncations=np.ndarray(shape, dtype=bool, buffer=shm['truncateds'])[worker_idx],
+        masks=np.ndarray(shape, dtype=bool, buffer=shm['masks'])[worker_idx],
         actions=atn_arr,
     )
-    buf.masks[:] = True
+    buf['masks'][:] = True
 
     if is_native and num_envs == 1:
         envs = env_creators[0](*env_args[0], **env_kwargs[0], buf=buf, seed=seed)
     else:
         envs = Serial(env_creators, env_args, env_kwargs, num_envs, buf=buf, seed=seed*num_envs)
 
-    semaphores=np.ndarray(num_workers, dtype=np.uint8, buffer=shm.semaphores)
-    notify=np.ndarray(num_workers, dtype=bool, buffer=shm.notify)
+    semaphores=np.ndarray(num_workers, dtype=np.uint8, buffer=shm['semaphores'])
+    notify=np.ndarray(num_workers, dtype=bool, buffer=shm['notify'])
     start = time.time()
     while True:
         if notify[worker_idx]:
@@ -249,7 +246,7 @@ def __init__(self, env_creators, env_args, env_kwargs,
         import psutil
         cpu_cores = psutil.cpu_count(logical=False)
         if num_workers > cpu_cores and not overwork:
-            raise APIUsageError(' '.join([
+            raise pufferlib.APIUsageError(' '.join([
                 f'num_workers ({num_workers}) > hardware cores ({cpu_cores}) is disallowed by default.',
                 'PufferLib multiprocessing is heavily optimized for 1 process per hardware core.',
                 'If you really want to do this, set overwork=True (--vec-overwork in our demo.py).',
@@ -258,7 +255,7 @@ def __init__(self, env_creators, env_args, env_kwargs,
         num_batches = num_envs / batch_size
         if zero_copy and num_batches != int(num_batches):
             # This is so you can have n equal buffers
-            raise APIUsageError(
+            raise pufferlib.APIUsageError(
                 'zero_copy: num_envs must be divisible by batch_size')
 
         self.num_environments = num_envs
@@ -300,7 +297,7 @@ def __init__(self, env_creators, env_args, env_kwargs,
         from multiprocessing import RawArray, set_start_method
         # Mac breaks without setting fork... but setting it breaks sweeps on 2nd run
         #set_start_method('fork')
-        self.shm = namespace(
+        self.shm = dict(
             observations=RawArray(obs_ctype, num_agents * int(np.prod(obs_shape))),
             actions=RawArray(atn_ctype, num_agents * int(np.prod(atn_shape))),
             rewards=RawArray('f', num_agents),
@@ -314,18 +311,18 @@ def __init__(self, env_creators, env_args, env_kwargs,
         self.obs_batch_shape = (self.agents_per_batch, *obs_shape)
         self.atn_batch_shape = (self.workers_per_batch, agents_per_worker, *atn_shape)
         self.actions = np.ndarray((*shape, *atn_shape),
-            dtype=atn_dtype, buffer=self.shm.actions)
-        self.buf = namespace(
+            dtype=atn_dtype, buffer=self.shm['actions'])
+        self.buf = dict(
             observations=np.ndarray((*shape, *obs_shape),
-                dtype=obs_dtype, buffer=self.shm.observations),
-            rewards=np.ndarray(shape, dtype=np.float32, buffer=self.shm.rewards),
-            terminals=np.ndarray(shape, dtype=bool, buffer=self.shm.terminals),
-            truncations=np.ndarray(shape, dtype=bool, buffer=self.shm.truncateds),
-            masks=np.ndarray(shape, dtype=bool, buffer=self.shm.masks),
-            semaphores=np.ndarray(num_workers, dtype=np.uint8, buffer=self.shm.semaphores),
-            notify=np.ndarray(num_workers, dtype=bool, buffer=self.shm.notify),
+                dtype=obs_dtype, buffer=self.shm['observations']),
+            rewards=np.ndarray(shape, dtype=np.float32, buffer=self.shm['rewards']),
+            terminals=np.ndarray(shape, dtype=bool, buffer=self.shm['terminals']),
+            truncations=np.ndarray(shape, dtype=bool, buffer=self.shm['truncateds']),
+            masks=np.ndarray(shape, dtype=bool, buffer=self.shm['masks']),
+            semaphores=np.ndarray(num_workers, dtype=np.uint8, buffer=self.shm['semaphores']),
+            notify=np.ndarray(num_workers, dtype=bool, buffer=self.shm['notify']),
         )
-        self.buf.semaphores[:] = MAIN
+        self.buf['semaphores'][:] = MAIN 
 
         from multiprocessing import Pipe, Process
         self.send_pipes, w_recv_pipes = zip(*[Pipe() for _ in range(num_workers)])
@@ -359,13 +356,13 @@ def recv(self):
             # Bandaid patch for new experience buffer desync
             if self.sync_traj:
                 worker = self.waiting_workers[0]
-                sem = self.buf.semaphores[worker]
+                sem = self.buf['semaphores'][worker]
                 if sem >= MAIN:
                     self.waiting_workers.pop(0)
                     self.ready_workers.append(worker)
             else:
                 worker = self.waiting_workers.pop(0)
-                sem = self.buf.semaphores[worker]
+                sem = self.buf['semaphores'][worker]
                 if sem >= MAIN:
                     self.ready_workers.append(worker)
                 else:
@@ -427,10 +424,10 @@ def recv(self):
         self.w_slice = w_slice
         buf = self.buf
 
-        o = buf.observations[w_slice].reshape(self.obs_batch_shape)
-        r = buf.rewards[w_slice].ravel()
-        d = buf.terminals[w_slice].ravel()
-        t = buf.truncations[w_slice].ravel()
+        o = buf['observations'][w_slice].reshape(self.obs_batch_shape)
+        r = buf['rewards'][w_slice].ravel()
+        d = buf['terminals'][w_slice].ravel()
+        t = buf['truncations'][w_slice].ravel()
 
         infos = []
         for i in s_range:
@@ -439,7 +436,7 @@ def recv(self):
                 self.infos[i] = []
 
         agent_ids = self.agent_ids[w_slice].ravel()
-        m = buf.masks[w_slice].ravel()
+        m = buf['masks'][w_slice].ravel()
         self.batch_mask = m
 
         return o, r, d, t, infos, agent_ids, m
@@ -450,7 +447,7 @@ def send(self, actions):
         
         idxs = self.w_slice
         self.actions[idxs] = actions
-        self.buf.semaphores[idxs] = STEP
+        self.buf['semaphores'][idxs] = STEP
 
     def async_reset(self, seed=0):
         self.flag = RECV
@@ -462,42 +459,16 @@ def async_reset(self, seed=0):
         self.waiting_workers = list(range(self.num_workers))
         self.infos = [[] for _ in range(self.num_workers)]
 
-        self.buf.semaphores[:] = RESET
+        self.buf['semaphores'][:] = RESET
         for i in range(self.num_workers):
             start = i*self.envs_per_worker
             end = (i+1)*self.envs_per_worker
             self.send_pipes[i].send(seed+i)
 
     def notify(self):
-        self.buf.notify[:] = True
+        self.buf['notify'][:] = True
 
     def close(self):
-        '''
-        while self.waiting_workers:
-            worker = self.waiting_workers.pop(0)
-            sem = self.buf.semaphores[worker]
-            if sem >= MAIN:
-                self.ready_workers.append(worker)
-                if sem == INFO:
-                    self.recv_pipes[worker].recv()
-            else:
-                self.waiting_workers.append(worker)
-
-        self.buf.semaphores[:] = CLOSE
-        self.waiting_workers = list(range(self.num_workers))
-
-        while self.waiting_workers:
-            worker = self.waiting_workers.pop(0)
-            sem = self.buf.semaphores[worker]
-            if sem >= MAIN:
-                self.ready_workers.append(worker)
-                if sem == INFO:
-                    self.recv_pipes[worker].recv()
- 
-            else:
-                self.waiting_workers.append(worker)
-        '''
-
         for p in self.processes:
             p.terminate()
 
@@ -631,35 +602,50 @@ def close(self):
 
 def make(env_creator_or_creators, env_args=None, env_kwargs=None, backend=PufferEnv, num_envs=1, seed=0, **kwargs):
     if num_envs < 1:
-        raise APIUsageError('num_envs must be at least 1')
+        raise pufferlib.APIUsageError('num_envs must be at least 1')
     if num_envs != int(num_envs):
-        raise APIUsageError('num_envs must be an integer')
+        raise pufferlib.APIUsageError('num_envs must be an integer')
+
+    if isinstance(backend, str):
+        try:
+            backend = getattr(pufferlib.vector, backend)
+        except:
+            raise pufferlib.APIUsageError(f'Invalid backend: {backend}')
 
     if backend == PufferEnv:
         env_args = env_args or []
         env_kwargs = env_kwargs or {}
         vecenv = env_creator_or_creators(*env_args, **env_kwargs)
         if not isinstance(vecenv, PufferEnv):
-            raise APIUsageError('Native vectorization requires a native PufferEnv. Use Serial or Multiprocessing instead.')
+            raise pufferlib.APIUsageError('Native vectorization requires a native PufferEnv. Use Serial or Multiprocessing instead.')
         if num_envs != 1:
-            raise APIUsageError('Native vectorization is for PufferEnvs that handle all per-process vectorization internally. If you want to run multiple separate Python instances on a single process, use Serial or Multiprocessing instead')
+            raise pufferlib.APIUsageError('Native vectorization is for PufferEnvs that handle all per-process vectorization internally. If you want to run multiple separate Python instances on a single process, use Serial or Multiprocessing instead')
 
         return vecenv
 
     if 'num_workers' in kwargs:
-        num_workers = kwargs['num_workers']
+        if kwargs['num_workers'] == 'auto':
+            kwargs['num_workers'] = num_envs
+        
+
         # TODO: None?
-        envs_per_worker = num_envs / num_workers
+        envs_per_worker = num_envs / kwargs['num_workers']
         if envs_per_worker != int(envs_per_worker):
-            raise APIUsageError('num_envs must be divisible by num_workers')
+            raise pufferlib.APIUsageError('num_envs must be divisible by num_workers')
 
         if 'batch_size' in kwargs:
+            if kwargs['batch_size'] == 'auto':
+                if num_envs == 1:
+                    kwargs['batch_size'] = 1
+                else:
+                    kwargs['batch_size'] = num_envs // 2
+
             batch_size = kwargs['batch_size']
             if batch_size is None:
                 batch_size = num_envs
 
             if batch_size % envs_per_worker != 0:
-                raise APIUsageError(
+                raise pufferlib.APIUsageError(
                     'batch_size must be divisible by (num_envs / num_workers)')
         
  
@@ -677,19 +663,19 @@ def make(env_creator_or_creators, env_args=None, env_kwargs=None, backend=Puffer
         env_creators = env_creator_or_creators
 
     if len(env_creators) != num_envs:
-        raise APIUsageError('env_creators must be a list of length num_envs')
+        raise pufferlib.APIUsageError('env_creators must be a list of length num_envs')
     if len(env_args) != num_envs:
-        raise APIUsageError('env_args must be a list of length num_envs')
+        raise pufferlib.APIUsageError('env_args must be a list of length num_envs')
     if len(env_kwargs) != num_envs:
-        raise APIUsageError('env_kwargs must be a list of length num_envs')
+        raise pufferlib.APIUsageError('env_kwargs must be a list of length num_envs')
 
     for i in range(num_envs):
         if not callable(env_creators[i]):
-            raise APIUsageError('env_creators must be a list of callables')
+            raise pufferlib.APIUsageError('env_creators must be a list of callables')
         if not isinstance(env_args[i], (list, tuple)):
-            raise APIUsageError('env_args must be a list of lists or tuples')
-        if not isinstance(env_kwargs[i], (dict, Namespace)):
-            raise APIUsageError('env_kwargs must be a list of dictionaries')
+            raise pufferlib.APIUsageError('env_args must be a list of lists or tuples')
+        if not isinstance(env_kwargs[i], dict):
+            raise pufferlib.APIUsageError('env_kwargs must be a list of dictionaries')
 
     # Keeps batch size consistent when debugging with Serial backend
     if backend is Serial and 'batch_size' in kwargs:
@@ -701,7 +687,7 @@ def make(env_creator_or_creators, env_args=None, env_kwargs=None, backend=Puffer
     # Sanity check args
     for k in kwargs:
         if k not in ['num_workers', 'batch_size', 'zero_copy', 'overwork', 'backend']:
-            raise APIUsageError(f'Invalid argument: {k}')
+            raise pufferlib.APIUsageError(f'Invalid argument: {k}')
 
     # TODO: First step action space check
     
@@ -714,28 +700,28 @@ def make_seeds(seed, num_envs):
     err = f'seed {seed} must be an integer or a list of integers'
     if isinstance(seed, (list, tuple)):
         if len(seed) != num_envs:
-            raise APIUsageError(err)
+            raise pufferlib.APIUsageError(err)
 
         return seed
 
-    raise APIUsageError(err)
+    raise pufferlib.APIUsageError(err)
 
 def check_envs(envs, driver):
     valid = (PufferEnv, GymnasiumPufferEnv, PettingZooPufferEnv)
     if not isinstance(driver, valid):
-        raise APIUsageError(f'env_creator must be {valid}')
+        raise pufferlib.APIUsageError(f'env_creator must be {valid}')
 
     driver_obs = driver.single_observation_space
     driver_atn = driver.single_action_space
     for env in envs:
         if not isinstance(env, valid):
-            raise APIUsageError(f'env_creators must be {valid}')
+            raise pufferlib.APIUsageError(f'env_creators must be {valid}')
         obs_space = env.single_observation_space
         if obs_space != driver_obs:
-            raise APIUsageError(f'\n{obs_space}\n{driver_obs} obs space mismatch')
+            raise pufferlib.APIUsageError(f'\n{obs_space}\n{driver_obs} obs space mismatch')
         atn_space = env.single_action_space
         if atn_space != driver_atn:
-            raise APIUsageError(f'\n{atn_space}\n{driver_atn} atn space mismatch')
+            raise pufferlib.APIUsageError(f'\n{atn_space}\n{driver_atn} atn space mismatch')
 
 def autotune(env_creator, batch_size, max_envs=194, model_forward_s=0.0,
         max_env_ram_gb=32, max_batch_vram_gb=0.05, time_per_test=5): 
@@ -891,7 +877,7 @@ def autotune(env_creator, batch_size, max_envs=194, model_forward_s=0.0,
     ))
 
     for config in configs:
-        with pufferlib.utils.Suppress():
+        with pufferlib.Suppress():
             envs = make(env_creator, **config)
             envs.reset()
         actions = [envs.action_space.sample() for _ in range(1000)]
diff --git a/pufferlib/version.py b/pufferlib/version.py
deleted file mode 100644
index 13ce17d8e8..0000000000
--- a/pufferlib/version.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = '2.0.6'
diff --git a/pufferlib/wrappers.py b/pufferlib/wrappers.py
deleted file mode 100644
index 3bda419715..0000000000
--- a/pufferlib/wrappers.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from pdb import set_trace as T
-
-class GymToGymnasium:
-    def __init__(self, env):
-        self.env = env
-        self.observation_space = env.observation_space
-        self.action_space = env.action_space
-        self.render = env.render
-        self.metadata = env.metadata
-
-    def reset(self, seed=None, options=None):
-        if seed is not None:
-            ob = self.env.reset(seed=seed)
-        else:
-            ob = self.env.reset()
-        return ob, {}
-
-    def step(self, action):
-        observation, reward, done, info = self.env.step(action)
-        return observation, reward, done, False, info
-
-    def close(self):
-        self.env.close()
-
-class PettingZooTruncatedWrapper:
-    def __init__(self, env):
-        self.env = env
-        self.observation_space = env.observation_space
-        self.action_space = env.action_space
-        self.render = env.render
-
-    @property
-    def render_mode(self):
-        return self.env.render_mode
-
-    @property
-    def possible_agents(self):
-        return self.env.possible_agents
-
-    @property
-    def agents(self):
-        return self.env.agents
-
-    def reset(self, seed=None):
-        if seed is not None:
-            ob, info = self.env.reset(seed=seed)
-        else:
-            ob, info = self.env.reset()
-        info = {k: {} for k in ob}
-        return ob, info
-
-    def step(self, actions):
-        observations, rewards, terminals, truncations, infos = self.env.step(actions)
-        return observations, rewards, terminals, truncations, infos
-
-    def close(self):
-        self.env.close()
diff --git a/pyproject.toml b/pyproject.toml
index b4d35b4021..d1bc56ce11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["setuptools", "wheel", "Cython", "numpy"]
+requires = ["setuptools", "wheel", "Cython", "numpy", "torch"]
 build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
index c43853440e..4b6a8414a7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,7 @@
+#TODO:
+# --no-build-isolation for 5090
+# Make c and torch compile at the same time
+
 from setuptools import find_packages, find_namespace_packages, setup, Extension
 from Cython.Build import cythonize
 import numpy
@@ -6,46 +10,143 @@
 import zipfile
 import tarfile
 import platform
+import shutil
+
+from setuptools.command.build_ext import build_ext
+from torch.utils import cpp_extension
+from torch.utils.cpp_extension import (
+    CppExtension,
+    CUDAExtension,
+    BuildExtension,
+    CUDA_HOME,
+)
+
 	
-#  python3 setup.py built_ext --inplace
+import pufferlib
+VERSION = pufferlib.__version__
+
+# Build with DEBUG=1 to enable debug symbols
+DEBUG = os.getenv("DEBUG", "0") == "1"
+
+# Put C env names here. PufferLib will look for
+# pufferlib/ocean/<name>/binding.c
+c_extensions_names = [
+    'gpudrive',
+    'squared',
+    'pong',
+    'breakout',
+    'enduro',
+    'blastar',
+    'grid',
+    'nmmo3',
+    'tactical',
+    'go',
+    'cartpole'
+]
 
-VERSION = '2.0.6'
+# Put paths to Cython extensions here (without the .pyx suffix)
+# Note we are trying to move away from Cython,
+# because our C envs are lighter weight and
+# easier to debug (you can run gdb --args python ...)
+cython_extension_paths = [
+    'pufferlib/ocean/moba/cy_moba',
+    'pufferlib/ocean/snake/cy_snake',
+    'pufferlib/ocean/connect4/cy_connect4',
+    'pufferlib/ocean/tripletriad/cy_tripletriad',
+    'pufferlib/ocean/rware/cy_rware',
+    'pufferlib/ocean/trash_pickup/cy_trash_pickup',
+    'pufferlib/ocean/cpr/cy_cpr',
+    'pufferlib/ocean/tower_climb/cy_tower_climb',
+]
 
+# Download prebuilt raylib for your platform
 RAYLIB_BASE = 'https://github.com/raysan5/raylib/releases/download/5.5/'
 RAYLIB_NAME = 'raylib-5.5_macos' if platform.system() == "Darwin" else 'raylib-5.5_linux_amd64'
-
-RAYLIB_LINUX = 'raylib-5.5_linux_amd64'
-RAYLIB_LINUX_URL = RAYLIB_BASE + RAYLIB_LINUX + '.tar.gz'
 RLIGHTS_URL = 'https://raw.githubusercontent.com/raysan5/raylib/refs/heads/master/examples/shaders/rlights.h'
 
-if not os.path.exists(RAYLIB_LINUX):
-    urllib.request.urlretrieve(RAYLIB_LINUX_URL, RAYLIB_LINUX + '.tar.gz')
-    with tarfile.open(RAYLIB_LINUX + '.tar.gz', 'r') as tar_ref:
-        tar_ref.extractall()
-
-    os.remove(RAYLIB_LINUX + '.tar.gz')
-    urllib.request.urlretrieve(RLIGHTS_URL, 'raylib-5.5_linux_amd64/include/rlights.h')
+def download_raylib(platform, url):  # platform: extracted dir name; url: release archive (.tar.gz or .zip)
+    if not os.path.exists(platform):  # skip the download when the directory already exists
+        archive = url.rsplit('/', 1)[-1]  # keep the real suffix so the archive format is detected correctly
+        urllib.request.urlretrieve(url, archive)
+        shutil.unpack_archive(archive)  # handles the tarballs and the WebAssembly .zip alike
 
-RAYLIB_MACOS = 'raylib-5.5_macos'
-RAYLIB_MACOS_URL = RAYLIB_BASE + RAYLIB_MACOS + '.tar.gz'
-if not os.path.exists(RAYLIB_MACOS):
-    urllib.request.urlretrieve(RAYLIB_MACOS_URL, RAYLIB_MACOS + '.tar.gz')
-    with tarfile.open(RAYLIB_MACOS + '.tar.gz', 'r') as tar_ref:
-        tar_ref.extractall()
-
-    os.remove(RAYLIB_MACOS + '.tar.gz')
-    urllib.request.urlretrieve(RLIGHTS_URL, 'raylib-5.5_macos/include/rlights.h')
+        os.remove(archive)
+        urllib.request.urlretrieve(RLIGHTS_URL, platform + '/include/rlights.h')
 
 
 RAYLIB_WASM = 'raylib-5.5_webassembly'
 RAYLIB_WASM_URL = RAYLIB_BASE + RAYLIB_WASM + '.zip'
-if not os.path.exists(RAYLIB_WASM):
-    urllib.request.urlretrieve(RAYLIB_WASM_URL, RAYLIB_WASM + '.zip')
-    with zipfile.ZipFile(RAYLIB_WASM + '.zip', 'r') as zip_ref:
-        zip_ref.extractall()
+download_raylib(RAYLIB_WASM, RAYLIB_WASM_URL)
 
-    os.remove(RAYLIB_WASM + '.zip')
-    urllib.request.urlretrieve(RLIGHTS_URL, 'raylib-5.5_webassembly/include/rlights.h')
+# Shared compile args for all platforms
+extra_compile_args = [
+    '-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION',
+    '-DPLATFORM_DESKTOP',
+]
+extra_link_args = [
+    '-fwrapv'
+]
+cxx_args = [
+    '-fdiagnostics-color=always',
+]
+nvcc_args = []
+
+if DEBUG:
+    extra_compile_args += [
+        '-O0',
+        '-g',
+        '-fsanitize=address,undefined,bounds,pointer-overflow,leak',
+    ]
+    extra_link_args += [
+        '-g',
+    ]
+    cxx_args += [
+        '-O0',
+        '-g',
+    ]
+    nvcc_args += [
+        '-O0',
+        '-g',
+    ]
+else:
+    extra_compile_args += [
+        '-O2',
+    ]
+    extra_link_args += [
+        '-O2',
+    ]
+    cxx_args += [
+        '-O3',
+    ]
+    nvcc_args += [
+        '-O3',
+    ]
+
+system = platform.system()
+if system == 'Linux':
+    extra_compile_args += [
+        '-Wno-alloc-size-larger-than',
+        '-fmax-errors=3',
+    ]
+    extra_link_args += [
+        '-Bsymbolic-functions',
+    ]
+    RAYLIB_LINUX = 'raylib-5.5_linux_amd64'
+    RAYLIB_LINUX_URL = RAYLIB_BASE + RAYLIB_LINUX + '.tar.gz'
+    download_raylib(RAYLIB_LINUX, RAYLIB_LINUX_URL)
+elif system == 'Darwin':
+    extra_compile_args += [
+    ]
+    extra_link_args += [
+        '-framework', 'Cocoa',
+        '-framework', 'OpenGL',
+        '-framework', 'IOKit',
+    ]
+    RAYLIB_MACOS = 'raylib-5.5_macos'
+    RAYLIB_MACOS_URL = RAYLIB_BASE + RAYLIB_MACOS + '.tar.gz'
+    download_raylib(RAYLIB_MACOS, RAYLIB_MACOS_URL)
+else:
+    raise ValueError(f'Unsupported system: {system}')
 
 # Default Gym/Gymnasium/PettingZoo versions
 # Gym:
@@ -61,31 +162,6 @@
 GYM_VERSION = '0.23'
 PETTINGZOO_VERSION = '1.24.1'
 
-docs = [
-    'sphinx==5.0.0',
-    'sphinx-rtd-theme==0.5.1',
-    'sphinxcontrib-youtube==1.0.1',
-    'sphinx-rtd-theme==0.5.1',
-    'sphinx-design==0.4.1',
-    'furo==2023.3.27',
-]
-
-cleanrl = [
-    'stable_baselines3==2.1.0',
-    'tensorboard==2.11.2',
-    'torch',
-    'tyro==0.8.6',
-    'wandb==0.19.1',
-    'scipy',
-    'pyro-ppl',
-    'neptune',
-    'heavyball',
-]
-
-ray = [
-    'ray==2.23.0',
-]
-
 environments = {
     'avalon': [
         f'gym=={GYM_VERSION}',
@@ -237,6 +313,30 @@
     ],
 }
 
+docs = [
+    'sphinx==5.0.0',
+    'sphinx-rtd-theme==0.5.1',
+    'sphinxcontrib-youtube==1.0.1',
+    'sphinx-rtd-theme==0.5.1',
+    'sphinx-design==0.4.1',
+    'furo==2023.3.27',
+]
+
+cleanrl = [
+    'stable_baselines3==2.1.0',
+    'tensorboard==2.11.2',
+    'torch',
+    'tyro==0.8.6',
+    'wandb==0.19.1',
+    'scipy',
+    'pyro-ppl',
+    'neptune',
+    'heavyball',
+]
+
+ray = [
+    'ray==2.23.0',
+]
 
 # These are the environments that PufferLib has made
 # compatible with the latest version of Gym/Gymnasium/PettingZoo
@@ -264,69 +364,64 @@
     'vizdoom',
 ]]
 
-extension_paths = [
-    #'pufferlib/ocean/nmmo3/cy_nmmo3',
-    'pufferlib/ocean/moba/cy_moba',
-    # 'pufferlib/ocean/tactical/c_tactical',
-    #'pufferlib/ocean/squared/cy_squared',
-    'pufferlib/ocean/snake/cy_snake',
-    'pufferlib/ocean/gpudrive/cy_gpudrive',
-    #'pufferlib/ocean/pong/cy_pong',
-    # 'pufferlib/ocean/breakout/cy_breakout',
-    # 'pufferlib/ocean/cartpole/cy_cartpole',
-    # 'pufferlib/ocean/connect4/cy_connect4',
-    #'pufferlib/ocean/grid/cy_grid',
-    'pufferlib/ocean/tripletriad/cy_tripletriad',
-    # 'pufferlib/ocean/go/cy_go',
-    'pufferlib/ocean/rware/cy_rware',
-    'pufferlib/ocean/trash_pickup/cy_trash_pickup',
-    'pufferlib/ocean/cpr/cy_cpr',
-    'pufferlib/ocean/tower_climb/cy_tower_climb',
-    'pufferlib/ocean/gpudrive/cy_gpudrive',
-]
+# Extensions 
+class BuildExt(build_ext):
+    def run(self):
+        self.run_command('build_torch')
+        self.run_command('build_c')
 
-system = platform.system()
-if system == 'Darwin':
-    # On macOS, use @loader_path.
-    # The extension “.so” is typically in pufferlib/ocean/...,
-    # and “raylib/lib” is (maybe) two directories up from ocean/<env>.
-    # So @loader_path/../../raylib/lib is common.
-    extra_compile_args = ['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION','-DPLATFORM_DESKTOP', '-O2']
-    extra_link_args=['-fwrapv', '-framework', 'Cocoa', '-framework', 'OpenGL', '-framework', 'IOKit']
-
-elif system == 'Linux':
-    extra_compile_args = ['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fmax-errors=3', '-g']
-    extra_link_args=['-fwrapv', '-Bsymbolic-functions', '-O2']
-
-    # On Linux, $ORIGIN works
-else:
-    raise ValueError(f'Unsupported system: {system}')
+class CBuildExt(build_ext):
+    def run(self):
+        self.extensions = [e for e in self.extensions if e.name != "pufferlib._C"]
+        super().run()
+
+class TorchBuildExt(cpp_extension.BuildExtension):
+    def run(self):
+        self.extensions = [e for e in self.extensions if e.name == "pufferlib._C"]
+        super().run()
 
-extensions = [Extension(
-    path.replace('/', '.'),
-    [path + '.pyx'],
-    include_dirs=[numpy.get_include(), 'raylib/include'],
+RAYLIB_A = f'{RAYLIB_NAME}/lib/libraylib.a'
+INCLUDE = [numpy.get_include(), 'raylib/include']
+extension_kwargs = dict(
+    include_dirs=INCLUDE,
     extra_compile_args=extra_compile_args,
     extra_link_args=extra_link_args,
-    extra_objects=[f'{RAYLIB_NAME}/lib/libraylib.a'],
-) for path in extension_paths]
-
-#c_args = ['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O0', '-Wno-alloc-size-larger-than', '-g']
-#c_args = ['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2']
-#c_args += "-Wsign-compare -DNDEBUG -g -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC".split()
-
-pure_c_extensions = ['squared', 'pong', 'breakout', 'enduro', 'blastar', 'grid', 'nmmo3', 'tactical', 'go', 'cartpole', 'connect4']
+    extra_objects=[RAYLIB_A],
+)
 
-extensions += [
+c_extensions = [
     Extension(
         f'pufferlib.ocean.{name}.binding',
         sources=[f'pufferlib/ocean/{name}/binding.c'],
-        include_dirs=[numpy.get_include(), 'raylib/include'],
-        extra_compile_args=extra_compile_args,# + ['-fsanitize=address,undefined,bounds,pointer-overflow,leak'],
-        extra_link_args=extra_link_args,# + ['-fsanitize=address,undefined,bounds,pointer-overflow,leak', '-g'],
-        extra_objects=[f'{RAYLIB_NAME}/lib/libraylib.a'],
+        **extension_kwargs,
+    )
+    for name in c_extensions_names
+]
+
+cython_extensions = cythonize([
+    Extension(
+        path.replace('/', '.'),
+        [path + '.pyx'],
+        **extension_kwargs,
     )
-    for name in pure_c_extensions
+    for path in cython_extension_paths
+])
+
+# Check if CUDA compiler is available. You need cuda dev, not just runtime.
+if shutil.which("nvcc"):
+    extension = CUDAExtension
+else:
+    extension = CppExtension
+
+torch_extensions = [
+   extension(
+        "pufferlib._C",
+        ["pufferlib.cpp", "pufferlib/pufferlib.cu"],
+        extra_compile_args = {
+            "cxx": cxx_args,
+            "nvcc": nvcc_args,
+        }
+    ),
 ]
 
 # Prevent Conda from injecting garbage compile flags
@@ -341,7 +436,7 @@
 for key, value in cfg_vars.items():
     if value and '-fno-strict-overflow' in str(value):
         cfg_vars[key] = value.replace('-fno-strict-overflow', '')
- 
+
 setup(
     name="pufferlib",
     description="PufferAI Library"
@@ -362,6 +457,7 @@
         f'gym<={GYM_VERSION}',
         f'gymnasium<={GYMNASIUM_VERSION}',
         f'pettingzoo<={PETTINGZOO_VERSION}',
+        'torch',
         'shimmy[gym-v21]',
         'psutil==5.9.5',
         'pynvml',
@@ -375,25 +471,12 @@
         'common': common,
         **environments,
     },
-    ext_modules = cythonize([
-        "pufferlib/extensions.pyx",
-        "c_advantage.pyx",
-        "pufferlib/puffernet.pyx",
-        *extensions,
-    ], 
-    compiler_directives={
-        'language_level': 3,
-        'boundscheck': False,
-        'initializedcheck': False,
-        'wraparound': False,
-        'cdivision': True,
-        'nonecheck': False,
-        'profile': False,
+    ext_modules = cython_extensions + c_extensions + torch_extensions,
+    cmdclass={
+        "build_ext": BuildExt,
+        "build_torch": TorchBuildExt,
+        "build_c": CBuildExt,
     },
-       #nthreads=6,
-       #annotate=True,
-       #compiler_directives={'profile': True},# annotate=True
-    ),
     include_dirs=[numpy.get_include(), RAYLIB_NAME + '/include'],
     python_requires=">=3.9",
     license="MIT",
@@ -405,10 +488,10 @@
         "Intended Audience :: Science/Research",
         "Intended Audience :: Developers",
         "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
 )
 #stable_baselines3
diff --git a/tests/test.py b/tests/test.py
index 813bd8dbe1..1890c3c2ef 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -13,6 +13,100 @@
 import warnings
 warnings.filterwarnings("ignore")
 
+class RandomState:
+    def __init__(self, seed):
+        self.rng = np.random.RandomState(seed)
+
+    def random(self):
+        return self.rng.random()
+
+    def probabilistic_round(self, n):
+            frac, integer = np.modf(n)
+            if self.random() < frac:
+                return int(integer) + 1
+            else:
+                return int(integer)
+
+    def sample(self, ary, n):
+        n_rounded = self.probabilistic_round(n)
+        return self.rng.choice(ary, n_rounded, replace=False).tolist()
+
+    def choice(self, ary):
+        return self.sample(ary, 1)[0]
+
+
+# TODO: Fix this. Was in utils.py. Only used for tests
+def make_zeros_like(data):
+    if isinstance(data, dict):
+        return {k: make_zeros_like(v) for k, v in data.items()}
+    elif isinstance(data, (list, tuple)):
+        return [make_zeros_like(v) for v in data]
+    elif isinstance(data, np.ndarray):
+        return np.zeros_like(data)
+    elif isinstance(data, (int, float)):
+        return 0
+    else:
+        raise ValueError(f'Unsupported type: {type(data)}')
+
+def compare_arrays(array_1, array_2):
+    assert isinstance(array_1, np.ndarray)
+    assert isinstance(array_2, np.ndarray)
+    assert array_1.shape == array_2.shape
+    return np.allclose(array_1, array_2)
+
+def compare_dicts(dict_1, dict_2, idx):
+    assert isinstance(dict_1, (dict, OrderedDict))
+    assert isinstance(dict_2, (dict, OrderedDict))
+
+    if not all(k in dict_2 for k in dict_1):
+        raise ValueError("Keys do not match between dictionaries.")
+
+    for k, v in dict_1.items():
+        if not compare_space_samples(v, dict_2[k], idx):
+            return False
+
+    return True
+
+def compare_lists(list_1, list_2, idx):
+    assert isinstance(list_1, (list, tuple))
+    assert isinstance(list_2, (list, tuple))
+
+    if len(list_1) != len(list_2):
+        raise ValueError("Lengths do not match between lists/tuples.")
+
+    for v1, v2 in zip(list_1, list_2):
+        if not compare_space_samples(v1, v2, idx):
+            return False
+        
+    return True
+    
+def compare_space_samples(sample_1, sample_2, sample_2_batch_idx=None):
+    '''Compare two samples from the same space
+    
+    Optionally, sample_2 may be a batch of samples from the same space
+    concatenated along the first dimension of the leaves. In this case,
+    sample_2_batch_idx specifies which sample to compare.
+    '''
+    if isinstance(sample_1, (dict, OrderedDict)):
+        return compare_dicts(sample_1, sample_2, sample_2_batch_idx)
+    elif isinstance(sample_1, (list, tuple)):
+        return compare_lists(sample_1, sample_2, sample_2_batch_idx)
+    elif isinstance(sample_1, np.ndarray):
+        assert isinstance(sample_2, np.ndarray)
+        if sample_2_batch_idx is not None:
+            sample_2 = sample_2[sample_2_batch_idx]
+        return compare_arrays(sample_1, sample_2)
+    elif isinstance(sample_1, (int, float)):
+        if sample_2_batch_idx is not None:
+            sample_2 = sample_2[sample_2_batch_idx]
+        if isinstance(sample_2, np.ndarray):
+            assert sample_2.size == 1, "Cannot compare scalar to non-scalar."
+            sample_2 = sample_2[0]
+        return sample_1 == sample_2
+    else:
+        raise ValueError(f"Unsupported type: {type(sample_1)}")
+
+
 
 def test_gymnasium_emulation(env_cls, steps=100):
     raw_env = env_cls()
diff --git a/tests/test_env_binding.py b/tests/test_env_binding.py
new file mode 100644
index 0000000000..cefcef857d
--- /dev/null
+++ b/tests/test_env_binding.py
@@ -0,0 +1,116 @@
+from pufferlib.ocean.breakout import breakout
+
+kwargs = dict(
+    frameskip=1,
+    width=576,
+    height=330,
+    paddle_width=62,
+    paddle_height=8,
+    ball_width=32,
+    ball_height=32,
+    brick_width=32,
+    brick_height=12,
+    brick_rows=6,
+    brick_cols=18,
+    continuous=False,
+)
+
+def test_env_binding():
+    reference = breakout.Breakout()
+
+    # Correct usage
+    c_env = breakout.binding.env_init(
+        reference.observations,
+        reference.actions,
+        reference.rewards,
+        reference.terminals,
+        reference.truncations,
+        0,
+        **kwargs
+    )
+    c_envs = breakout.binding.vectorize(c_env)
+    breakout.binding.vec_reset(c_envs, 0)
+    breakout.binding.vec_step(c_envs)
+    breakout.binding.vec_close(c_envs)
+
+    # Correct vec usage
+    c_envs = breakout.binding.vec_init(
+        reference.observations,
+        reference.actions,
+        reference.rewards,
+        reference.terminals,
+        reference.truncations,
+        reference.num_agents,
+        0,
+        **kwargs
+    )
+
+    # Correct vec usage
+    c_envs = breakout.binding.vec_init(
+        reference.observations,
+        reference.actions,
+        reference.rewards,
+        reference.terminals,
+        reference.truncations,
+        reference.num_agents,
+        0,
+        **kwargs
+    )
+    breakout.binding.vec_reset(c_envs, 0)
+    breakout.binding.vec_step(c_envs)
+    breakout.binding.vec_close(c_envs)
+
+    try:
+        c_env = breakout.binding.env_init()
+        raise Exception('init missing args. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+    try:
+        c_env = breakout.binding.env_init(
+            reference.observations,
+            reference.actions,
+            reference.rewards,
+            reference.terminals,
+            reference.truncations,
+            reference.num_agents,
+            0,
+        )
+        raise Exception('init missing kwarg. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+    try:
+        c_envs = breakout.binding.vec_init()
+        raise Exception('vec_init missing args. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+    try:
+        c_envs = breakout.binding.vec_init(
+            reference.observations,
+            reference.actions,
+            reference.rewards,
+            reference.terminals,
+            reference.truncations,
+            reference.num_agents,
+            0,
+        )
+        raise Exception('vec_init missing kwarg. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+    try:
+        breakout.binding.vec_reset()
+        raise Exception('vec_reset missing arg. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+    try:
+        breakout.binding.vec_step()
+        raise Exception('vec_step missing arg. Should have thrown TypeError')
+    except TypeError:
+        pass
+
+if __name__ == '__main__':
+    test_env_binding()