Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 31 additions & 14 deletions pufferlib/ocean/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def __init__(
# Road features size (lanes + boundaries)
self.obs_slots_lane_kept = env.obs_slots_lane_kept
self.obs_slots_boundary_kept = env.obs_slots_boundary_kept
self.obs_slots_lane_n = env.obs_slots_lane_n
self.obs_slots_boundary_n = env.obs_slots_boundary_n
self.road_features_count = env.road_features
# Traffic control size
self.obs_slots_traffic_controls_n = env.obs_slots_traffic_controls_n
Expand Down Expand Up @@ -116,9 +118,18 @@ def __init__(

def forward(self, observations, ego_dim):
# Extract and slice observations from the flat buffer

if self.training:
obs_slots_lane_kept = self.obs_slots_lane_kept
obs_slots_boundary_kept = self.obs_slots_boundary_kept
else:
# During evaluation, enforce zero dropout (also in pufferlib/ocean/benchmark/manager.py)
obs_slots_lane_kept = self.obs_slots_lane_n
obs_slots_boundary_kept = self.obs_slots_boundary_n

partner_dim = self.obs_slots_partners_n * self.partner_features_count
lane_dim = self.obs_slots_lane_kept * self.road_features_count
boundary_dim = self.obs_slots_boundary_kept * self.road_features_count
lane_dim = obs_slots_lane_kept * self.road_features_count
boundary_dim = obs_slots_boundary_kept * self.road_features_count
traffic_control_dim = self.obs_slots_traffic_controls_n * self.traffic_control_features_count

slide_idx = ego_dim
Expand All @@ -144,12 +155,12 @@ def forward(self, observations, ego_dim):
feature_list = [ego_features]

# Encode Lanes and Boundaries separately
if self.obs_slots_lane_kept > 0:
lane_objects = lane_observations.view(-1, self.obs_slots_lane_kept, self.road_features_count)
if obs_slots_lane_kept > 0:
lane_objects = lane_observations.view(-1, obs_slots_lane_kept, self.road_features_count)
lane_features = self.lane_encoder(lane_objects).max(dim=1).values
feature_list.append(lane_features)
if self.obs_slots_boundary_kept > 0:
boundary_objects = boundary_observations.view(-1, self.obs_slots_boundary_kept, self.road_features_count)
if obs_slots_boundary_kept > 0:
boundary_objects = boundary_observations.view(-1, obs_slots_boundary_kept, self.road_features_count)
boundary_features = self.boundary_encoder(boundary_objects).max(dim=1).values
feature_list.append(boundary_features)

Expand Down Expand Up @@ -192,9 +203,15 @@ def forward(self, observations, ego_dim):
return self.backbone(concat_features)

def pool_slot_counts(self, observations, ego_dim):
if self.training:
obs_slots_lane_kept = self.obs_slots_lane_kept
obs_slots_boundary_kept = self.obs_slots_boundary_kept
else:
obs_slots_lane_kept = self.obs_slots_lane_n
obs_slots_boundary_kept = self.obs_slots_boundary_n
partner_dim = self.obs_slots_partners_n * self.partner_features_count
lane_dim = self.obs_slots_lane_kept * self.road_features_count
boundary_dim = self.obs_slots_boundary_kept * self.road_features_count
lane_dim = obs_slots_lane_kept * self.road_features_count
boundary_dim = obs_slots_boundary_kept * self.road_features_count
traffic_control_dim = self.obs_slots_traffic_controls_n * self.traffic_control_features_count

slide_idx = ego_dim + self.conditioning_dim
Expand All @@ -207,18 +224,18 @@ def pool_slot_counts(self, observations, ego_dim):
traffic_control_observations = observations[:, slide_idx : slide_idx + traffic_control_dim]

counts = {}
if self.obs_slots_lane_kept > 0:
lane_objects = lane_observations.view(-1, self.obs_slots_lane_kept, self.road_features_count)
if obs_slots_lane_kept > 0:
lane_objects = lane_observations.view(-1, obs_slots_lane_kept, self.road_features_count)
lane_winners = self.lane_encoder(lane_objects).max(dim=1).indices
lane_counts = torch.zeros(
observations.shape[0], self.obs_slots_lane_kept, device=observations.device, dtype=torch.int64
observations.shape[0], obs_slots_lane_kept, device=observations.device, dtype=torch.int64
)
counts["pool_lane"] = lane_counts.scatter_add(1, lane_winners, torch.ones_like(lane_winners))
if self.obs_slots_boundary_kept > 0:
boundary_objects = boundary_observations.view(-1, self.obs_slots_boundary_kept, self.road_features_count)
if obs_slots_boundary_kept > 0:
boundary_objects = boundary_observations.view(-1, obs_slots_boundary_kept, self.road_features_count)
boundary_winners = self.boundary_encoder(boundary_objects).max(dim=1).indices
boundary_counts = torch.zeros(
observations.shape[0], self.obs_slots_boundary_kept, device=observations.device, dtype=torch.int64
observations.shape[0], obs_slots_boundary_kept, device=observations.device, dtype=torch.int64
)
counts["pool_boundary"] = boundary_counts.scatter_add(
1, boundary_winners, torch.ones_like(boundary_winners)
Expand Down
132 changes: 132 additions & 0 deletions scripts/cluster_configs/nightly_best_launch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Multi-agent "best launch" nightly training program config.
# Derived from the oignons2 (emerge/temp_training) configuration at:
# weights/oignons2/config.yaml
# Adapted to NYU Greene cluster paths and resource shape. Multi-agent gigaflow
# training over the 8 local CARLA maps with the oignons2 policy architecture,
# reward shaping (conditioning + randomization on), and partner-blindness /
# phantom-braking perturbations enabled. Keys here override
# pufferlib/config/ocean/drive.ini.
#
# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped).

# Environment — multi-agent gigaflow over all 8 local CARLA towns
env.simulation_mode: gigaflow
env.map_dir: pufferlib/resources/drive/binaries/carla
env.num_maps: 8
env.num_agents: 720000
env.min_agents_per_env: 1
env.max_agents_per_env: 150
env.use_map_cache: 1
env.scenario_length: 1200
# 0 disables periodic scenario resampling — every sub-env keeps the same map
# for the full run instead of swapping every 38400 steps.
env.resample_frequency: 0
env.termination_mode: 1
env.inactive_agent_threshold: 0.4
env.dynamics_model: jerk
env.target_type: static
env.spawn_initial_speed: 0.0
env.dt: 0.3
env.traffic_light_behavior: 1
env.collision_behavior: 1
env.offroad_behavior: 1

# Goal setup — three sequential waypoints, route-based placement with 20–60 m spacing
env.num_target_waypoints: 3
env.min_waypoint_spacing: 20.0
env.max_waypoint_spacing: 60.0
env.goal_radius: 2.0
env.goal_speed: 3.0

# Observation shaping (matches oignons2)
env.obs_slots_lane_n: 80
env.obs_slots_boundary_n: 80
env.obs_slots_partners_n: 16
env.obs_slots_traffic_controls_n: 4
env.obs_range_partner_m: 200.0
env.obs_range_road_front_m: 200.0
env.obs_range_road_behind_m: 40.0
env.obs_range_road_side_m: 50.0
env.obs_range_traffic_control_m: 100.0
env.obs_norm_xy_offset_m: 200.0
env.obs_norm_goal_offset_m: 200.0
env.obs_norm_road_seg_length_m: 10.0
env.obs_norm_road_seg_width_m: 5.0
env.obs_norm_veh_length_m: 15.0
env.obs_norm_veh_width_m: 10.0
env.obs_dropout_lane: 0.5
env.obs_dropout_boundary: 0.4

# Perturbations (on during training; eval's clean macro zeros these)
env.partner_blindness_prob: 0.03
env.partner_blindness_trigger_prob: 0.05
env.phantom_braking_prob: 0.02
env.phantom_braking_trigger_prob: 0.02
env.phantom_braking_duration: 10

# Reward shaping (oignons2 weights + conditioning/randomization on)
env.reward_conditioning: true
env.reward_randomization: true
env.reward_goal: 1.0
env.reward_collision: 1.5
env.reward_offroad: 1.5
env.reward_stop_line: 1.0
env.reward_comfort: 0.05
env.reward_lane_align: 0.025
env.reward_vel_align: 1.0
env.reward_lane_center: 0.005
env.reward_velocity: 0.0025
env.reward_reverse: 0.005
env.reward_timestep: 2.5e-05
env.reward_overspeed: 0.05

# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder
policy.input_size: 256
policy.backbone_hidden_size: 1024
policy.backbone_num_layers: 3
policy.actor_hidden_size: 1024
policy.actor_num_layers: 0
policy.critic_hidden_size: 1024
policy.critic_num_layers: 0
policy.split_network: true
policy.encoder_gigaflow: true
policy.dropout: 0.0

# Training — 10B steps, large minibatch, compiled bfloat16
train.total_timesteps: 10_000_000_000
train.learning_rate: 0.0005
train.minibatch_size: 153600
train.max_minibatch_size: 153600
train.update_epochs: 3
train.bptt_horizon: 128
train.compile: true
train.precision: bfloat16
train.normalize_rewards: false
train.checkpoint_interval: 500
train.optimizer: adamw

# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else
# (validation_replay needs nuPlan bins; behaviors_* need labelled scene
# categories not used in this nightly). Interval 250 keeps eval cost ~5% of
# wall-clock instead of ~85%.
eval.validation_defaults.interval: 250
eval.validation_replay.enabled: 0
eval.validation_gigaflow.render_backend: egl
eval.behaviors_full_dir.enabled: 0
eval.behaviors_hard_stop.enabled: 0
eval.behaviors_highway_straight.enabled: 0
eval.behaviors_lane_change.enabled: 0
eval.behaviors_merge.enabled: 0
eval.behaviors_parked_cars.enabled: 0
eval.behaviors_roundabout.enabled: 0
eval.behaviors_stopped_traffic.enabled: 0
eval.behaviors_traffic_light_green.enabled: 0
eval.behaviors_traffic_light_stop.enabled: 0
eval.behaviors_unprotected_left.enabled: 0
eval.behaviors_unprotected_right.enabled: 0

# W&B — project nightly-multi-agent. The group name must not contain spaces:
# submit_cluster.py joins the inner command without quoting arg values.
wandb: True
wandb_project: nightly-multi-agent
wandb_group: Nightly_MultiAgent
41 changes: 41 additions & 0 deletions scripts/launch_nightly_best.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Launch multi-agent "best launch" nightly training on the cluster via
# submit_cluster.py. Derived from oignons2 (emerge/temp_training); see
# scripts/cluster_configs/nightly_best_launch.yaml for the config.
# One submission per seed (three by default), date-stamped wandb run names.
#
# Run on the login node from the repository root (the script uses
# repo-relative paths, sources the venv and submits from there):
#   ./scripts/launch_nightly_best.sh
#
# Overridable via the environment:
#   PROGRAM_CONFIG  default: scripts/cluster_configs/nightly_best_launch.yaml
#   COMPUTE_CONFIG  default: scripts/cluster_configs/nyu_greene.yaml
#   SEEDS           colon-separated seeds; each one is submitted as its own
#                   job with --args train.seed=<seed> (default 0:1:2)
#   ACCOUNT/PARTITION/TIME/MEM  SLURM overrides
#   PREFIX          run-name prefix (default <date>_multi_agent)
set -euo pipefail

# Take the date exactly once so the default PREFIX and the per-seed run names
# cannot disagree if the script happens to run across midnight.
DATE_STAMP="$(date +%Y-%m-%d)"

PROGRAM_CONFIG="${PROGRAM_CONFIG:-scripts/cluster_configs/nightly_best_launch.yaml}"
COMPUTE_CONFIG="${COMPUTE_CONFIG:-scripts/cluster_configs/nyu_greene.yaml}"
ACCOUNT="${ACCOUNT:-torch_pr_924_tandon_advanced}"
PARTITION="${PARTITION:-h200_tandon}"
TIME="${TIME:-1800}"
MEM="${MEM:-192gb}"
SEEDS="${SEEDS:-0:1:2}"
PREFIX="${PREFIX:-${DATE_STAMP}_multi_agent}"

source "/scratch/$USER/venvs/pufferdrive/bin/activate"

# One submission per seed so we can pass a per-seed run_name (wandb display
# name like 2026-06-01_seed0)
IFS=':' read -ra SEED_LIST <<< "$SEEDS"
for SEED in "${SEED_LIST[@]}"; do
    # Skip empty fields (e.g. SEEDS="0::2") rather than submitting a job with
    # an empty train.seed value.
    if [[ -z "$SEED" ]]; then
        continue
    fi
    python scripts/submit_cluster.py \
        --save_dir "/scratch/$USER/runs" \
        --prefix "$PREFIX" \
        --compute_config "$COMPUTE_CONFIG" \
        --program_config "$PROGRAM_CONFIG" \
        --container --heartbeat \
        --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" --mem "$MEM" \
        --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}"
done
Loading
Loading