Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ compute_eval_metrics = False
; --- Goal / Target ---
; Target representation - options: "static", "dynamic"
target_type = "static"
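; True: place goals along the agent's route (existing behavior, on-lane and
; in front of the agent). False: place each goal at a random drivable point
; whose distance from the previous anchor (the agent for the first goal, the
; previous goal afterwards) lies in [min_waypoint_spacing, max_waypoint_spacing].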
goal_on_lane = True
; Meters around goal to be considered "reached"
goal_radius = 2.0
; Maximum speed at final waypoint to count goal reward
Expand Down
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -1953,6 +1953,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
env->num_target_waypoints = MAX_TARGET_WAYPOINTS;
}
env->target_type = (int) unpack(kwargs, "target_type");
env->goal_on_lane = (int) unpack(kwargs, "goal_on_lane");
env->obs_slots_boundary_n = (int) unpack(kwargs, "obs_slots_boundary_n");
env->obs_slots_lane_n = (int) unpack(kwargs, "obs_slots_lane_n");
env->obs_slots_partners_n = (int) unpack(kwargs, "obs_slots_partners_n");
Expand Down
137 changes: 136 additions & 1 deletion pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
// => For each entity type in gridmap, diagonal poly-lines -> sqrt(2), include diagonal ends -> 2
#define MAX_ENTITIES_PER_CELL 30

// TARGET_TYPE modes (controls what target info is in observations)
// TARGET_TYPE modes (controls what target info is in observations).
#define TARGET_STATIC 0
#define TARGET_DYNAMIC 1

Expand Down Expand Up @@ -396,6 +396,7 @@ struct Drive {
int num_target_waypoints;
int logs_capacity;
int target_type;
int goal_on_lane;
char *ini_file;
int collision_behavior; // 0 = none, 1=stop, 2 = remove
int offroad_behavior; // 0 = none, 1=stop, 2 = remove
Expand Down Expand Up @@ -1927,8 +1928,142 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) {
return 1; // Success
}

// Choose one random point on a drivable lane near a reference location.
//
// Walks the grid cells in a square window around (ref_x, ref_y). For every
// drivable lane segment registered in a visited cell, one random point on that
// segment is drawn; it becomes a candidate unless its planar (x, y) distance
// from the reference falls outside [min_dist, max_dist]. A single candidate is
// retained via size-one reservoir sampling, so no candidate list is stored.
//
// On success the chosen point is written to out_x / out_y / out_z and 1 is
// returned. When no candidate qualifies the outputs are left untouched and 0
// is returned.
static int pick_random_drivable_position(
    Drive *env,
    float ref_x,
    float ref_y,
    float min_dist,
    float max_dist,
    float *out_x,
    float *out_y,
    float *out_z) {
    GridMap *grid = env->grid_map;
    const float cell_size = GRID_CELL_SIZE;
    const float half_diagonal = 0.5f * cell_size * (float) M_SQRT2;
    const float min_dist_sq = min_dist * min_dist;
    const float max_dist_sq = max_dist * max_dist;

    // Cells whose centre lies farther than this from the reference are skipped.
    const float cell_reach = max_dist + half_diagonal;
    const float cell_reach_sq = cell_reach * cell_reach;

    // Square window of cells centred on the reference cell, clipped to the grid.
    const int ref_col = (int) ((ref_x - grid->top_left_x) / cell_size);
    const int ref_row = (int) ((ref_y - grid->bottom_right_y) / cell_size);
    const int window = (int) ceilf(cell_reach / cell_size);

    int col_first = ref_col - window;
    int row_first = ref_row - window;
    int col_last = ref_col + window;
    int row_last = ref_row + window;
    col_first = (col_first < 0) ? 0 : col_first;
    row_first = (row_first < 0) ? 0 : row_first;
    col_last = (col_last >= grid->grid_cols) ? grid->grid_cols - 1 : col_last;
    row_last = (row_last >= grid->grid_rows) ? grid->grid_rows - 1 : row_last;

    int accepted = 0;
    float chosen_x = 0.0f;
    float chosen_y = 0.0f;
    float chosen_z = 0.0f;

    for (int row = row_first; row <= row_last; row++) {
        float centre_y = grid->bottom_right_y + (row + 0.5f) * cell_size;
        for (int col = col_first; col <= col_last; col++) {
            float centre_x = grid->top_left_x + (col + 0.5f) * cell_size;
            float off_x = centre_x - ref_x;
            float off_y = centre_y - ref_y;
            if (off_x * off_x + off_y * off_y > cell_reach_sq) {
                continue;
            }

            int cell_idx = row * grid->grid_cols + col;
            for (int slot = 0; slot < grid->cell_entities_count[cell_idx]; slot++) {
                GridMapEntity entry = grid->cells[cell_idx][slot];
                RoadMapElement *lane = &env->road_elements[entry.entity_idx];

                // Each grid entry names a polyline segment by its start vertex;
                // only drivable lanes with a following vertex are usable.
                int seg = entry.geometry_idx;
                if (is_drivable_road_lane(lane->type) && seg + 1 < lane->segment_length) {
                    // Interpolate a random point along the segment so candidates
                    // are not restricted to the polyline vertices.
                    float frac = (float) rand() / (float) RAND_MAX;
                    float px = lane->x[seg] + frac * (lane->x[seg + 1] - lane->x[seg]);
                    float py = lane->y[seg] + frac * (lane->y[seg + 1] - lane->y[seg]);
                    float pz = lane->z[seg] + frac * (lane->z[seg + 1] - lane->z[seg]);

                    float to_x = px - ref_x;
                    float to_y = py - ref_y;
                    float dist_sq = to_x * to_x + to_y * to_y;
                    if (dist_sq < min_dist_sq || dist_sq > max_dist_sq) {
                        continue;
                    }

                    // Reservoir step: the newest candidate replaces the current
                    // pick when rand() % accepted == 0.
                    accepted++;
                    if (rand() % accepted == 0) {
                        chosen_x = px;
                        chosen_y = py;
                        chosen_z = pz;
                    }
                }
            }
        }
    }

    if (accepted != 0) {
        *out_x = chosen_x;
        *out_y = chosen_y;
        *out_z = chosen_z;
        return 1;
    }
    return 0;
}

static void compute_goals(Drive *env, int agent_idx) {
Agent *agent = &env->agents[agent_idx];

// goal_on_lane=False: place each goal at a random drivable point whose
// Euclidean distance from the previous anchor (agent for goal 0, previous
// goal for subsequent ones) lies in [min_waypoint_spacing,
// max_waypoint_spacing].
if (!env->goal_on_lane) {
int num_target_waypoints = env->num_target_waypoints;
if (num_target_waypoints <= 0 || num_target_waypoints > MAX_TARGET_WAYPOINTS) {
num_target_waypoints = MAX_TARGET_WAYPOINTS;
}
float ref_x = agent->sim_x;
float ref_y = agent->sim_y;
for (int i = 0; i < num_target_waypoints; i++) {
float gx, gy, gz;
if (!pick_random_drivable_position(
env,
ref_x,
ref_y,
env->min_waypoint_spacing,
env->max_waypoint_spacing,
&gx,
&gy,
&gz)) {
printf("[GIGAFLOW WARNING] -> pick_random_drivable_position failed for agent %d\n", agent_idx);
agent->removed = 1;
return;
}
agent->goal_positions_x[i] = gx;
agent->goal_positions_y[i] = gy;
agent->goal_positions_z[i] = gz;
ref_x = gx;
ref_y = gy;
}
agent->current_goal_idx = 0;
agent->goal_position_x = agent->goal_positions_x[0];
agent->goal_position_y = agent->goal_positions_y[0];
agent->goal_position_z = agent->goal_positions_z[0];
return;
}

struct Path *path = agent->path;

// Validate path exists
Expand Down
3 changes: 3 additions & 0 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
control_mode="control_vehicles",
map_dir=None,
target_type="static",
goal_on_lane=True,
reward_conditioning=False,
reward_randomization=False,
compute_eval_metrics=True,
Expand Down Expand Up @@ -144,6 +145,7 @@ def __init__(
self.target_type = binding.TARGET_DYNAMIC
else:
raise ValueError(f"target_type must be 'static' or 'dynamic'. Got: {target_type}")
self.goal_on_lane = int(bool(goal_on_lane))
self.collision_behavior = collision_behavior
self.offroad_behavior = offroad_behavior
self.traffic_light_behavior = traffic_light_behavior
Expand Down Expand Up @@ -393,6 +395,7 @@ def _env_init_kwargs(self, map_file, max_agents):
"max_waypoint_spacing": self.max_waypoint_spacing,
"num_target_waypoints": self.num_target_waypoints,
"target_type": self.target_type,
"goal_on_lane": self.goal_on_lane,
"obs_slots_lane_n": self.obs_slots_lane_n,
"obs_slots_boundary_n": self.obs_slots_boundary_n,
"obs_slots_partners_n": self.obs_slots_partners_n,
Expand Down
7 changes: 7 additions & 0 deletions pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,7 @@ def __init__(self, args, load_id=None, resume="allow"):

wandb.init(
id=load_id or wandb.util.generate_id(),
name=args.get("run_name") or None,
project=args["wandb_project"],
group=args["wandb_group"],
allow_val_change=True,
Expand Down Expand Up @@ -2117,6 +2118,12 @@ def load_config(env_name, config_dir=None):
parser.add_argument("--wandb", action="store_true", help="Use wandb for logging")
parser.add_argument("--wandb-project", type=str, default="pufferlib")
parser.add_argument("--wandb-group", type=str, default="debug")
parser.add_argument(
"--run-name",
type=str,
default=None,
help="Wandb run display name. Unset → wandb auto-generates one.",
)
parser.add_argument("--neptune", action="store_true", help="Use neptune for logging")
parser.add_argument("--neptune-name", type=str, default="pufferai")
parser.add_argument("--neptune-project", type=str, default="ablations")
Expand Down
11 changes: 8 additions & 3 deletions scripts/cluster_configs/single_agent_speed_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@ env.max_agents_per_env: 1
env.num_agents: 1024
env.use_map_cache: 1

# Single goal waypoint ahead of the agent (route mode uses the default
# min/max waypoint spacing of 20m/60m to place it).
# Single goal placed at a random drivable point on the map whose Euclidean
# distance from the agent lies in [min_waypoint_spacing, max_waypoint_spacing].
# The 6m floor avoids spawning the goal on top of the agent; the 500m ceiling
# exceeds the Town10HD map diameter (~260m), so goals can land anywhere on the
# network.
env.num_target_waypoints: 1
env.goal_on_lane: False
env.min_waypoint_spacing: 6.0
env.max_waypoint_spacing: 500.0

# Traffic lights fully off: not observed, not scored, no reward penalty.
env.traffic_light_behavior: 0
Expand Down Expand Up @@ -63,5 +68,5 @@ eval.behaviors_unprotected_right.enabled: 0
# W&B. Group has no space: submit_cluster.py joins the inner command into a
# bash -c string without quoting arg values, so a space would split the arg.
wandb: True
wandb_project: puffer_drive
wandb_project: single_agent_nightly_test
wandb_group: Nightly_Test
23 changes: 15 additions & 8 deletions scripts/launch_single_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,20 @@ PARTITION="${PARTITION:-h200_tandon}"
TIME="${TIME:-720}"
SEEDS="${SEEDS:-0:1:2}"
PREFIX="${PREFIX:-$(date +%Y-%m-%d)_single_agent}"
DATE_STAMP="$(date +%Y-%m-%d)"

source "/scratch/$USER/venvs/pufferdrive/bin/activate"
python scripts/submit_cluster.py \
--save_dir "/scratch/$USER/runs" \
--prefix "$PREFIX" \
--compute_config "$COMPUTE_CONFIG" \
--program_config "$PROGRAM_CONFIG" \
--container --heartbeat \
--account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \
--args "train.seed=$SEEDS"

# One submission per seed so that each job gets its own run_name (the wandb
# display name, e.g. 2026-05-31_seed0).
IFS=':' read -ra SEED_LIST <<< "$SEEDS"
for SEED in "${SEED_LIST[@]}"; do
python scripts/submit_cluster.py \
--save_dir "/scratch/$USER/runs" \
--prefix "$PREFIX" \
--compute_config "$COMPUTE_CONFIG" \
--program_config "$PROGRAM_CONFIG" \
--container --heartbeat \
--account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \
--args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}"
done
Loading