Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
441c6c0
drive: map_dir accepts a single .bin; validation_replay enable flag; …
May 28, 2026
676a24f
test_single_agent_yaml: ruff format
May 28, 2026
e362d18
test_single_agent_yaml: drop history-of-change prose from docstrings
May 28, 2026
4f02f15
test_single_agent_yaml: drop value-spot-checks; keep argparse smoke
May 28, 2026
2c2271a
drive: guard isfile() so map_dir=None falls through to listdir path
May 28, 2026
62de125
test_drive_scenario_length: pass real map_dir so the test actually runs
May 28, 2026
0fb4da7
drive: disable validation_gigaflow eval in single-agent yaml
May 28, 2026
1f021ba
Merge origin/emerge/temp_training into ev/single_agent_runs
May 28, 2026
f74f819
drive: add [eval.dnf_triage] evaluator for triage_html rollout viz
May 29, 2026
35ee832
drive: single-agent triage_html eval rendering via yaml override
May 29, 2026
170cbf3
Merge emerge/temp_training into ev/dnf-triage (resolve yaml to keep t…
May 29, 2026
1b66ad2
eval: include scenarios_done counter in triage_html filename stem
May 29, 2026
7069e42
eval: build the gallery index for triage_html too (was obs_html-only)
May 29, 2026
984ab64
yaml: switch single-agent eval render to obs_html
May 29, 2026
0a84861
drive: per-episode dnf_rate in CompletedEpisodeSummary
May 29, 2026
9d390c2
eval: per-episode metric sort UI on gallery index.html
May 29, 2026
34af7c4
eval: fix gallery-sort helper using Path.name instead of unimported os
May 29, 2026
e490d47
eval: import viz inside _render_pass_html
May 30, 2026
0bd3ac7
eval: hoist viz import to module-level
May 30, 2026
12857c6
viz: accept any .html file in the gallery index, sort lexicographically
May 30, 2026
9e17db9
drive.ini: pin validation eval num_agents=1024, dnf_triage to single-…
May 30, 2026
c9da518
eval: write per-episode CSV before render so the gallery reads this r…
May 30, 2026
e0d503e
viz: render pooled road-edge slots in red, lanes in teal
May 30, 2026
d6f2512
eval: pair gallery metrics over all episodes, not just episode_index=0
May 30, 2026
36704b5
eval: gallery FILE_METRICS from render-pass info, not the metric-pass…
May 30, 2026
4a68d1b
viz: ruff-format build_gallery_index
May 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ env.offroad_behavior = 1
env.traffic_light_behavior = 0
env.reward_randomization = False
env.termination_mode = 0
env.num_agents = 512
env.num_agents = 1024
env.target_type = "static"
env.goal_speed = 3.0
env.reward_collision = 3.0
Expand Down Expand Up @@ -286,10 +286,12 @@ inherits = "validation_defaults"
type = "multi_scenario"
enabled = true
render = true
render_backend = "egl"
render_views = ["sim_state", "bev"]
env.simulation_mode = "gigaflow"
env.map_dir = "pufferlib/resources/drive/binaries/carla"
env.num_maps = 8
env.num_agents = 1024
env.min_agents_per_env = 40
env.max_agents_per_env = 40
env.scenario_length = 500
Expand All @@ -306,8 +308,8 @@ render_backend = "triage_html"
env.simulation_mode = "gigaflow"
env.map_dir = "pufferlib/resources/drive/binaries/carla/opendrive__Town10HD.bin"
env.num_maps = 1
env.min_agents_per_env = 40
env.max_agents_per_env = 40
env.min_agents_per_env = 1
env.max_agents_per_env = 1
env.scenario_length = 500
env.resample_frequency = 500
eval.num_scenarios = 32
Expand Down
58 changes: 50 additions & 8 deletions pufferlib/ocean/benchmark/evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,32 @@
from dataclasses import dataclass, field
from typing import ClassVar
from tqdm import tqdm
from pufferlib import viz
from pufferlib.ocean.drive import binding

# Keys read from a completed-episode summary for the gallery sort. The order
# here is the insertion order of the dict built by _episode_metrics_from_info.
_GALLERY_METRIC_KEYS = (
    "score",
    "dnf_rate",
    "episode_return",
    "num_goals_reached",
    "collision_rate",
    "offroad_rate",
    "red_light_violation_rate",
    "total_infractions",
    "total_distance_travelled",
    "episode_length",
)


def _episode_metrics_from_info(info):
    """Pull the gallery-sort metrics out of a `completed_episode` summary dict.

    Args:
        info: Mapping holding a completed-episode summary. ``None`` or an
            empty mapping yields an empty result instead of raising.

    Returns:
        A new dict mapping each key of ``_GALLERY_METRIC_KEYS`` whose value in
        ``info`` is a real number to that value converted to ``float``. Keys
        that are missing, ``None`` or non-numeric (e.g. strings) are omitted.
    """
    # Function-scope import keeps this block self-contained; `numbers` is
    # part of the standard library.
    from numbers import Real

    if not info:
        return {}

    out = {}
    for key in _GALLERY_METRIC_KEYS:
        value = info.get(key)
        # `Real` covers builtin int/float and any type registered with the
        # numbers ABCs (NumPy scalar types register themselves), so such
        # values are kept rather than silently dropped.
        if isinstance(value, Real):
            out[key] = float(value)
    return out


@dataclass
class EvalResult:
Expand Down Expand Up @@ -84,6 +108,7 @@ def rollout(self, vecenv, policy, args) -> EvalResult:
try:
metrics = self._run_rollout_loop(vecenv, policy, args)
t_metric = time.time()
self._maybe_export_episodes(args, metrics)
frames = self._render_pass(vecenv, policy, args) if self.render else []
t_render = time.time()
finally:
Expand All @@ -92,10 +117,6 @@ def rollout(self, vecenv, policy, args) -> EvalResult:
metrics["metric_seconds"] = float(t_metric - t0)
metrics["render_seconds"] = float(t_render - t_metric)
metrics["eval_seconds"] = float(t_render - t0)
# Opt-in per-episode CSV + coverage check (writes files, folds
# coverage_* scalars into metrics). No-op unless the evaluator set
# eval.export_episode_csv / eval.verify_coverage.
self._maybe_export_episodes(args, metrics)
return EvalResult(metrics=metrics, frames=frames)

def _run_rollout_loop(self, vecenv, policy, args) -> dict:
Expand Down Expand Up @@ -451,6 +472,10 @@ def _render_pass_html(self, vecenv, policy, args) -> list:

out_dir = Path(args.get("render_results_dir") or args.get("eval_results_dir") or ".") / "gif" / self.name
out_dir.mkdir(parents=True, exist_ok=True)
# Per-rendered-file metrics, accumulated inline from each scenario's
# completed_episode summary so the gallery sort uses this render's
# own rollouts (the metric-pass CSV is from a different vec env).
render_file_metrics = {}

epoch = int(args.get("epoch") or 0)
global_step = int(args.get("global_step") or 0)
Expand Down Expand Up @@ -511,13 +536,18 @@ def _render_pass_html(self, vecenv, policy, args) -> list:
# basename: map_name is the full bin path, and an absolute
# value would make `out_dir / stem` escape out_dir.
map_name = os.path.basename(str(info.get("map_name") or "map")).split(".")[0]
stem = f"{map_name}_{scenario_id}{step_suffix}"
# scenario_id repeats across rollouts on the same map in
# gigaflow mode (the C side fills it with the map's short
# name), so append a monotonic counter to make every
# rendered episode land in its own file.
stem = f"{map_name}_{scenario_id}_{scenarios_done:04d}{step_suffix}"
tmp_path = out_dir / f"{stem}.pkl.zlib"
html_path = out_dir / f"{stem}.html"
tmp_path.write_bytes(bundle_bytes)
mining_viz.render_compact_replay_html(str(tmp_path), str(html_path))
tmp_path.unlink(missing_ok=True)
html_paths.append(html_path)
render_file_metrics[html_path.name] = _episode_metrics_from_info(info)
scenarios_done += 1
progress.update(1)
if scenarios_done >= num_scenarios:
Expand All @@ -529,6 +559,9 @@ def _render_pass_html(self, vecenv, policy, args) -> list:
vec.close()
progress.close()

if html_paths:
viz.build_gallery_index(str(out_dir), file_metrics=render_file_metrics or None)

return html_paths

def _render_pass_obs(self, vecenv, policy, args) -> list:
Expand All @@ -544,7 +577,6 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:
import torch

import pufferlib
from pufferlib import viz

eval_cfg = self.config.get("eval", {})
for required in ("render_num_scenarios", "render_max_steps"):
Expand All @@ -566,9 +598,13 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:

render_env_kwargs = self._render_env_overrides(args)
render_env_kwargs.pop("render_mode", None) # obs viz reads state, no EGL
# Per-episode summaries are needed so the gallery sort dropdown can
# show this render's actual metrics.
render_env_kwargs["emit_completed_episodes"] = True

device = args["train"]["device"]
html_paths = []
render_file_metrics = {}
scenarios_done = 0
progress = tqdm(total=num_scenarios * (max_steps + 1), desc=f"{self.name} obs_html", unit="step")
pool_method = getattr(policy, "pool_slot_counts", None)
Expand Down Expand Up @@ -631,6 +667,7 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:
policy_std_hist = [[] for _ in range(n_in_batch)]
policy_log_prob_hist = [[] for _ in range(n_in_batch)]
pool_hist = None
batch_summary = None
for t in range(max_steps):
with torch.no_grad():
ob_t = torch.as_tensor(ob).to(device)
Expand Down Expand Up @@ -707,7 +744,10 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:
np.asarray(policy_outputs[start_obs_index:end_obs_index], dtype=np.float32).copy()
)
start_obs_index = end_obs_index
ob, _, _, _, _ = vec.step(clipped_action)
ob, _, _, _, step_infos = vec.step(clipped_action)
for d in self._flatten_infos(step_infos):
if isinstance(d, dict) and d.get("summary_type") == "completed_episode":
batch_summary = d
progress.update(to_render)
for e in range(to_render):
map_name = os.path.basename(str(scenarios[e].get("map_name") or "map")).split(".")[0]
Expand Down Expand Up @@ -739,6 +779,8 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:
compact_replay[k] = hists[e]
viz.generate_interactive_replay(scenarios[e], compact_replay, filename=str(path))
html_paths.append(path)
if batch_summary is not None:
render_file_metrics[path.name] = _episode_metrics_from_info(batch_summary)
scenarios_done += 1
progress.update(1)
if scenarios_done >= num_scenarios:
Expand All @@ -748,7 +790,7 @@ def _render_pass_obs(self, vecenv, policy, args) -> list:
progress.close()

if html_paths:
viz.build_gallery_index(str(out_dir))
viz.build_gallery_index(str(out_dir), file_metrics=render_file_metrics or None)
return html_paths

def _render_env_overrides(self, args) -> dict:
Expand Down
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -2012,6 +2012,7 @@ static int my_completed_episode_to_dict(PyObject *dict, Env *env, CompletedEpiso
assign_to_dict(dict, "red_light_violation_rate", summary->red_light_violation_rate);
assign_to_dict(dict, "num_goals_reached", summary->num_goals_reached);
assign_to_dict(dict, "score", summary->score);
assign_to_dict(dict, "dnf_rate", summary->dnf_rate);
assign_to_dict(dict, "total_distance_travelled", summary->total_distance_travelled);
assign_to_dict(dict, "total_infractions", summary->total_infractions);

Expand Down
9 changes: 9 additions & 0 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ struct CompletedEpisodeSummary {
float red_light_violation_rate;
float num_goals_reached;
float score;
float dnf_rate;
float total_distance_travelled;
float total_infractions;
float n;
Expand Down Expand Up @@ -2642,6 +2643,7 @@ static void add_log(Drive *env) {
s->red_light_violation_rate = 0.0f;
s->num_goals_reached = 0.0f;
s->score = 0.0f;
s->dnf_rate = 0.0f;
s->total_distance_travelled = 0.0f;
s->total_infractions = 0.0f;
s->n = (float) env->active_agent_count;
Expand All @@ -2660,6 +2662,7 @@ static void add_log(Drive *env) {
int offroad = env->logs[i].offroad_rate;
int red_light = env->logs[i].red_light_violation_rate;
int num_goals = env->logs[i].num_goals_reached;
int num_waypoints = env->logs[i].num_waypoints_reached;
s->episode_length += env->logs[i].episode_length;
s->episode_return += env->logs[i].episode_return;
s->collision_rate += collided;
Expand All @@ -2669,6 +2672,12 @@ static void add_log(Drive *env) {
if (num_goals >= 3 && !agent_i->removed && !agent_i->stopped) {
s->score += 1.0f;
}
// Mirror the aggregate Log DNF predicate (see drive.h:2577):
// the agent stayed clean of infractions but never reached even
// one waypoint — i.e. wandered without making progress.
if (!offroad && !collided && !red_light && num_waypoints < 1) {
s->dnf_rate += 1.0f;
}
s->total_distance_travelled += agent_i->distance_since_spawn;
if (collided || offroad || red_light) {
s->total_infractions += 1.0f;
Expand Down
Loading
Loading