diff --git a/.github/workflows/update-smoke-golden.yml b/.github/workflows/update-smoke-golden.yml
new file mode 100644
index 0000000000..1f088a98db
--- /dev/null
+++ b/.github/workflows/update-smoke-golden.yml
@@ -0,0 +1,67 @@
+name: Update Smoke Golden
+
+# Regenerate tests/smoke_tests/data/drive_smoke_golden.json inside the pinned
+# QEMU/Haswell image (the only place it is bit-reproducible) and commit it back.
+# Runs on manual dispatch, or on a push to emerge/** or ev/** whose head commit
+# message contains [update-golden], e.g.:
+#   git commit --allow-empty -m "regen smoke golden [update-golden]" && git push
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - 'emerge/**'
+      - 'ev/**'
+
+permissions:
+  contents: write
+
+jobs:
+  update-golden:
+    name: Regenerate smoke golden
+    if: github.event_name == 'workflow_dispatch' || contains(github.event.head_commit.message, '[update-golden]')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref_name }}
+
+      - name: Build smoke image
+        run: docker build -f tests/smoke_tests/Dockerfile -t pufferdrive-smoke .
+
+      - name: Regenerate golden
+        run: |
+          docker run --rm -e SMOKE_UPDATE_GOLDEN=1 \
+            -v "$PWD/tests/smoke_tests/data:/app/tests/smoke_tests/data" \
+            pufferdrive-smoke tests/smoke_tests/test_drive_train.py
+        timeout-minutes: 30
+
+      - name: Upload golden artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: drive_smoke_golden
+          path: tests/smoke_tests/data/drive_smoke_golden.json
+
+      - name: Commit regenerated golden
+        env:
+          # Hand the branch name to the script through the environment rather than
+          # expanding it into the script text, so a crafted ref name cannot inject shell.
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          # The container writes the golden as root via the bind mount; reclaim
+          # it so git can stage/commit cleanly.
+          sudo chown "$(id -u):$(id -g)" tests/smoke_tests/data/drive_smoke_golden.json
+          # json.dump writes no trailing newline; add one so the committed file
+          # satisfies pre-commit's end-of-file-fixer.
+          [ -n "$(tail -c1 tests/smoke_tests/data/drive_smoke_golden.json)" ] \
+            && printf '\n' >> tests/smoke_tests/data/drive_smoke_golden.json
+          if git diff --quiet -- tests/smoke_tests/data/drive_smoke_golden.json; then
+            echo "Golden unchanged; nothing to commit."
+            exit 0
+          fi
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add -f tests/smoke_tests/data/drive_smoke_golden.json  # data/ is gitignored; file is tracked
+          git commit -m "Regenerate smoke golden"
+          git push origin "HEAD:${REF_NAME}"
diff --git a/notebooks/01_observations.ipynb b/notebooks/01_observations.ipynb index a72fcaa6f3..0e469ff560 100644 --- a/notebooks/01_observations.ipynb +++ b/notebooks/01_observations.ipynb @@ -214,7 +214,17 @@ "metadata": {}, "outputs": [], "source": [ - "partner_labels = [\"rel_x\", \"rel_y\", \"rel_z\", \"length\", \"width\", \"heading_cos\", \"heading_sin\", \"speed\"]\n", + "partner_labels = [\n", + " \"rel_x\",\n", + " \"rel_y\",\n", + " \"rel_z\",\n", + " \"length\",\n", + " \"width\",\n", + " \"heading_cos\",\n", + " \"heading_sin\",\n", + " \"speed\",\n", + " \"seconds_stopped\",\n", + "]\n", "active_mask = ~np.all(partners == 0, axis=1)\n", "n_active = active_mask.sum()\n", "print(f\"Active partners: {n_active}/{env.obs_slots_partners_n}\")\n", diff --git a/notebooks/05_inference.ipynb b/notebooks/05_inference.ipynb index c1f99ec009..df27d2171a 100644 --- a/notebooks/05_inference.ipynb +++ b/notebooks/05_inference.ipynb @@ -299,7 +299,7 @@ "- **Ego**: speed, width, length, [jerk: steering, a_long, a_lat], lane_center_dist, lane_angle, speed_limit\n", "- **Conditioning** (if enabled): 17 reward coefs (goal_radius, goal_speed, collision, offroad, comfort, lane_align, vel_align, lane_center, center_bias, velocity, reverse, stop_line, timestep, overspeed, throttle, steer, acc) + target waypoints\n", "- **Target**: static=rel_x,rel_y,rel_z per 
waypoint; dynamic=rel_x,rel_y,rel_z,heading_cos,heading_sin per waypoint\n", - "- **Partners** (MAX_PARTNERS x 8): rel_x, rel_y, rel_z, length, width, heading_cos, heading_sin, speed\n", + "- **Partners** (MAX_PARTNERS x 9): rel_x, rel_y, rel_z, length, width, heading_cos, heading_sin, speed, seconds_stopped\n", "- **Lanes** (MAX_LANES x 7): rel_x, rel_y, rel_z, seg_length, seg_width, dir_cos, dir_sin\n", "- **Boundaries** (MAX_BOUNDS x 7): same as lanes\n", "- **Traffic controls** (MAX_TRAFFIC x 7): rel_x1, rel_y1, rel_x2, rel_y2, rel_z, type, state" @@ -390,9 +390,19 @@ "# --- Partner summary ---\n", "n_visible = np.sum(np.any(partners != 0, axis=1))\n", "print(f\"\\n--- Partners: {n_visible}/{partners.shape[0]} visible ---\")\n", - "partner_labels = [\"rel_x\", \"rel_y\", \"rel_z\", \"width\", \"length\", \"heading_cos\", \"heading_sin\", \"speed\"]\n", + "partner_labels = [\n", + " \"rel_x\",\n", + " \"rel_y\",\n", + " \"rel_z\",\n", + " \"length\",\n", + " \"width\",\n", + " \"heading_cos\",\n", + " \"heading_sin\",\n", + " \"speed\",\n", + " \"seconds_stopped\",\n", + "]\n", "for p in range(min(int(n_visible), 5)):\n", - " vals = \", \".join(f\"{partner_labels[j]}={partners[p, j]:.3f}\" for j in range(8))\n", + " vals = \", \".join(f\"{partner_labels[j]}={partners[p, j]:.3f}\" for j in range(len(partner_labels)))\n", " print(f\" [{p}] {vals}\")\n", "if n_visible > 5:\n", " print(f\" ... 
({n_visible - 5} more)\")\n", @@ -847,7 +857,17 @@ "outputs": [], "source": [ "# Partner per-feature distributions (pooled over all agents + timesteps, visible only)\n", - "partner_labels = [\"rel_x\", \"rel_y\", \"rel_z\", \"width\", \"length\", \"heading_cos\", \"heading_sin\", \"speed\"]\n", + "partner_labels = [\n", + " \"rel_x\",\n", + " \"rel_y\",\n", + " \"rel_z\",\n", + " \"length\",\n", + " \"width\",\n", + " \"heading_cos\",\n", + " \"heading_sin\",\n", + " \"speed\",\n", + " \"seconds_stopped\",\n", + "]\n", "obs_slots_partners_n = env.obs_slots_partners_n\n", "pf = env.partner_features\n", "\n", @@ -861,10 +881,10 @@ "\n", "all_partners = buf_stoch[\"obs\"][:, :, _p_start:_p_end].reshape(\n", " -1, obs_slots_partners_n, pf\n", - ") # (H*N, obs_slots_partners_n, 8)\n", + ") # (H*N, obs_slots_partners_n, pf)\n", "# Mask: partner is visible if any feature != 0\n", "visible_mask = np.any(all_partners != 0, axis=2) # (H*N, 16)\n", - "visible_partners = all_partners[visible_mask] # (K, 8) — all visible partner observations\n", + "visible_partners = all_partners[visible_mask] # (K, pf) — all visible partner observations\n", "\n", "print(\n", " f\"Total partner obs: {all_partners.shape[0] * obs_slots_partners_n}, visible: {len(visible_partners)} \"\n", diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 61cc1ec42b..48a68ac9cc 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -119,7 +119,7 @@ // Observation feature counts #define EGO_FEATURES 10 #define ROAD_FEATURES 7 -#define PARTNER_FEATURES 8 +#define PARTNER_FEATURES 9 #define TRAFFIC_CONTROL_FEATURES 7 #define PADDED_OBSERVATION_VALUE -0.001f #define STATIC_TARGET_FEATURES 3 @@ -4441,11 +4441,6 @@ static void compute_rewards(Drive *env, int i) { env->rewards[i] += speed_reward; env->logs[i].avg_speed_per_agent += agent->sim_speed; agent->distance_since_spawn += agent->sim_speed * env->dt; - if (agent->sim_speed < AGENT_STOPPED_SPEED_THRESHOLD) { 
- agent->seconds_stopped += env->dt; - } else { - agent->seconds_stopped = 0.0f; - } env->logs[i].episode_return += speed_reward; env->logs[i].reward_overspeed += speed_reward; @@ -4663,6 +4658,8 @@ static int write_partner_obs(Drive *env, Agent *ego, int agent_idx, float *obs, obs[obs_idx++] = rel_heading_x; obs[obs_idx++] = rel_heading_y; obs[obs_idx++] = other->sim_speed_signed / MAX_SPEED; + // TODO(hack): partner seconds_stopped (stopped time / MAX_STOPPED_SECONDS, capped at 1.0) is a temporary feature; remove later. + obs[obs_idx++] = fminf(1.0f, other->seconds_stopped / MAX_STOPPED_SECONDS); partners_written++; } @@ -5291,6 +5288,20 @@ void c_step(Drive *env) { // move_expert(env, env->actions, agent_idx); } + // Update stopped-duration for every agent (active + replayed/static), not just policy-controlled ones, so the + // partner seconds_stopped observation is populated even in control_sdc_only mode where only the ego is active. + // NOTE(review): indexing assumes num_agents == active_agent_count + number of static_agent_indices entries; confirm. + for (int j = 0; j < env->num_agents; j++) { + int agent_idx = (j < env->active_agent_count) ? env->active_agent_indices[j] + : env->static_agent_indices[j - env->active_agent_count]; + Agent *agent = &env->agents[agent_idx]; + if (agent->sim_speed < AGENT_STOPPED_SPEED_THRESHOLD) { + agent->seconds_stopped += env->dt; + } else { + agent->seconds_stopped = 0.0f; + } + } + // -> 2. 
Compute metrics and rewards for (int i = 0; i < env->active_agent_count; i++) { int agent_idx = env->active_agent_indices[i]; diff --git a/pufferlib/viz.py b/pufferlib/viz.py index c72d0a2f8a..eadea0b883 100644 --- a/pufferlib/viz.py +++ b/pufferlib/viz.py @@ -954,6 +954,7 @@ def generate_interactive_replay(scenario, replay, filename="replay.html"): "num_target_waypoints": int(env_cfg["num_target_waypoints"]), "reward_conditioning": bool(env_cfg["reward_conditioning"]), "obs_slots_partners_n": int(env_cfg["obs_slots_partners_n"]), + "partner_features": int(binding.PARTNER_FEATURES), "lane_count": int(lane_count), "boundary_count": int(boundary_count), "traffic_obs_count": int(env_cfg["obs_slots_traffic_controls_n"]), @@ -1203,14 +1204,14 @@ def generate_interactive_replay(scenario, replay, filename="replay.html"): let p = base, ego = obs.subarray(p, p+10); p += 10; if (H.reward_conditioning) p += 17; const targetStart = p; p += H.num_target_waypoints * H.target_features; - const partnersStart = p; p += H.obs_slots_partners_n * 8; + const partnersStart = p; p += H.obs_slots_partners_n * H.partner_features; const lanesStart = p; p += H.lane_count * 7; const boundsStart = p; p += H.boundary_count * 7; const trafficStart = p; const rot = (x,y) => [-y,x]; const zero = (off,n) => { for(let i=0;i { const out=[]; for(let i=0;i