diff --git a/CHANGELOG.md b/CHANGELOG.md
index 058bbd0ba7..484a5ea4ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 Documenting **breaking** configuration changes — renamed, removed, or moved fields that require users to update existing configs.
 
+- **`rollouts_per_example` → `group_size`**: The orchestrator-level field, the group-level `[orchestrator.eval]` field, and the per-env `[[orchestrator.eval.env]]` field have all been renamed. The old name is still accepted as a validation alias (in both TOML and CLI), so existing configs keep working without changes; new configs should prefer `group_size`. The alias applies to input parsing only: Python code that reads the attribute (e.g. `config.rollouts_per_example`) must switch to `config.group_size`. (2026-05-22)
 - **`AdvantageInputs` / `AdvantageOutputs` are now per-group, and `AdvantageOutputs.advantages` is a plain `list[float]`** (second breaking change to this API in three weeks). `AdvantageInputs.rollouts` is now `list[vf.RolloutOutput]` (a single group) instead of `list[list[vf.RolloutOutput]]`, and `AdvantageOutputs.advantages` is now `list[float]` instead of a 2D `Float[Tensor, "num_examples rollouts_per_example"]`. `compute_advantages` calls `advantage_fn` once per group, which lets partial-group training (groups smaller than `rollouts_per_example` after rollout errors) round-trip without the previous bucket-by-size workaround. Custom advantage functions must drop the outer list dimension and return a list of floats — e.g. `AdvantageOutputs(advantages=(rewards - rewards.mean(dim=1, keepdim=True)).tolist())` becomes `AdvantageOutputs(advantages=[r - mean for r in rewards])` (or `.tolist()` if you keep torch internally). (2026-05-22)
 - **`[model.vlm]` requires `orchestrator.use_renderer = true`**: VLMs must go through the renderer path; the `vlm_requires_renderer` validator rejects `use_renderer = false` when `[model.vlm]` is set. The renderer owns the HF processor per-slot and ships generic `mm_kwargs` keyed by the model's forward signature. Since `use_renderer` already defaults to `true`, most VLM configs need no change. (2026-05-19)
 - **First-class `training_mode` + batch-driven loss dispatch** (collection of removals/renames). Loss selection is now driven by `TrainingSample.training_mode` (`rl` / `opd` / `sft`), set under `[orchestrator]`. The trainer is mode-agnostic and dispatches per batch.
diff --git a/configs/acereason_math/stage1.toml b/configs/acereason_math/stage1.toml
index 6c102b4a63..7afbda5526 100644
--- a/configs/acereason_math/stage1.toml
+++ b/configs/acereason_math/stage1.toml
@@ -17,7 +17,7 @@ name = "stage1"
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 temperature = 0.6
@@ -32,15 +32,15 @@ interval = 50
 
 [[orchestrator.eval.env]]
 id = "math500"
-rollouts_per_example = 1
+group_size = 1
 
 [[orchestrator.eval.env]]
 id = "aime2024"
-rollouts_per_example = 32
+group_size = 32
 
 [[orchestrator.eval.env]]
 id = "aime2025"
-rollouts_per_example = 32
+group_size = 32
 
 [trainer.model.ac]
 
diff --git a/configs/acereason_math/stage2.toml b/configs/acereason_math/stage2.toml
index 7745b9ff05..57eebcd204 100644
--- a/configs/acereason_math/stage2.toml
+++ b/configs/acereason_math/stage2.toml
@@ -18,7 +18,7 @@ name = "stage2"
 
 [orchestrator]
 batch_size = 2048
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 temperature = 0.6
@@ -33,15 +33,15 @@ interval = 50
 
 [[orchestrator.eval.env]]
 id = "math500"
-rollouts_per_example = 1
+group_size = 1
 
 [[orchestrator.eval.env]]
 id = "aime2024"
-rollouts_per_example = 32
+group_size = 32
 
 [[orchestrator.eval.env]]
 id = "aime2025"
-rollouts_per_example = 32
+group_size = 32
 
 [trainer.model.ac]
 
diff --git a/configs/alphabet_sort/rl.toml b/configs/alphabet_sort/rl.toml
index 22daacdc5f..6c54274b64 100644
--- a/configs/alphabet_sort/rl.toml
+++ b/configs/alphabet_sort/rl.toml
@@ -11,7 +11,7 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 512
@@ -28,7 +28,7 @@ interval = 50
 id = "alphabet-sort"
 name = "alphabet-sort"
 num_examples = 50
-rollouts_per_example = 4
+group_size = 4
 args = { min_turns = 2, max_turns = 2 }
 
 [trainer]
diff --git a/configs/ci/integration/alphabet_sort.toml b/configs/ci/integration/alphabet_sort.toml
index 33ce009b34..adc7a8215b 100644
--- a/configs/ci/integration/alphabet_sort.toml
+++ b/configs/ci/integration/alphabet_sort.toml
@@ -11,7 +11,7 @@ lr = 1e-5
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 max_completion_tokens = 384
diff --git a/configs/ci/integration/reverse_text/resume.toml b/configs/ci/integration/reverse_text/resume.toml
index b9ca6b8515..aa446b4b35 100644
--- a/configs/ci/integration/reverse_text/resume.toml
+++ b/configs/ci/integration/reverse_text/resume.toml
@@ -12,7 +12,7 @@ lr = 3e-6
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/ci/integration/reverse_text/start.toml b/configs/ci/integration/reverse_text/start.toml
index e0d8ed659a..190250e048 100644
--- a/configs/ci/integration/reverse_text/start.toml
+++ b/configs/ci/integration/reverse_text/start.toml
@@ -11,7 +11,7 @@ lr = 3e-6
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/ci/integration/reverse_text_lora/resume.toml b/configs/ci/integration/reverse_text_lora/resume.toml
index e7a96edcac..7c431ef0d2 100644
--- a/configs/ci/integration/reverse_text_lora/resume.toml
+++ b/configs/ci/integration/reverse_text_lora/resume.toml
@@ -18,7 +18,7 @@ save_adapter_separately = true
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.model.lora]
 name = "r8-1e-4"
diff --git a/configs/ci/integration/reverse_text_lora/start.toml b/configs/ci/integration/reverse_text_lora/start.toml
index 0e32533259..08740163f7 100644
--- a/configs/ci/integration/reverse_text_lora/start.toml
+++ b/configs/ci/integration/reverse_text_lora/start.toml
@@ -17,7 +17,7 @@ save_adapter_separately = true
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.model.lora]
 name = "r8-1e-4"
diff --git a/configs/ci/integration/reverse_text_moe/start.toml b/configs/ci/integration/reverse_text_moe/start.toml
index 62b1d286fb..209f9fb5c8 100644
--- a/configs/ci/integration/reverse_text_moe/start.toml
+++ b/configs/ci/integration/reverse_text_moe/start.toml
@@ -14,7 +14,7 @@ impl = "custom"
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/ci/integration/reverse_text_multi_run/orchestrator.toml b/configs/ci/integration/reverse_text_multi_run/orchestrator.toml
index 6be85cc776..2c43dcf8a0 100644
--- a/configs/ci/integration/reverse_text_multi_run/orchestrator.toml
+++ b/configs/ci/integration/reverse_text_multi_run/orchestrator.toml
@@ -1,7 +1,7 @@
 # Orchestrator config for multi-run RL integration test
 # model.lora.name and output_dir are set via CLI
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 seq_len = 2048
 max_steps = 20
 
diff --git a/configs/ci/integration/reverse_text_rl_opd/start.toml b/configs/ci/integration/reverse_text_rl_opd/start.toml
index bfc9d6f1d4..edd23df9c7 100644
--- a/configs/ci/integration/reverse_text_rl_opd/start.toml
+++ b/configs/ci/integration/reverse_text_rl_opd/start.toml
@@ -17,7 +17,7 @@ name = "ci-rl-opd"
 [orchestrator]
 training_mode = "opd"
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.renderer]
 name = "qwen3"
diff --git a/configs/ci/integration/reverse_text_rl_sft/start.toml b/configs/ci/integration/reverse_text_rl_sft/start.toml
index f3fe4448f4..6b26bb3335 100644
--- a/configs/ci/integration/reverse_text_rl_sft/start.toml
+++ b/configs/ci/integration/reverse_text_rl_sft/start.toml
@@ -17,7 +17,7 @@ name = "ci-rl-sft"
 [orchestrator]
 training_mode = "sft"
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/ci/nightly/acereason_math.toml b/configs/ci/nightly/acereason_math.toml
index 0553e17ff3..4239ecac8a 100644
--- a/configs/ci/nightly/acereason_math.toml
+++ b/configs/ci/nightly/acereason_math.toml
@@ -15,7 +15,7 @@ name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 temperature = 0.6
diff --git a/configs/ci/nightly/multimodal_color_codeword.toml b/configs/ci/nightly/multimodal_color_codeword.toml
index a90fdfb454..2ef83a63f0 100644
--- a/configs/ci/nightly/multimodal_color_codeword.toml
+++ b/configs/ci/nightly/multimodal_color_codeword.toml
@@ -15,7 +15,7 @@ language_model_attr = "model.language_model"
 
 [orchestrator]
 batch_size = 256
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 64
diff --git a/configs/debug/training_modes/opd.toml b/configs/debug/training_modes/opd.toml
index b24cf3fe91..39cbf6a604 100644
--- a/configs/debug/training_modes/opd.toml
+++ b/configs/debug/training_modes/opd.toml
@@ -18,7 +18,7 @@ name = "debug-opd"
 [orchestrator]
 training_mode = "opd"
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.renderer]
 name = "qwen3"
diff --git a/configs/debug/training_modes/opd_lora.toml b/configs/debug/training_modes/opd_lora.toml
index 135f083936..ba56ffea5c 100644
--- a/configs/debug/training_modes/opd_lora.toml
+++ b/configs/debug/training_modes/opd_lora.toml
@@ -18,7 +18,7 @@ name = "debug-opd-lora"
 [orchestrator]
 training_mode = "opd"
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.renderer]
 name = "qwen3"
diff --git a/configs/debug/training_modes/rl.toml b/configs/debug/training_modes/rl.toml
index bbc3ff27f9..27838809b3 100644
--- a/configs/debug/training_modes/rl.toml
+++ b/configs/debug/training_modes/rl.toml
@@ -11,7 +11,7 @@ name = "debug-rl"
 [orchestrator]
 training_mode = "rl"
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.renderer]
 name = "qwen3"
diff --git a/configs/debug/training_modes/sft.toml b/configs/debug/training_modes/sft.toml
index 3d583e6185..aed5b30cb3 100644
--- a/configs/debug/training_modes/sft.toml
+++ b/configs/debug/training_modes/sft.toml
@@ -18,7 +18,7 @@ name = "debug-sft"
 [orchestrator]
 training_mode = "sft"
 batch_size = 128
-rollouts_per_example = 4
+group_size = 4
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/debug/training_modes/sft_external.toml b/configs/debug/training_modes/sft_external.toml
index 7fa5a478d9..cb9ea8d09e 100644
--- a/configs/debug/training_modes/sft_external.toml
+++ b/configs/debug/training_modes/sft_external.toml
@@ -17,7 +17,7 @@ name = "debug-sft-external"
 [orchestrator]
 training_mode = "sft"
 batch_size = 128
-rollouts_per_example = 4
+group_size = 4
 
 [orchestrator.train.sampling]
 max_completion_tokens = 2048
diff --git a/configs/debug/training_modes/sft_lora.toml b/configs/debug/training_modes/sft_lora.toml
index 560f94a321..687b45bbe3 100644
--- a/configs/debug/training_modes/sft_lora.toml
+++ b/configs/debug/training_modes/sft_lora.toml
@@ -18,7 +18,7 @@ name = "debug-sft-lora"
 [orchestrator]
 training_mode = "sft"
 batch_size = 128
-rollouts_per_example = 4
+group_size = 4
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/configs/deepscaler/stage1.toml b/configs/deepscaler/stage1.toml
index 68c95fdf26..09b5e16179 100644
--- a/configs/deepscaler/stage1.toml
+++ b/configs/deepscaler/stage1.toml
@@ -17,7 +17,7 @@ interval = 100
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 temperature = 0.6
diff --git a/configs/deepscaler/stage2.toml b/configs/deepscaler/stage2.toml
index 77c51a0340..2bb3f9feea 100644
--- a/configs/deepscaler/stage2.toml
+++ b/configs/deepscaler/stage2.toml
@@ -18,7 +18,7 @@ resume_step = 500
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 temperature = 0.6
diff --git a/configs/deepscaler/stage3.toml b/configs/deepscaler/stage3.toml
index db42eddcf3..42ed754ae8 100644
--- a/configs/deepscaler/stage3.toml
+++ b/configs/deepscaler/stage3.toml
@@ -18,7 +18,7 @@ resume_step = 1000
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 temperature = 0.6
diff --git a/configs/elastic/rl.toml b/configs/elastic/rl.toml
index 387bfddbf2..3073dc4537 100644
--- a/configs/elastic/rl.toml
+++ b/configs/elastic/rl.toml
@@ -30,7 +30,7 @@ lr = 1e-5
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 max_completion_tokens = 768
diff --git a/configs/env_mix/env_mix.toml b/configs/env_mix/env_mix.toml
index 64d8283fff..79429a3d9f 100644
--- a/configs/env_mix/env_mix.toml
+++ b/configs/env_mix/env_mix.toml
@@ -14,7 +14,7 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [[orchestrator.train.env]]
 id = "math-env"
diff --git a/configs/gsm8k/rl.toml b/configs/gsm8k/rl.toml
index ae73b2c586..e3839c8770 100644
--- a/configs/gsm8k/rl.toml
+++ b/configs/gsm8k/rl.toml
@@ -10,7 +10,7 @@ name = "Qwen/Qwen3-0.6B"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 2048
diff --git a/configs/hendrycks_math/rl.toml b/configs/hendrycks_math/rl.toml
index acc1434ea9..b4fb071ba4 100644
--- a/configs/hendrycks_math/rl.toml
+++ b/configs/hendrycks_math/rl.toml
@@ -10,7 +10,7 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 2048
@@ -33,7 +33,7 @@ max_completion_tokens = 2048
 [[orchestrator.eval.env]]
 id = "math500"
 num_examples = 30
-rollouts_per_example = 4
+group_size = 4
 
 [trainer] # Default trainer config
 
diff --git a/configs/hendrycks_math/sanity.toml b/configs/hendrycks_math/sanity.toml
index 9174350867..11da4f0bbf 100644
--- a/configs/hendrycks_math/sanity.toml
+++ b/configs/hendrycks_math/sanity.toml
@@ -10,7 +10,7 @@ name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 8
+group_size = 8
 
 [[orchestrator.train.env]]
 id = "math-env"
@@ -23,7 +23,7 @@ interval = 50
 [[orchestrator.eval.env]]
 id = "aime2024"
 name = "aime2024"
-rollouts_per_example = 16
+group_size = 16
 
 [trainer.model.ac]
 
diff --git a/configs/math_group/rl.toml b/configs/math_group/rl.toml
index 965cb1cd1c..51781dc9f2 100644
--- a/configs/math_group/rl.toml
+++ b/configs/math_group/rl.toml
@@ -9,7 +9,7 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
 
 [orchestrator]
 batch_size = 256
-rollouts_per_example = 8
+group_size = 8
 
 [[orchestrator.train.env]]
 id = "math-env"
@@ -34,7 +34,7 @@ interval = 50
 id = "aime2024"
 name = "aime2024"
 num_examples = 30
-rollouts_per_example = 4
+group_size = 4
 
 [trainer.model]
 seq_len = 4096
diff --git a/configs/math_python/math_python.toml b/configs/math_python/math_python.toml
index ae9ec70035..b5dae46f92 100644
--- a/configs/math_python/math_python.toml
+++ b/configs/math_python/math_python.toml
@@ -10,7 +10,7 @@ name = "Qwen/Qwen3-4B-Instruct-2507"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 512
diff --git a/configs/multi_reverse_text/rl.toml b/configs/multi_reverse_text/rl.toml
index f00e68ab9e..d19602c543 100644
--- a/configs/multi_reverse_text/rl.toml
+++ b/configs/multi_reverse_text/rl.toml
@@ -6,7 +6,7 @@ name = "PrimeIntellect/Qwen3-0.6B-Reverse-Text-SFT"
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 # -- train envs --
 
@@ -36,13 +36,13 @@ max_completion_tokens = 512
 id = "reverse-text"
 name = "eval-default"
 num_examples = 32
-rollouts_per_example = 4
+group_size = 4
 
 [[orchestrator.eval.env]]
 id = "reverse-text"
 name = "eval-custom"
 num_examples = 16
-rollouts_per_example = 2
+group_size = 2
 interval = 10
 
 [orchestrator.eval.env.sampling]
diff --git a/configs/multimodal/rl_color_codeword.toml b/configs/multimodal/rl_color_codeword.toml
index a98f9ae7f8..35cfcab809 100644
--- a/configs/multimodal/rl_color_codeword.toml
+++ b/configs/multimodal/rl_color_codeword.toml
@@ -10,7 +10,7 @@ language_model_attr = "model.language_model"
 
 [orchestrator]
 batch_size = 256
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 64
diff --git a/configs/multimodal/rl_color_codeword_feat_renderer.toml b/configs/multimodal/rl_color_codeword_feat_renderer.toml
index 599ff2755f..d7fcce03c6 100644
--- a/configs/multimodal/rl_color_codeword_feat_renderer.toml
+++ b/configs/multimodal/rl_color_codeword_feat_renderer.toml
@@ -38,7 +38,7 @@ gpus_per_node = 2
 
 [orchestrator]
 batch_size = 16
-rollouts_per_example = 8
+group_size = 8
 use_renderer = true
 
 # Track zero-advantage groups but don't drop them — we're validating the
@@ -64,7 +64,7 @@ args = { images_per_turn = 2, max_turns = 2, num_examples = 100, seed = 42 }
 
 [orchestrator.renderer]
 name = "auto"
-# 64 concurrent rollouts (batch_size=16 × rollouts_per_example=4) want
+# 64 concurrent rollouts (batch_size=16 × group_size=4) want
 # more than one tokenizer slot to avoid serialization queueing. The
 # image processor (CPU-bound) dominates for VLMs so returns diminish
 # past 4; bump to 4 as the default for multimodal runs.
diff --git a/configs/multimodal/rl_color_codeword_test.toml b/configs/multimodal/rl_color_codeword_test.toml
index 151bad0987..23a25a94f9 100644
--- a/configs/multimodal/rl_color_codeword_test.toml
+++ b/configs/multimodal/rl_color_codeword_test.toml
@@ -11,7 +11,7 @@ language_model_attr = "model.language_model"
 
 [orchestrator]
 batch_size = 16
-rollouts_per_example = 2
+group_size = 2
 
 [orchestrator.train.sampling]
 max_completion_tokens = 32
diff --git a/configs/nemotron_4node/rl.toml b/configs/nemotron_4node/rl.toml
index 0fda06a049..b46fbe1c41 100644
--- a/configs/nemotron_4node/rl.toml
+++ b/configs/nemotron_4node/rl.toml
@@ -46,7 +46,7 @@ max_inflight_activations = 5
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 max_completion_tokens = 2048
@@ -69,7 +69,7 @@ max_completion_tokens = 2048
 [[orchestrator.eval.env]]
 id = "math500"
 num_examples = 30
-rollouts_per_example = 4
+group_size = 4
 
 [inference.parallel]
 tp = 8
diff --git a/configs/nemotron_debug/rl.toml b/configs/nemotron_debug/rl.toml
index 9e1e921b09..342840dfd7 100644
--- a/configs/nemotron_debug/rl.toml
+++ b/configs/nemotron_debug/rl.toml
@@ -45,7 +45,7 @@ freq = 1
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 4096
@@ -68,7 +68,7 @@ max_completion_tokens = 4096
 [[orchestrator.eval.env]]
 id = "math500"
 num_examples = 30
-rollouts_per_example = 4
+group_size = 4
 
 [inference.parallel]
 tp = 4
diff --git a/configs/wiki_search/rl.toml b/configs/wiki_search/rl.toml
index d688ba21dc..ebf0037b03 100644
--- a/configs/wiki_search/rl.toml
+++ b/configs/wiki_search/rl.toml
@@ -27,7 +27,7 @@ target_modules = [
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 oversampling_factor = 2.0
 
 [orchestrator.train.sampling]
diff --git a/docs/bring-your-own-algorithms.md b/docs/bring-your-own-algorithms.md
index fba1b1072f..a81549cacd 100644
--- a/docs/bring-your-own-algorithms.md
+++ b/docs/bring-your-own-algorithms.md
@@ -70,7 +70,7 @@ kwargs = { clip_eps = 0.2 }
 
 ## 2. Custom Advantage Functions
 
-Advantages are computed **per-group** (one example × N rollouts). You provide a function that computes advantages for a single group; the framework calls it once per group and stitches the results back together. Groups may have fewer than `rollouts_per_example` rollouts when some rollouts in the group errored (partial-group training).
+Advantages are computed **per-group** (one example × N rollouts). You provide a function that computes advantages for a single group; the framework calls it once per group and stitches the results back together. Groups may have fewer than `group_size` rollouts when some rollouts in the group errored (partial-group training).
 
 ### Interface
 
diff --git a/docs/slurm.md b/docs/slurm.md
index 7b171d4894..17a38ae8ec 100644
--- a/docs/slurm.md
+++ b/docs/slurm.md
@@ -138,7 +138,7 @@ freq = 1
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.sampling]
 max_tokens = 2048
diff --git a/examples/alphabet_sort/rl.toml b/examples/alphabet_sort/rl.toml
index 480cc9f04b..be35366d8c 100644
--- a/examples/alphabet_sort/rl.toml
+++ b/examples/alphabet_sort/rl.toml
@@ -26,7 +26,7 @@ lr = 1e-5
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 8
+group_size = 8
 
 [orchestrator.train.sampling]
 max_completion_tokens = 768
diff --git a/examples/glm5_pd_disag/rl.toml b/examples/glm5_pd_disag/rl.toml
index 91ac41300a..1ec1a5b435 100644
--- a/examples/glm5_pd_disag/rl.toml
+++ b/examples/glm5_pd_disag/rl.toml
@@ -62,7 +62,7 @@ weight_decay = 0.1
 
 [orchestrator]
 batch_size = 4096
-rollouts_per_example = 16
+group_size = 16
 oversampling_factor = 3
 max_off_policy_steps = 16
 
diff --git a/examples/hendrycks_sanity/rl.toml b/examples/hendrycks_sanity/rl.toml
index 10b7b1ce62..681e2682a7 100644
--- a/examples/hendrycks_sanity/rl.toml
+++ b/examples/hendrycks_sanity/rl.toml
@@ -13,7 +13,7 @@ name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 8
+group_size = 8
 seq_len = 8192
 
 [[orchestrator.train.env]]
@@ -27,7 +27,7 @@ interval = 50
 [[orchestrator.eval.env]]
 id = "primeintellect/aime2024"
 name = "aime2024"
-rollouts_per_example = 32
+group_size = 32
 
 [trainer.model]
 seq_len = 16384
diff --git a/examples/multinode/rl.toml b/examples/multinode/rl.toml
index 6d2854077a..f5ee93d16b 100644
--- a/examples/multinode/rl.toml
+++ b/examples/multinode/rl.toml
@@ -36,7 +36,7 @@ freq = 1
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 2048
diff --git a/examples/reverse_text/rl.toml b/examples/reverse_text/rl.toml
index 75a4530cdd..6b3db70a55 100644
--- a/examples/reverse_text/rl.toml
+++ b/examples/reverse_text/rl.toml
@@ -10,7 +10,7 @@ name = "reverse-text"
 
 [orchestrator]
 batch_size = 128
-rollouts_per_example = 16
+group_size = 16
 
 [orchestrator.train.sampling]
 max_completion_tokens = 128
diff --git a/examples/wiki_search/rl.toml b/examples/wiki_search/rl.toml
index 6abbb3d815..599d70658f 100644
--- a/examples/wiki_search/rl.toml
+++ b/examples/wiki_search/rl.toml
@@ -31,7 +31,7 @@ target_modules = [
 
 [orchestrator]
 batch_size = 512
-rollouts_per_example = 16
+group_size = 16
 oversampling_factor = 2.0
 
 [orchestrator.model.lora]
diff --git a/examples/wordle/rl.toml b/examples/wordle/rl.toml
index 07ac44d863..c16419ef81 100644
--- a/examples/wordle/rl.toml
+++ b/examples/wordle/rl.toml
@@ -16,7 +16,7 @@ name = "PrimeIntellect/Qwen3-1.7B-Wordle-SFT"
 
 [orchestrator]
 batch_size = 1024
-rollouts_per_example = 16
+group_size = 16
 
 [[orchestrator.train.env]]
 id = "primeintellect/wordle"
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index 1507f96079..8b68efe9cd 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -250,8 +250,8 @@ class EvalEnvConfig(EnvConfig):
     num_examples: int = -1
     """Eval examples to sample from the dataset. ``-1`` uses all available examples."""
 
-    rollouts_per_example: int = Field(1, ge=1)
-    """Rollouts generated per example. Used for pass@k estimation (e.g. ``rollouts_per_example=8`` enables pass@1 through pass@8)."""
+    group_size: int = Field(1, ge=1, validation_alias=AliasChoices("group_size", "rollouts_per_example"))
+    """Rollouts generated per example. Used for pass@k estimation (e.g. ``group_size=8`` enables pass@1 through pass@8)."""
 
     interval: int = Field(100, ge=1)
     """Per-env eval interval. If unset, inherits from the group-level eval interval."""
@@ -316,7 +316,7 @@ class EvalConfig(BaseConfig):
     num_examples: int = -1
     """Default eval examples per environment. ``-1`` uses all. Can be overridden per env."""
 
-    rollouts_per_example: int = Field(1, ge=1)
+    group_size: int = Field(1, ge=1, validation_alias=AliasChoices("group_size", "rollouts_per_example"))
     """Default rollouts per example. Can be overridden per env."""
 
     num_workers: int | Literal["auto"] = "auto"
@@ -330,7 +330,7 @@ class EvalConfig(BaseConfig):
 
     @model_validator(mode="after")
     def resolve_env_defaults(self):
-        """Resolve per-env overrides: inherit group-level sampling, num_workers, max_retries, num_examples, rollouts_per_example, and interval. Then resolve auto num_workers."""
+        """Resolve per-env overrides: inherit group-level sampling, num_workers, max_retries, num_examples, group_size, and interval. Then resolve auto num_workers."""
         group_sampling = self.sampling.model_dump()
         for env in self.env:
             if "sampling" not in env.model_fields_set:
@@ -340,20 +340,20 @@ def resolve_env_defaults(self):
                 env.sampling = EvalSamplingConfig(**merged)
             if "num_examples" not in env.model_fields_set:
                 env.num_examples = self.num_examples
-            if "rollouts_per_example" not in env.model_fields_set:
-                env.rollouts_per_example = self.rollouts_per_example
+            if "group_size" not in env.model_fields_set:
+                env.group_size = self.group_size
             if "interval" not in env.model_fields_set:
                 env.interval = self.interval
             if "num_workers" not in env.model_fields_set:
                 env.num_workers = self.num_workers
             if "max_retries" not in env.model_fields_set:
                 env.max_retries = self.max_retries
-            # Resolve auto num_workers now that num_examples and rollouts_per_example are set
+            # Resolve auto num_workers now that num_examples and group_size are set
             if env.num_workers == "auto":
                 if env.num_examples == -1:
                     env.num_workers = 4
                 else:
-                    max_concurrent = env.num_examples * env.rollouts_per_example
+                    max_concurrent = env.num_examples * env.group_size
                     env.num_workers = max(1, math.ceil(max_concurrent / 256))
         return self
 
@@ -622,7 +622,7 @@ class OrchestratorConfig(BaseConfig):
     max_inflight_rollouts: int | None = Field(None, ge=1)
     """Maximum number of rollouts kept in-flight. Required for token-based batching. With ``batch_size`` set, defaults to ``batch_size * oversampling_factor`` (or ``batch_size`` when ``oversampling_factor`` is unset)."""
 
-    rollouts_per_example: int = Field(1, ge=1)
+    group_size: int = Field(1, ge=1, validation_alias=AliasChoices("group_size", "rollouts_per_example"))
     """Output sequences returned per example during training."""
 
     seq_len: int = 2048
@@ -907,11 +907,11 @@ def resolve_batching(self):
                 raise ValueError("max_inflight_rollouts must be set when token_batch_size is set")
         else:
             assert self.batch_size is not None
-            if self.batch_size % self.rollouts_per_example != 0:
+            if self.batch_size % self.group_size != 0:
                 raise ValueError("Batch size must be divisible by the number of samples per problem")
             oversampling_factor = self.oversampling_factor if self.oversampling_factor is not None else 1.0
             resolved_max_inflight_rollouts = max(
-                self.rollouts_per_example,
+                self.group_size,
                 int(self.batch_size * oversampling_factor),
             )
             if self.max_inflight_rollouts is not None and self.oversampling_factor is not None:
@@ -921,7 +921,7 @@ def resolve_batching(self):
             if self.max_inflight_rollouts is None:
                 self.max_inflight_rollouts = resolved_max_inflight_rollouts
 
-        if self.max_inflight_rollouts is not None and self.max_inflight_rollouts < self.rollouts_per_example:
+        if self.max_inflight_rollouts is not None and self.max_inflight_rollouts < self.group_size:
             raise ValueError("max_inflight_rollouts must be at least the number of rollouts per example")
 
         # Resolve train env num_workers from max_inflight_rollouts
diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py
index c7ac150aa6..0fdace4e2c 100644
--- a/src/prime_rl/orchestrator/envs.py
+++ b/src/prime_rl/orchestrator/envs.py
@@ -142,12 +142,12 @@ async def run_group(
         client: vf.ClientConfig,
         example: dict,
         model_name: str,
-        rollouts_per_example: int,
+        group_size: int,
         cache_salt: str,
     ) -> list[vf.RolloutOutput]:
         """Run a group of rollouts for an example. Required for group-scoring envs."""
         return await self.env.run_group(
-            [vf.RolloutInput(**example) for _ in range(rollouts_per_example)],
+            [vf.RolloutInput(**example) for _ in range(group_size)],
             client=client,
             model=model_name,
             sampling_args=self._sampling_args_with_salt(cache_salt),
@@ -191,30 +191,30 @@ async def evaluate(
         cache_salt: str,
     ) -> list[vf.RolloutOutput]:
         num_examples = len(self.examples)
-        rollouts_per_example = self.config.rollouts_per_example
-        get_logger().info(f"Evaluating {self.name} ({num_examples=}, {rollouts_per_example=})")
-        total_rollouts = num_examples * rollouts_per_example
+        group_size = self.config.group_size
+        get_logger().info(f"Evaluating {self.name} ({num_examples=}, {group_size=})")
+        total_rollouts = num_examples * group_size
         pbar = ProgressTracker(total=total_rollouts, desc=f"Evaluating {self.name}")
         eval_start = time.perf_counter()
 
         if self.requires_group_scoring:
 
             async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
-                """Run rollouts_per_example rollouts as a scored group for one example."""
+                """Run group_size rollouts as a scored group for one example."""
                 try:
                     client = await get_client()
                     outputs = await self.run_group(
                         client=client,
                         example=example,
                         model_name=model_name,
-                        rollouts_per_example=rollouts_per_example,
+                        group_size=group_size,
                         cache_salt=cache_salt,
                     )
-                    pbar.update(rollouts_per_example)
+                    pbar.update(group_size)
                     return outputs
                 except Exception as e:
                     get_logger().warning(f"Group failed: {e}")
-                    pbar.update(rollouts_per_example)
+                    pbar.update(group_size)
                     return None
 
             coros = [run_with_progress(example) for example in self.examples]
@@ -235,7 +235,7 @@ async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
                     pbar.update(1)
                     return None
 
-            coros = [run_with_progress(example) for example in self.examples for _ in range(rollouts_per_example)]
+            coros = [run_with_progress(example) for example in self.examples for _ in range(group_size)]
 
         try:
             results = await asyncio.gather(*coros)
@@ -291,9 +291,7 @@ async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
             pass_at_k = None
             get_logger().warning("Skipping computing pass@k rates because the task rewards appear to be non-binary")
 
-        message = (
-            f"Evaluated {self.name} in {eval_time:.2f}s (Avg@{rollouts_per_example}={results_df.reward.mean():.4f}"
-        )
+        message = f"Evaluated {self.name} in {eval_time:.2f}s (Avg@{group_size}={results_df.reward.mean():.4f}"
         if could_be_binary:
             assert pass_at_k is not None
             for pass_rate, pass_rate_score in pd.Series(pass_at_k.mean()).items():
@@ -307,7 +305,7 @@ async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
         get_logger().success(message)
 
         eval_metrics = {
-            f"avg@{rollouts_per_example}": float(results_df.reward.mean()),
+            f"avg@{group_size}": float(results_df.reward.mean()),
             "no_response/mean": float(results_df.no_response.mean()),
             "no_response/count": int(results_df.no_response.sum()),
             "completion_len/mean": results_df.completion_len.mean().item(),
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index 5e5932ef58..1871bfca85 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -614,7 +614,7 @@ def compute_solve_rates(df):
             """Compute solve_none, solve_all, effective_batch_size for a set of rollouts."""
             reward_per_problem = df.groupby(["env_name", "example_id"]).reward.sum()
             solve_none = (reward_per_problem == 0).mean()
-            solve_all = (reward_per_problem == config.rollouts_per_example).mean()
+            solve_all = (reward_per_problem == config.group_size).mean()
             return solve_none, solve_all, 1 - solve_none - solve_all
 
         # Group by (env_name, example_id) to average across rollouts within each problem
diff --git a/src/prime_rl/orchestrator/scheduler.py b/src/prime_rl/orchestrator/scheduler.py
index 7b24ad9e49..c608ae0d1e 100644
--- a/src/prime_rl/orchestrator/scheduler.py
+++ b/src/prime_rl/orchestrator/scheduler.py
@@ -80,7 +80,7 @@ def __init__(
         self.config = config
         self.batch_size = config.batch_size
         self.token_batch_size = config.token_batch_size
-        self.rollouts_per_example = config.rollouts_per_example
+        self.group_size = config.group_size
         self.max_inflight_rollouts = max_inflight_rollouts
         self.max_async_level = max_async_level
         self.max_off_policy_steps = max_off_policy_steps
@@ -219,7 +219,7 @@ async def schedule_rollout(self, group_id: int):
                     client=client_config,
                     example=group.example,
                     model_name=self.model_name,
-                    rollouts_per_example=rollout_count,
+                    group_size=rollout_count,
                     cache_salt=cache_salt,
                 )
             )
@@ -266,13 +266,13 @@ async def _schedule_next_request(self) -> bool:
                 await self.schedule_rollout(group_id=group_id)
                 return True
 
-        if remaining_capacity < self.rollouts_per_example:
+        if remaining_capacity < self.group_size:
             return False
 
         example = self.buffer.sample_examples(n=1)[0]
         group_id = self.next_group_id
         self.next_group_id += 1
-        self.groups[group_id] = GroupState(example=example, rollouts_to_schedule=self.rollouts_per_example)
+        self.groups[group_id] = GroupState(example=example, rollouts_to_schedule=self.group_size)
         await self.schedule_rollout(group_id=group_id)
         return True
 
@@ -475,16 +475,16 @@ async def generate_batch(self, step: int) -> list[vf.RolloutOutput]:
 
                     # Wait until every dispatched rollout has come back (succeeded
                     # or failed) before finalizing. The group may finalize as a
-                    # partial group (< rollouts_per_example) when some rollouts
+                    # partial group (< group_size) when some rollouts
                     # errored - downstream advantage computation groups by
                     # (env_name, example_id), so variable-size groups are fine.
-                    if len(group.completed_rollouts) + group.failed_rollouts < self.rollouts_per_example:
+                    if len(group.completed_rollouts) + group.failed_rollouts < self.group_size:
                         continue
 
                     if not group.completed_rollouts:
                         self.dropped_groups_by_env[env_name] += 1
                         self.logger.warning(
-                            f"Dropping group {group_id} ({env_name}) - all {self.rollouts_per_example} rollouts failed"
+                            f"Dropping group {group_id} ({env_name}) - all {self.group_size} rollouts failed"
                         )
                         self.groups.pop(group_id, None)
                         continue
@@ -492,7 +492,7 @@ async def generate_batch(self, step: int) -> list[vf.RolloutOutput]:
                     if group.failed_rollouts > 0:
                         self.logger.warning(
                             f"Partial group {group_id} ({env_name}) - "
-                            f"{len(group.completed_rollouts)}/{self.rollouts_per_example} valid "
+                            f"{len(group.completed_rollouts)}/{self.group_size} valid "
                             f"({group.failed_rollouts} failed)"
                         )
 
diff --git a/tests/unit/orchestrator/test_advantage.py b/tests/unit/orchestrator/test_advantage.py
index 93c2b7ea4e..6acd1057e4 100644
--- a/tests/unit/orchestrator/test_advantage.py
+++ b/tests/unit/orchestrator/test_advantage.py
@@ -288,7 +288,7 @@ def test_compute_advantages_without_config():
 
 
 def test_compute_advantages_partial_groups():
-    """Partial groups (size < rollouts_per_example) are advantaged against their own mean.
+    """Partial groups (size < group_size) are advantaged against their own mean.
 
     Two groups of different sizes must round-trip cleanly: each group's advantages
     must sum to zero and not leak into the other.
diff --git a/tests/unit/test_configs.py b/tests/unit/test_configs.py
index 66ce195bc6..77e6c84bf1 100644
--- a/tests/unit/test_configs.py
+++ b/tests/unit/test_configs.py
@@ -435,7 +435,7 @@ def test_shared_output_dir_propagates_through_cli(tmp_path):
             "seq_len": 128,
             "model": {"name": "Qwen/Qwen3-0.6B"},
             "trainer": {},
-            "orchestrator": {"batch_size": 16, "rollouts_per_example": 1},
+            "orchestrator": {"batch_size": 16, "group_size": 1},
             "inference": {},
         },
     )
diff --git a/tests/unit/train/rl/test_packer.py b/tests/unit/train/rl/test_packer.py
index 7068e0665a..187eeecf5a 100644
--- a/tests/unit/train/rl/test_packer.py
+++ b/tests/unit/train/rl/test_packer.py
@@ -29,7 +29,7 @@ def create_run_with_config(output_dir: Path, run_name: str) -> Path:
     config = {
         "model": {"name": "test-model"},
         "batch_size": 2,
-        "rollouts_per_example": 1,
+        "group_size": 1,
         "env": [{"id": "test-env"}],
         "sampling": {"temperature": 1.0},
         # test-model isn't in MODEL_RENDERER_MAP; bypass the renderer-resolution validator.
diff --git a/tests/unit/train/test_runs.py b/tests/unit/train/test_runs.py
index b80da9c91e..883ef91bdb 100644
--- a/tests/unit/train/test_runs.py
+++ b/tests/unit/train/test_runs.py
@@ -40,7 +40,7 @@ def create_run_with_config(
         config = {
             "model": {"name": "test-model"},
             "batch_size": 32,
-            "rollouts_per_example": 4,
+            "group_size": 4,
             "env": [{"id": "test-env"}],
             # test-model isn't in MODEL_RENDERER_MAP; bypass the renderer-resolution validator.
             "use_renderer": False,
@@ -201,7 +201,7 @@ def test_config_loading(tmp_path: Path) -> None:
         "model": {"name": "test-model"},
         "batch_size": 32,
         "max_steps": 1000,
-        "rollouts_per_example": 4,
+        "group_size": 4,
         "env": [{"id": "test-env"}],
         "use_renderer": False,
     }
@@ -246,7 +246,7 @@ def test_config_cleanup_on_deletion(tmp_path: Path) -> None:
     test_config = {
         "model": {"name": "test-model"},
         "batch_size": 16,
-        "rollouts_per_example": 4,
+        "group_size": 4,
         "env": [{"id": "test-env"}],
         "use_renderer": False,
     }
@@ -277,7 +277,7 @@ def test_config_invalid(tmp_path: Path) -> None:
     invalid_config = {
         "model": {"name": "test-model"},
         "batch_size": "not-a-number",  # Invalid type
-        "rollouts_per_example": 4,
+        "group_size": 4,
         "env": [{"id": "test-env"}],
     }
     run_dir = create_run_with_config(tmp_path, "run_invalid", config=invalid_config)