diff --git a/configs/experimental/dynamo/hendrycks_math_qwen4b_aime25.toml b/configs/experimental/dynamo/hendrycks_math_qwen4b_aime25.toml
new file mode 100644
index 0000000000..eb62945a1f
--- /dev/null
+++ b/configs/experimental/dynamo/hendrycks_math_qwen4b_aime25.toml
@@ -0,0 +1,63 @@
+max_steps = 500
+seq_len = 2048
+max_async_level = 1
+
+[wandb]
+project = "hendrycks-math-qwen4b"
+group = "dynamo-vllm-qwen4b-aime25-500-bs64"
+name = "dynamo-qwen4b-aime25-500-bs64"
+
+[model]
+name = "Qwen/Qwen3-4B-Instruct-2507"
+
+[orchestrator]
+batch_size = 64
+max_inflight_rollouts = 64
+rollouts_per_example = 4
+num_train_workers = 8
+max_off_policy_steps = 8
+use_token_client = false
+
+[orchestrator.train.sampling]
+temperature = 1.0
+repetition_penalty = 1.0
+max_completion_tokens = 1024
+min_tokens = 0
+extra_body = { top_k = -1, min_p = 0.0 }
+
+[[orchestrator.train.env]]
+id = "math-env"
+name = "hendrycks-math"
+args = { dataset_name = "PrimeIntellect/Hendrycks-Math", dataset_subset = "default", math_verify_max_workers = 128, math_verify_timeout = 60 }
+
+[orchestrator.buffer]
+easy_threshold = 1.0
+hard_threshold = 0.0
+
+[orchestrator.eval]
+interval = 100
+eval_base_model = false
+skip_eval_on_resume = true
+
+[orchestrator.eval.sampling]
+max_completion_tokens = 1024
+
+[[orchestrator.eval.env]]
+id = "aime2025"
+name = "aime2025"
+num_examples = 30
+rollouts_per_example = 4
+interval = 100
+
+[trainer]
+
+[inference]
+backend = "dynamo"
+vllm_extra = { max_num_seqs = 64 }
+
+[inference.dynamo]
+deploy_router = true
+router_mode = "round-robin"
+discovery_backend = "file"
+request_plane = "tcp"
+system_port = 8081
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/inference.py b/packages/prime-rl-configs/src/prime_rl/configs/inference.py
index 805481de76..b638aa3867 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/inference.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/inference.py
@@ -10,6 +10,17 @@
 
 # TODO: Set thinking/ solution budget
 
+InferenceBackend: TypeAlias = Literal["vllm", "dynamo"]
+DynamoRouterMode: TypeAlias = Literal[
+    "round-robin",
+    "random",
+    "kv",
+    "direct",
+    "power-of-two",
+    "least-loaded",
+    "device-aware-weighted",
+]
+
 
 class ServerConfig(BaseConfig):
     """Configures the inference server."""
@@ -120,6 +131,78 @@ class WeightBroadcastConfig(BaseConfig):
     )
 
 
+class DynamoConfig(BaseConfig):
+    """Configures the experimental Dynamo backend."""
+
+    deploy_router: Annotated[
+        bool,
+        Field(
+            description=(
+                "Launch Dynamo's frontend/router from the inference entrypoint. Disable only when an "
+                "external Dynamo frontend/router is already deployed and orchestrator.client.base_url points at it."
+            ),
+        ),
+    ] = True
+
+    namespace: Annotated[
+        str,
+        Field(description="Dynamo namespace used by the frontend and worker for service discovery."),
+    ] = "dynamo"
+
+    discovery_backend: Annotated[
+        Literal["file", "etcd", "kubernetes", "mem"],
+        Field(description="Dynamo discovery backend. Use 'file' for local single-node runs without etcd/NATS."),
+    ] = "file"
+
+    request_plane: Annotated[
+        Literal["tcp", "nats", "http"],
+        Field(description="Dynamo request plane used between the frontend and worker."),
+    ] = "tcp"
+
+    router_mode: Annotated[
+        DynamoRouterMode,
+        Field(description="Dynamo frontend router mode used for OpenAI chat-completions traffic."),
+    ] = "round-robin"
+
+    min_initial_workers: Annotated[
+        int | None,
+        Field(
+            ge=0,
+            description="Optional Dynamo frontend minimum worker count before the router starts serving requests.",
+        ),
+    ] = None
+
+    event_plane: Annotated[
+        Literal["nats", "zmq"] | None,
+        Field(description="Dynamo event plane. If None, Dynamo derives it from discovery_backend."),
+    ] = None
+
+    system_port: Annotated[
+        int,
+        Field(ge=1, le=65535, description="Dynamo worker system-server port for /health and /engine routes."),
+    ] = 8081
+
+    use_vllm_tokenizer: Annotated[
+        bool,
+        Field(
+            description=(
+                "Use Dynamo's text-in/text-out vLLM worker path. prime-rl defaults this off so the "
+                "Dynamo frontend tokenizes OpenAI requests before dispatching them to the worker."
+            ),
+        ),
+    ] = False
+
+    frontend_extra: Annotated[
+        dict[str, Any],
+        Field(description="Extra CLI arguments for python -m dynamo.frontend."),
+    ] = {}
+
+    worker_extra: Annotated[
+        dict[str, Any],
+        Field(description="Extra CLI arguments for the Dynamo vLLM worker."),
+    ] = {}
+
+
 class KVCacheOffloadConfig(BaseModel):
     """CPU KV cache offloading for vLLM inference workers."""
 
@@ -252,6 +335,16 @@ class InferenceExperimentalConfig(BaseConfig):
 class InferenceConfig(BaseConfig):
     """Configures inference."""
 
+    backend: Annotated[
+        InferenceBackend,
+        Field(
+            description=(
+                "Inference server backend to launch. vLLM is the default; Dynamo is an experimental "
+                "backend for OpenAI-compatible rollouts with prime-rl weight updates."
+            )
+        ),
+    ] = "vllm"
+
     # The server configuration
     server: ServerConfig = ServerConfig()
 
@@ -384,6 +477,11 @@ class InferenceConfig(BaseConfig):
         WeightBroadcastConfig()
     )
 
+    dynamo: Annotated[
+        DynamoConfig,
+        Field(description="Experimental Dynamo-specific settings used when backend='dynamo'."),
+    ] = DynamoConfig()
+
     kv_cache_offload: Annotated[
         KVCacheOffloadConfig | None,
         Field(
@@ -447,6 +545,17 @@ def validate_multi_node_requires_slurm(self):
             raise ValueError("Must use SLURM for multi-node deployment.")
         return self
 
+    @model_validator(mode="after")
+    def validate_backend_support(self):
+        if self.backend != "dynamo":
+            return self
+
+        if self.deployment.type != "single_node":
+            raise ValueError("inference.backend='dynamo' currently supports only single_node deployment.")
+        if self.kv_cache_offload is not None:
+            raise ValueError("inference.backend='dynamo' does not support prime-rl kv_cache_offload plumbing yet.")
+        return self
+
     @model_validator(mode="after")
     def auto_setup_kv_cache_offload(self):
         if self.kv_cache_offload is not None:
@@ -603,3 +712,110 @@ def to_vllm(self) -> Namespace:
                 delattr(namespace, "rope_scaling")
 
         return namespace
+
+    def to_dynamo_frontend(self) -> Namespace:
+        """Convert InferenceConfig to Dynamo frontend CLI namespace."""
+        namespace = Namespace()
+        to_frontend = {
+            "server.host": "http_host",
+            "server.port": "http_port",
+            "dynamo.namespace": "namespace",
+            "dynamo.discovery_backend": "discovery_backend",
+            "dynamo.request_plane": "request_plane",
+            "dynamo.router_mode": "router_mode",
+            "dynamo.min_initial_workers": "min_initial_workers",
+            "dynamo.event_plane": "event_plane",
+            "model.name": "model_name",
+        }
+
+        for config_key, frontend_key in to_frontend.items():
+            value = rgetattr(self, config_key.replace("-", "_"))
+            rsetattr(namespace, frontend_key, value)
+
+        namespace.dyn_chat_processor = "vllm"
+
+        for key, value in self.dynamo.frontend_extra.items():
+            setattr(namespace, key, value)
+
+        for optional_key in ("http_host", "event_plane", "model_name", "min_initial_workers"):
+            value = getattr(namespace, optional_key, None)
+            if value is None:
+                delattr(namespace, optional_key)
+
+        return namespace
+
+    def to_dynamo_vllm(self) -> Namespace:
+        """Convert InferenceConfig to Dynamo vLLM worker CLI namespace."""
+        namespace = Namespace()
+        to_dynamo_vllm = {
+            "dynamo.namespace": "namespace",
+            "dynamo.discovery_backend": "discovery_backend",
+            "dynamo.request_plane": "request_plane",
+            "dynamo.event_plane": "event_plane",
+            "dynamo.use_vllm_tokenizer": "use_vllm_tokenizer",
+            "model.name": "model",
+            "model.dtype": "dtype",
+            "model.max_model_len": "max_model_len",
+            "model.enforce_eager": "enforce_eager",
+            "model.trust_remote_code": "trust_remote_code",
+            "model.rope_scaling": "rope_scaling",
+            "parallel.tp": "tensor_parallel_size",
+            "parallel.dp": "data_parallel_size",
+            "data_parallel_size_local": "data_parallel_size_local",
+            "data_parallel_rpc_port": "data_parallel_rpc_port",
+            "enable_lora": "enable_lora",
+            "enable_prefix_caching": "enable_prefix_caching",
+            "max_loras": "max_loras",
+            "max_cpu_loras": "max_cpu_loras",
+            "max_lora_rank": "max_lora_rank",
+            "lora_target_modules": "lora_target_modules",
+            "gpu_memory_utilization": "gpu_memory_utilization",
+            "enable_return_routed_experts": "enable_return_routed_experts",
+            "enable_expert_parallel": "enable_expert_parallel",
+            "all2all_backend": "all2all_backend",
+            "enable_eplb": "enable_eplb",
+            "enable_dbo": "enable_dbo",
+            "seed": "seed",
+        }
+
+        for config_key, dynamo_key in to_dynamo_vllm.items():
+            value = rgetattr(self, config_key.replace("-", "_"))
+            rsetattr(namespace, dynamo_key, value)
+
+        namespace.served_model_name = self.model.name
+
+        if self.model.tool_call_parser not in (None, "auto"):
+            namespace.dyn_tool_call_parser = self.model.tool_call_parser
+        if self.model.reasoning_parser is not None:
+            namespace.dyn_reasoning_parser = self.model.reasoning_parser
+
+        namespace.logprobs_mode = "processed_logprobs"
+
+        if self.enable_fp32_lm_head:
+            existing = getattr(namespace, "additional_config", None) or {}
+            existing["fp32_lm_head"] = True
+            rsetattr(namespace, "additional_config", existing)
+
+        for key, value in self.vllm_extra.items():
+            setattr(namespace, key, value)
+        for key, value in self.dynamo.worker_extra.items():
+            setattr(namespace, key, value)
+
+        for optional_key in (
+            "event_plane",
+            "rope_scaling",
+            "data_parallel_size_local",
+            "data_parallel_rpc_port",
+            "enable_prefix_caching",
+            "max_lora_rank",
+            "lora_target_modules",
+            "additional_config",
+        ):
+            if not hasattr(namespace, optional_key):
+                continue
+
+            value = getattr(namespace, optional_key)
+            if value is None:
+                delattr(namespace, optional_key)
+
+        return namespace
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index 3911816227..a5753af8c6 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -1339,5 +1339,6 @@ def resolve_env_config(self):
             if is_vllm:
                 env.sampling.extra_body.setdefault("top_k", -1)
                 env.sampling.extra_body.setdefault("min_p", 0.0)
-                env.sampling.extra_body.setdefault("return_token_ids", True)
+                if self.use_token_client:
+                    env.sampling.extra_body.setdefault("return_token_ids", True)
         return self
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/rl.py b/packages/prime-rl-configs/src/prime_rl/configs/rl.py
index a160af2c9f..e403dd4c8d 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/rl.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/rl.py
@@ -681,6 +681,35 @@ def auto_setup_weight_broadcast(self):
 
         return self
 
+    @model_validator(mode="after")
+    def auto_setup_dynamo_backend(self):
+        """Configure orchestrator compatibility for the experimental Dynamo backend."""
+        if self.inference is None or self.inference.backend != "dynamo":
+            return self
+
+        self.orchestrator.client.admin_backend = "dynamo"
+        if self.orchestrator.client.admin_base_url is None:
+            self.orchestrator.client.admin_base_url = [f"http://localhost:{self.inference.dynamo.system_port}"]
+
+        if self.orchestrator.use_renderer:
+            raise ValueError(
+                "inference.backend='dynamo' does not support orchestrator.use_renderer because prime-rl's "
+                "renderer client targets the vLLM-only /v1/generate endpoint."
+            )
+
+        if self.orchestrator.use_token_client:
+            if "use_token_client" in self.orchestrator.model_fields_set:
+                raise ValueError(
+                    "inference.backend='dynamo' does not support orchestrator.use_token_client because Dynamo "
+                    "does not expose prime-rl's vLLM-only /v1/chat/completions/tokens endpoint."
+                )
+            self.orchestrator.use_token_client = False
+
+        for env in self.orchestrator.train.env:
+            env.sampling.extra_body.pop("return_token_ids", None)
+
+        return self
+
     @model_validator(mode="after")
     def validate_eplb_requires_quantized_weight_transfer(self):
         if self.inference is None or not self.inference.enable_eplb:
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/shared.py b/packages/prime-rl-configs/src/prime_rl/configs/shared.py
index d26c33d9a9..f999d1bf54 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/shared.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/shared.py
@@ -79,6 +79,7 @@ def resolve_project_dir(self):
 
 
 ServerType = Literal["vllm", "openai"]
+AdminBackend = Literal["vllm", "dynamo"]
 
 
 class VLMConfig(BaseConfig):
@@ -310,6 +311,11 @@ class ClientConfig(BaseConfig):
         ),
     ] = None
 
+    admin_backend: Annotated[
+        AdminBackend,
+        Field(description="Backend API exposed by admin_base_url/base_url for weight updates and health checks."),
+    ] = "vllm"
+
     elastic: Annotated[
         ElasticConfig | None,
         Field(
diff --git a/pyproject.toml b/pyproject.toml
index e130ba0f9c..a6a162d64e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,6 +86,10 @@ gpt-oss = [
 quack = [
     "quack-kernels>=0.3.3",
 ]
+dynamo = [
+    "ai-dynamo==1.1.1",
+    "blake3>=1.0.0,<2.0.0",
+]
 all = [
     "prime-rl[flash-attn]",
     "prime-rl[disagg]",
@@ -129,6 +133,8 @@ override-dependencies = [
 [tool.uv.exclude-newer-package]
 # we want latest vllm, remove next patch
 vllm = false
+ai-dynamo = false
+ai-dynamo-runtime = false
 flash_attn_3 = false
 # Self-vendored packages on our primeintellect index
 reverse-text = false
diff --git a/skills/config/SKILL.md b/skills/config/SKILL.md
index e8dc13216c..b9afa50528 100644
--- a/skills/config/SKILL.md
+++ b/skills/config/SKILL.md
@@ -132,6 +132,25 @@ On the CLI, pass as a JSON string:
 uv run inference --vllm-extra '{"key1": "value1", "key2": 123}'
 ```
 
+### Experimental Dynamo backend
+
+Dynamo support is intentionally isolated behind `inference.backend = "dynamo"` and requires installing the optional Dynamo extra. The launcher starts Dynamo's frontend/router and a Dynamo vLLM worker from `prime_rl.experimental.dynamo`:
+
+```toml
+[inference]
+backend = "dynamo"
+
+[inference.dynamo]
+deploy_router = true
+router_mode = "round-robin"
+system_port = 8081
+discovery_backend = "file"
+```
+
+The public rollout endpoint is Dynamo's OpenAI-compatible frontend on `inference.server.port`. Admin calls for health, NCCL setup, and weight updates use the worker system server on `inference.dynamo.system_port`. The RL config switches rollout training to the standard OpenAI chat-completions client and points admin weight updates at the Dynamo worker system server.
+
+Set `inference.dynamo.deploy_router = false` only when an external Dynamo frontend/router is already deployed and `orchestrator.client.base_url` points at it. With `discovery_backend = "file"`, also set `DYN_FILE_KV` to a discovery directory shared by the external router and worker.
+
 ### Discriminated unions
 
 Some config fields use discriminated unions (e.g. loss type, data type). Set the `type` field to select the variant:
diff --git a/skills/entrypoints/SKILL.md b/skills/entrypoints/SKILL.md
index f6589aca97..deb38dd2e2 100644
--- a/skills/entrypoints/SKILL.md
+++ b/skills/entrypoints/SKILL.md
@@ -39,7 +39,7 @@ The entrypoint launches torchrun internally — no need to call torchrun directl
 
 ## `inference` — Standalone inference server
 
-Launches a vLLM-based inference server with OpenAI-compatible API.
+Launches a vLLM-based inference server with OpenAI-compatible API. The experimental Dynamo backend is available with `inference.backend = "dynamo"` after installing the optional Dynamo extra.
 
 ```bash
 uv run inference @ configs/debug/infer.toml
@@ -48,10 +48,20 @@ uv run inference --model.name Qwen/Qwen3-0.6B --model.enforce-eager
 
 Always use the `inference` entrypoint — never `vllm serve` directly.
 
+For Dynamo:
+
+```bash
+uv run --extra dynamo inference @ configs/debug/infer.toml --backend dynamo
+```
+
+Dynamo code lives under `prime_rl.experimental.dynamo`; the launcher exposes Dynamo's OpenAI frontend/router on `inference.server.port` and uses `inference.dynamo.system_port` for weight-update/admin routes on the worker. `inference.dynamo.deploy_router` defaults to true for local `rl` testing; set it false only when `orchestrator.client.base_url` points at an externally deployed Dynamo router.
+
 Custom endpoints beyond standard OpenAI API:
 - `/v1/chat/completions/tokens` — accepts token IDs as prompt input
 - `/update_weights` — hot-reload model weights from the trainer
 - `/load_lora_adapter` — load LoRA adapters at runtime
+
+For Dynamo, generation should go through Dynamo's standard `/v1/chat/completions` frontend route. prime-rl adds only worker admin routes under `/engine/*` for liveness, NCCL broadcaster setup, and weight updates.
 - `/init_broadcaster` — initialize weight broadcast for distributed training
 
 Check health with:
diff --git a/src/prime_rl/entrypoints/inference.py b/src/prime_rl/entrypoints/inference.py
index a14ae26e63..576294f9ea 100644
--- a/src/prime_rl/entrypoints/inference.py
+++ b/src/prime_rl/entrypoints/inference.py
@@ -131,9 +131,16 @@ def inference_local(config: InferenceConfig):
 
     setup_vllm_env(config)
 
-    from prime_rl.inference.vllm.server import server  # pyright: ignore
+    if config.backend == "vllm":
+        from prime_rl.inference.vllm.server import server  # pyright: ignore
 
-    server(config, vllm_extra=config.vllm_extra)
+        server(config, vllm_extra=config.vllm_extra)
+    elif config.backend == "dynamo":
+        from prime_rl.experimental.dynamo.server import server
+
+        server(config)
+    else:
+        raise ValueError(f"Unsupported inference backend: {config.backend}")
 
 
 def inference(config: InferenceConfig):
diff --git a/src/prime_rl/entrypoints/rl.py b/src/prime_rl/entrypoints/rl.py
index b740f58ba2..4622640630 100644
--- a/src/prime_rl/entrypoints/rl.py
+++ b/src/prime_rl/entrypoints/rl.py
@@ -153,7 +153,12 @@ def rl_local(config: RLConfig):
     check_gpus_available(all_gpu_ids)
 
     # Validate client port matches inference server port
-    if config.inference is not None and not config.orchestrator.client.is_elastic:
+    local_router_deployed = not (
+        config.inference is not None
+        and config.inference.backend == "dynamo"
+        and not config.inference.dynamo.deploy_router
+    )
+    if config.inference is not None and not config.orchestrator.client.is_elastic and local_router_deployed:
         from urllib.parse import urlparse
 
         base_url = config.orchestrator.client.base_url[0]
diff --git a/src/prime_rl/experimental/__init__.py b/src/prime_rl/experimental/__init__.py
new file mode 100644
index 0000000000..ffcf35181b
--- /dev/null
+++ b/src/prime_rl/experimental/__init__.py
@@ -0,0 +1 @@
+"""Experimental integrations for prime-rl."""
diff --git a/src/prime_rl/experimental/dynamo/__init__.py b/src/prime_rl/experimental/dynamo/__init__.py
new file mode 100644
index 0000000000..32368aab91
--- /dev/null
+++ b/src/prime_rl/experimental/dynamo/__init__.py
@@ -0,0 +1 @@
+"""Dynamo inference backend integration."""
diff --git a/src/prime_rl/experimental/dynamo/server.py b/src/prime_rl/experimental/dynamo/server.py
new file mode 100644
index 0000000000..19e9af0da8
--- /dev/null
+++ b/src/prime_rl/experimental/dynamo/server.py
@@ -0,0 +1,152 @@
+from __future__ import annotations
+
+import json
+import os
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+from prime_rl.configs.inference import InferenceConfig
+from prime_rl.inference.vllm.server import WORKER_EXTENSION_CLS
+from prime_rl.utils.logger import get_logger
+from prime_rl.utils.nccl import disable_nccl_p2p_if_unavailable
+
+
+def _format_cli_value(value: Any) -> str:
+    if isinstance(value, Path):
+        return value.as_posix()
+    if isinstance(value, (dict, list, tuple)):
+        return json.dumps(value)
+    return str(value)
+
+
+def _namespace_to_cli_args(namespace: Namespace) -> list[str]:
+    args: list[str] = []
+    for key, value in vars(namespace).items():
+        if value is None:
+            continue
+
+        flag = f"--{key.replace('_', '-')}"
+        if value is True:
+            args.append(flag)
+        elif value is False:
+            continue
+        else:
+            args.extend([flag, _format_cli_value(value)])
+    return args
+
+
+def _terminate(processes: list[subprocess.Popen]) -> None:
+    for process in processes:
+        if process.poll() is None:
+            process.terminate()
+    for process in processes:
+        try:
+            process.wait(timeout=30)
+        except subprocess.TimeoutExpired:
+            process.kill()
+
+
+def _build_worker_namespace(config: InferenceConfig) -> Namespace:
+    namespace = config.to_dynamo_vllm()
+    namespace.worker_extension_cls = WORKER_EXTENSION_CLS[config.weight_broadcast.type]
+    return namespace
+
+
+def server(config: InferenceConfig) -> None:
+    """Launch a Dynamo vLLM worker, optionally with Dynamo's frontend/router."""
+    logger = get_logger()
+    disable_nccl_p2p_if_unavailable()
+
+    worker_command = [
+        sys.executable,
+        "-m",
+        "prime_rl.experimental.dynamo.worker",
+        *_namespace_to_cli_args(_build_worker_namespace(config)),
+    ]
+
+    base_env = os.environ.copy()
+    base_env.setdefault("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
+    if config.enable_lora:
+        base_env["VLLM_ALLOW_RUNTIME_LORA_UPDATING"] = "True"
+
+    discovery_dir = None
+    if (
+        not config.dynamo.deploy_router
+        and config.dynamo.discovery_backend == "file"
+        and "DYN_FILE_KV" not in base_env
+    ):
+        raise ValueError(
+            "inference.dynamo.deploy_router=false with discovery_backend='file' requires DYN_FILE_KV to point "
+            "at a file discovery directory shared with the external Dynamo frontend/router."
+        )
+    if not config.dynamo.deploy_router and config.dynamo.discovery_backend == "mem":
+        raise ValueError(
+            "inference.dynamo.deploy_router=false cannot use discovery_backend='mem' because the external "
+            "Dynamo frontend/router cannot share in-process discovery state with this worker."
+        )
+    if config.dynamo.deploy_router and config.dynamo.discovery_backend == "file" and "DYN_FILE_KV" not in base_env:
+        discovery_dir = tempfile.TemporaryDirectory(prefix="prime_rl_dynamo_")
+        base_env["DYN_FILE_KV"] = discovery_dir.name
+    if "DYN_EVENT_PLANE" not in base_env:
+        if config.dynamo.event_plane is not None:
+            base_env["DYN_EVENT_PLANE"] = config.dynamo.event_plane
+        elif config.dynamo.discovery_backend in ("file", "mem"):
+            base_env["DYN_EVENT_PLANE"] = "zmq"
+
+    frontend_env = base_env.copy()
+    frontend_env.pop("DYN_SYSTEM_PORT", None)
+
+    worker_env = base_env.copy()
+    worker_env["DYN_SYSTEM_PORT"] = str(config.dynamo.system_port)
+
+    process_specs: list[tuple[str, list[str], dict[str, str]]] = []
+    if config.dynamo.deploy_router:
+        frontend_namespace = config.to_dynamo_frontend()
+        frontend_command = [
+            sys.executable,
+            "-m",
+            "dynamo.frontend",
+            *_namespace_to_cli_args(frontend_namespace),
+        ]
+        process_specs.append(("Dynamo frontend/router", frontend_command, frontend_env))
+    else:
+        logger.info("Skipping Dynamo frontend/router launch; using external router deployment.")
+
+    process_specs.append(("Dynamo vLLM worker", worker_command, worker_env))
+
+    for name, command, _env in process_specs:
+        logger.info(f"Starting {name}: {' '.join(command)}")
+
+    processes: list[subprocess.Popen] = []
+
+    def handle_signal(signum, _frame):
+        logger.warning(f"Received signal {signum}, terminating Dynamo processes")
+        _terminate(processes)
+        raise SystemExit(128 + signum)
+
+    previous_sigterm = signal.signal(signal.SIGTERM, handle_signal)
+    previous_sigint = signal.signal(signal.SIGINT, handle_signal)
+
+    try:
+        for _name, command, env in process_specs:
+            processes.append(subprocess.Popen(command, env=env))
+
+        while True:
+            for process in processes:
+                return_code = process.poll()
+                if return_code is not None:
+                    _terminate(processes)
+                    raise SystemExit(return_code)
+            time.sleep(1)
+    finally:
+        signal.signal(signal.SIGTERM, previous_sigterm)
+        signal.signal(signal.SIGINT, previous_sigint)
+        _terminate(processes)
+        if discovery_dir is not None:
+            discovery_dir.cleanup()
diff --git a/src/prime_rl/experimental/dynamo/worker.py b/src/prime_rl/experimental/dynamo/worker.py
new file mode 100644
index 0000000000..0b668f68af
--- /dev/null
+++ b/src/prime_rl/experimental/dynamo/worker.py
@@ -0,0 +1,102 @@
+"""Dynamo vLLM worker entrypoint with prime-rl admin routes."""
+
+from __future__ import annotations
+
+from typing import Any, Callable
+
+from prime_rl.utils.logger import get_logger
+
+logger = get_logger()
+
+
+async def _pause_generation(handler, body: dict[str, Any]) -> dict[str, str]:
+    mode = body.get("mode", "keep")
+    clear_cache = bool(body.get("clear_cache", False))
+    await handler.engine_client.pause_generation(mode=mode, clear_cache=clear_cache)
+    return {"status": "ok"}
+
+
+async def _resume_generation(handler, _body: dict[str, Any]) -> dict[str, str]:
+    await handler.engine_client.resume_generation()
+    return {"status": "ok"}
+
+
+async def _update_weights(handler, body: dict[str, Any]) -> dict[str, str]:
+    weight_dir = body.get("weight_dir")
+    await handler.engine_client.pause_generation(mode="keep", clear_cache=False)
+    try:
+        await handler.engine_client.collective_rpc("update_weights_from_path", args=(weight_dir,))
+        reset_prefix_cache = bool(body.get("reset_prefix_cache", True))
+        if reset_prefix_cache and hasattr(handler.engine_client, "reset_prefix_cache"):
+            await handler.engine_client.reset_prefix_cache()
+    finally:
+        await handler.engine_client.resume_generation()
+    return {"status": "ok"}
+
+
+async def _init_broadcaster(handler, body: dict[str, Any]) -> dict[str, str]:
+    await handler.engine_client.collective_rpc(
+        "init_broadcaster",
+        args=(
+            body.get("host"),
+            body.get("port"),
+            body.get("rank_offset"),
+            body.get("inference_world_size"),
+            body.get("timeout"),
+            body.get("quantize_in_weight_transfer", False),
+        ),
+    )
+    return {"status": "ok"}
+
+
+async def _liveness_probe(handler, _body: dict[str, Any]) -> dict[str, str]:
+    await handler.engine_client.collective_rpc("liveness_probe")
+    return {"status": "ok"}
+
+
+def _bind(handler, callback: Callable[[Any, dict[str, Any]], Any]):
+    async def route(body: dict[str, Any] | None = None):
+        return await callback(handler, body or {})
+
+    return route
+
+
+def patch_dynamo_vllm_worker() -> None:
+    """Register prime-rl admin routes on Dynamo's worker system server."""
+    from dynamo.vllm.handlers import BaseWorkerHandler
+
+    if getattr(BaseWorkerHandler, "_prime_rl_admin_routes_patched", False):
+        return
+
+    original_init = BaseWorkerHandler.__init__
+
+    def patched_init(self, runtime, *args, **kwargs) -> None:
+        original_init(self, runtime, *args, **kwargs)
+        if getattr(self, "_prime_rl_admin_routes_registered", False):
+            return
+        runtime.register_engine_route("pause", _bind(self, _pause_generation))
+        runtime.register_engine_route("resume", _bind(self, _resume_generation))
+        runtime.register_engine_route("update_weights", _bind(self, _update_weights))
+        runtime.register_engine_route("init_broadcaster", _bind(self, _init_broadcaster))
+        runtime.register_engine_route("liveness", _bind(self, _liveness_probe))
+        self._prime_rl_admin_routes_registered = True
+        logger.info(
+            "Registered prime-rl Dynamo admin routes: "
+            "/engine/pause, /engine/resume, /engine/update_weights, /engine/init_broadcaster, "
+            "/engine/liveness"
+        )
+
+    BaseWorkerHandler.__init__ = patched_init
+    BaseWorkerHandler._prime_rl_admin_routes_patched = True
+
+
+def main() -> None:
+    patch_dynamo_vllm_worker()
+
+    from dynamo.vllm.main import main as dynamo_vllm_main
+
+    dynamo_vllm_main()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/prime_rl/inference/server.py b/src/prime_rl/inference/server.py
index 322f0145eb..9558a006b1 100644
--- a/src/prime_rl/inference/server.py
+++ b/src/prime_rl/inference/server.py
@@ -19,9 +19,16 @@ def main():
     setup_vllm_env(config)
 
     # We import here to be able to set environment variables before importing vLLM
-    from prime_rl.inference.vllm.server import server  # pyright: ignore
+    if config.backend == "vllm":
+        from prime_rl.inference.vllm.server import server  # pyright: ignore
 
-    server(config, vllm_extra=config.vllm_extra)
+        server(config, vllm_extra=config.vllm_extra)
+    elif config.backend == "dynamo":
+        from prime_rl.experimental.dynamo.server import server
+
+        server(config)
+    else:
+        raise ValueError(f"Unsupported inference backend: {config.backend}")
 
 
 if __name__ == "__main__":
diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py
index 1de52994a9..c5d9047e0a 100644
--- a/src/prime_rl/orchestrator/envs.py
+++ b/src/prime_rl/orchestrator/envs.py
@@ -104,10 +104,14 @@ def _spawn(
         self._env_server_process = process
         return address
 
-    def _sampling_args_with_salt(self, cache_salt: str) -> dict:
+    def _sampling_args_with_salt(self, cache_salt: str | None) -> dict:
         sampling_args = {**self.sampling_args}
-        extra_body = {**sampling_args.get("extra_body", {}), "cache_salt": cache_salt}
-        sampling_args["extra_body"] = extra_body
+        if cache_salt is not None:
+            extra_body = {**sampling_args.get("extra_body", {}), "cache_salt": cache_salt}
+            sampling_args["extra_body"] = extra_body
+        else:
+            sampling_args.pop("logprobs", None)
+            sampling_args.pop("top_logprobs", None)
         return sampling_args
 
     async def run_rollout(
@@ -115,7 +119,7 @@ async def run_rollout(
         client: vf.ClientConfig,
         example: dict,
         model_name: str,
-        cache_salt: str,
+        cache_salt: str | None,
     ) -> vf.RolloutOutput:
         """Run a single rollout for an example."""
         return await self.env.run_rollout(
@@ -134,7 +138,7 @@ async def run_group(
         example: dict,
         model_name: str,
         rollouts_per_example: int,
-        cache_salt: str,
+        cache_salt: str | None,
     ) -> list[vf.RolloutOutput]:
         """Run a group of rollouts for an example. Required for group-scoring envs."""
         return await self.env.run_group(
@@ -179,7 +183,7 @@ async def evaluate(
         get_client: Callable[[], Awaitable[vf.ClientConfig]],
         ckpt_step: int,
         step: int,
-        cache_salt: str,
+        cache_salt: str | None,
     ) -> list[vf.RolloutOutput]:
         num_examples = len(self.examples)
         rollouts_per_example = self.config.rollouts_per_example
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index bc1128ebc7..689b691288 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -394,7 +394,7 @@ async def orchestrate(config: OrchestratorConfig):
                         get_client=inference_pool.get_eval_client,
                         ckpt_step=ckpt_step,
                         step=progress.step,
-                        cache_salt=str(ckpt_step),
+                        cache_salt=str(ckpt_step) if scheduler.supports_cache_salt else None,
                     )
                     for eval_env in envs_to_eval
                 ]
@@ -813,7 +813,7 @@ def compute_solve_rates(df):
                     get_client=inference_pool.get_eval_client,
                     ckpt_step=ckpt_step,
                     step=progress.step,
-                    cache_salt=str(ckpt_step),
+                    cache_salt=str(ckpt_step) if scheduler.supports_cache_salt else None,
                 )
                 for eval_env in eval_envs
             ]
diff --git a/src/prime_rl/orchestrator/scheduler.py b/src/prime_rl/orchestrator/scheduler.py
index a8427b69f7..7e02ccd147 100644
--- a/src/prime_rl/orchestrator/scheduler.py
+++ b/src/prime_rl/orchestrator/scheduler.py
@@ -88,6 +88,7 @@ def __init__(
         self.lora_name = lora_name
         self.model_name = self.config.model.name
         self.json_logging = config.log.json_logging
+        self.supports_cache_salt = config.client.admin_backend != "dynamo"
 
         # Inference pool - used for admin operations (adapter sync) and metrics
         self.inference_pool = inference_pool
@@ -199,7 +200,7 @@ async def schedule_rollout(self, group_id: int):
         env_name = group.example["env_name"]
         env = self.train_envs.get(env_name)
 
-        cache_salt = str(self.ckpt_step)
+        cache_salt = str(self.ckpt_step) if self.supports_cache_salt else None
         if env.requires_group_scoring:
             rollout_count = group.rollouts_to_schedule
             group.rollouts_to_schedule = 0
diff --git a/src/prime_rl/utils/client.py b/src/prime_rl/utils/client.py
index 21659dfc46..201117a429 100644
--- a/src/prime_rl/utils/client.py
+++ b/src/prime_rl/utils/client.py
@@ -15,6 +15,8 @@
 from prime_rl.configs.shared import ClientConfig
 from prime_rl.utils.logger import get_logger
 
+ADMIN_BACKEND_ATTR = "prime_rl_admin_backend"
+
 
 @runtime_checkable
 class InferencePool(Protocol):
@@ -81,6 +83,11 @@ def __init__(
         )
         self._eval_clients = setup_clients(client_config, client_type=eval_client_type)
         self._admin_clients = setup_admin_clients(client_config)
+        self._model_clients = (
+            setup_admin_clients(client_config, use_admin_base_url=False)
+            if client_config.admin_base_url
+            else self._admin_clients
+        )
         self._skip_model_check = client_config.skip_model_check
         self._wait_for_ready_timeout = client_config.wait_for_ready_timeout
         self._eval_cycle = cycle(self._eval_clients)
@@ -105,10 +112,14 @@ async def get_eval_client(self) -> vf.ClientConfig:
         return next(self._eval_cycle)
 
     async def wait_for_ready(self, model_name: str, timeout: int | None = None) -> None:
-        await check_health(
-            self._admin_clients, timeout=timeout if timeout is not None else self._wait_for_ready_timeout
+        wait_timeout = timeout if timeout is not None else self._wait_for_ready_timeout
+        await check_health(self._admin_clients, timeout=wait_timeout)
+        await maybe_check_has_model(
+            self._model_clients,
+            model_name,
+            skip_model_check=self._skip_model_check,
+            timeout=wait_timeout,
         )
-        await maybe_check_has_model(self._admin_clients, model_name, skip_model_check=self._skip_model_check)
 
     async def update_weights(self, weight_dir: Path | None, lora_name: str | None = None, step: int = 0) -> None:
         await update_weights(self._admin_clients, weight_dir, lora_name=lora_name, step=step)
@@ -211,14 +222,19 @@ def setup_clients(
     return clients
 
 
-def setup_admin_clients(client_config: ClientConfig) -> list[AsyncClient]:
+def setup_admin_clients(client_config: ClientConfig, *, use_admin_base_url: bool = True) -> list[AsyncClient]:
     """Create dedicated admin clients for weight update operations.
 
     Uses a separate connection pool to avoid queueing behind streaming requests.
-    When admin_base_url is set, uses those URLs instead of base_url, allowing
-    weight updates to bypass routers in disaggregated P/D deployments.
+    When admin_base_url is set and use_admin_base_url is true, uses those URLs
+    instead of base_url, allowing weight updates to bypass routers in
+    disaggregated P/D deployments.
     """
-    urls = client_config.admin_base_url if client_config.admin_base_url else client_config.base_url
+    urls = (
+        client_config.admin_base_url
+        if use_admin_base_url and client_config.admin_base_url
+        else client_config.base_url
+    )
 
     def _setup_admin_client(base_url: str) -> httpx.AsyncClient:
         headers = client_config.headers.copy()  # avoid mutating config
@@ -229,29 +245,62 @@ def _setup_admin_client(base_url: str) -> httpx.AsyncClient:
         # Strip /v1 suffix since admin endpoints are at root level
         base_url = base_url.rstrip("/").removesuffix("/v1")
 
-        return AsyncClient(
+        client = AsyncClient(
             base_url=base_url,
             headers=headers,
             limits=httpx.Limits(max_connections=4, max_keepalive_connections=1),
             timeout=httpx.Timeout(None),
         )
+        setattr(client, ADMIN_BACKEND_ATTR, client_config.admin_backend)
+        return client
 
     return [_setup_admin_client(base_url) for base_url in urls]
 
 
+def get_admin_backend(admin_client: AsyncClient) -> str:
+    return getattr(admin_client, ADMIN_BACKEND_ATTR, "vllm")
+
+
 async def maybe_check_has_model(
-    admin_clients: list[AsyncClient], model_name: str, skip_model_check: bool = False
+    admin_clients: list[AsyncClient],
+    model_name: str,
+    skip_model_check: bool = False,
+    interval: int = 1,
+    log_interval: int = 10,
+    timeout: int = 1800,
 ) -> None:
     if skip_model_check:
         return
     logger = get_logger()
     logger.debug(f"Checking if model {model_name} is in the inference pool")
-    results = await asyncio.gather(*[admin_client.get("/v1/models") for admin_client in admin_clients])
-    for admin_client, result in zip(admin_clients, results):
-        models = result.json()["data"]
-        if not any(model["id"] == model_name for model in models):
-            raise ValueError(f"Model {model_name} was not found in the inference pool on {admin_client.base_url}")
-    logger.debug(f"Model {model_name} was found in the inference pool")
+    wait_time = 0
+    last_error: Exception | None = None
+    while wait_time < timeout:
+        try:
+            results = await asyncio.gather(*[admin_client.get("/v1/models") for admin_client in admin_clients])
+            missing_urls = []
+            for admin_client, result in zip(admin_clients, results):
+                result.raise_for_status()
+                models = result.json()["data"]
+                if not any(model["id"] == model_name for model in models):
+                    missing_urls.append(str(admin_client.base_url))
+            if not missing_urls:
+                logger.debug(f"Model {model_name} was found in the inference pool")
+                return
+            last_error = ValueError(f"Model {model_name} was not found on {', '.join(missing_urls)}")
+        except Exception as e:
+            last_error = e
+
+        if wait_time % log_interval == 0 and wait_time > 0:
+            logger.warning(
+                f"Model {model_name} was not found in the inference pool after {wait_time} seconds "
+                f"(Error: {last_error})"
+            )
+        await asyncio.sleep(interval)
+        wait_time += interval
+
+    msg = f"Model {model_name} was not found in the inference pool after {wait_time} (>{timeout}) seconds."
+    raise TimeoutError(msg) from last_error
 
 
 async def check_health(
@@ -261,10 +310,18 @@ async def check_health(
 
     async def _check_health(admin_client: AsyncClient) -> None:
         wait_time = 0
-        logger.debug("Starting pinging /health to check health")
+        admin_backend = get_admin_backend(admin_client)
+        logger.debug("Starting pinging health endpoint to check health")
         while wait_time < timeout:
             try:
-                await admin_client.get("/health")
+                if admin_backend == "dynamo":
+                    response = await admin_client.post("/engine/liveness", json={})
+                else:
+                    response = await admin_client.get("/health")
+                    if response.status_code == 404:
+                        logger.warning("The route /health does not exist. Skipping health check.")
+                        return
+                response.raise_for_status()
                 logger.debug(f"Inference pool is ready after {wait_time} seconds")
                 return
             except NotFoundError:
@@ -332,15 +389,29 @@ async def update_weights(
     weight_dir_posix = weight_dir.as_posix() if weight_dir is not None else None
 
     if lora_name is not None and weight_dir is not None:
+        dynamo_clients = [client for client in admin_clients if get_admin_backend(client) == "dynamo"]
+        if dynamo_clients:
+            raise ValueError("Dynamo backend does not support prime-rl LoRA adapter weight updates yet.")
         await load_lora_adapter(admin_clients, lora_name, weight_dir)
     else:
+        vllm_clients = [client for client in admin_clients if get_admin_backend(client) == "vllm"]
 
         async def _update_weights(admin_client: AsyncClient, weight_dir: str | None) -> None:
+            if get_admin_backend(admin_client) == "dynamo":
+                response = await admin_client.post("/engine/update_weights", json={"weight_dir": weight_dir})
+                response.raise_for_status()
+                result = response.json()
+                if result.get("status") == "error":
+                    raise RuntimeError(result.get("message", "Dynamo weight update failed"))
+                return
+
             response = await admin_client.post("/update_weights", json={"weight_dir": weight_dir})
             response.raise_for_status()
 
-        # Pause engines so all DP workers drain in-flight work and can join the NCCL broadcast
-        await _pause_engines(admin_clients)
+        # Pause vLLM engines so all DP workers drain in-flight work and can join the NCCL broadcast.
+        # Dynamo's update route pauses/resumes its engine internally.
+        if vllm_clients:
+            await _pause_engines(vllm_clients)
 
         try:
             # Create ready marker before servers enter receive path (used by NCCL broadcast)
@@ -352,7 +423,8 @@ async def _update_weights(admin_client: AsyncClient, weight_dir: str | None) ->
 
             await asyncio.gather(*[_update_weights(admin_client, weight_dir_posix) for admin_client in admin_clients])
         finally:
-            await _resume_engines(admin_clients)
+            if vllm_clients:
+                await _resume_engines(vllm_clients)
 
 
 def _is_retryable_lora_error(exception: BaseException) -> bool:
@@ -452,6 +524,24 @@ async def init_nccl_broadcast(
     )
 
     async def _init_nccl_broadcast(admin_client: AsyncClient, rank_offset: int) -> None:
+        if get_admin_backend(admin_client) == "dynamo":
+            response = await admin_client.post(
+                "/engine/init_broadcaster",
+                json={
+                    "host": host,
+                    "port": port,
+                    "rank_offset": rank_offset,
+                    "inference_world_size": inference_world_size,
+                    "timeout": timeout,
+                    "quantize_in_weight_transfer": quantize_in_weight_transfer,
+                },
+            )
+            response.raise_for_status()
+            result = response.json()
+            if result.get("status") == "error":
+                raise RuntimeError(result.get("message", "Dynamo NCCL initialization failed"))
+            return
+
         try:
             response = await admin_client.post(
                 "/init_broadcaster",
diff --git a/tests/unit/test_configs.py b/tests/unit/test_configs.py
index ffcc18f270..30efe20cd9 100644
--- a/tests/unit/test_configs.py
+++ b/tests/unit/test_configs.py
@@ -159,3 +159,50 @@ def test_removed_fused_lm_head_chunk_size_field_is_rejected():
 def test_selective_activation_checkpointing_requires_custom_impl():
     with pytest.raises(ValidationError, match="Selective activation checkpointing requires model.impl='custom'"):
         TrainerModelConfig.model_validate({"impl": "hf", "ac": {"mode": "selective"}})
+
+
+def test_inference_config_translates_dynamo_args():
+    config = InferenceConfig.model_validate(
+        {
+            "backend": "dynamo",
+            "server": {"host": "127.0.0.1", "port": 8123},
+            "model": {"name": "Qwen/Qwen3-4B", "dtype": "bfloat16", "max_model_len": 2048},
+            "parallel": {"tp": 2, "dp": 1},
+            "dynamo": {
+                "system_port": 9001,
+                "discovery_backend": "file",
+                "router_mode": "least-loaded",
+                "min_initial_workers": 1,
+                "worker_extra": {"block_size": 64},
+            },
+            "vllm_extra": {"max_num_seqs": 32},
+        }
+    )
+
+    frontend = config.to_dynamo_frontend()
+    worker = config.to_dynamo_vllm()
+
+    assert frontend.http_host == "127.0.0.1"
+    assert frontend.http_port == 8123
+    assert frontend.namespace == "dynamo"
+    assert frontend.discovery_backend == "file"
+    assert frontend.router_mode == "least-loaded"
+    assert frontend.min_initial_workers == 1
+    assert frontend.dyn_chat_processor == "vllm"
+
+    assert worker.model == "Qwen/Qwen3-4B"
+    assert worker.dtype == "bfloat16"
+    assert worker.max_model_len == 2048
+    assert worker.tensor_parallel_size == 2
+    assert worker.block_size == 64
+    assert worker.max_num_seqs == 32
+    assert worker.logprobs_mode == "processed_logprobs"
+
+
+def test_rl_config_auto_selects_openai_client_for_dynamo():
+    config = RLConfig.model_validate({"trainer": {}, "orchestrator": {}, "inference": {"backend": "dynamo"}})
+
+    assert config.orchestrator.use_token_client is False
+    assert config.orchestrator.client.admin_backend == "dynamo"
+    assert config.orchestrator.client.admin_base_url == ["http://localhost:8081"]
+    assert config.orchestrator.client.skip_model_check is False
diff --git a/uv.lock b/uv.lock
index 002a9313d6..b254a36436 100644
--- a/uv.lock
+++ b/uv.lock
@@ -11,37 +11,39 @@ supported-markers = [
 ]
 
 [options]
-exclude-newer = "2026-05-03T13:45:58.065909043Z"
+exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
 exclude-newer-span = "P7D"
 
 [options.exclude-newer-package]
-vllm = false
 verifiers = false
-vllm-router = false
 dion = false
 alphabet-sort = false
 science-env = false
-color-codeword = false
-nixl-cu12 = false
-flash-attn-3 = false
-prime-tunnel = false
-prime = false
-deep-gemm = false
-aime2024 = false
 prime-evals = false
 deepdive = false
 reverse-text = false
+ai-dynamo-runtime = false
 code-env = false
 mini-swe-agent-plus = false
 deep-ep = false
 pydantic-config = false
-math-env = false
-logic-env = false
+ai-dynamo = false
 wiki-search = false
 math-python = false
 math500 = false
 aime2025 = false
+vllm = false
+vllm-router = false
+color-codeword = false
+nixl-cu12 = false
+flash-attn-3 = false
+prime-tunnel = false
+deep-gemm = false
+aime2024 = false
+math-env = false
 prime-sandboxes = false
+logic-env = false
+prime = false
 
 [manifest]
 members = [
@@ -63,6 +65,36 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" },
 ]
 
+[[package]]
+name = "ai-dynamo"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "ai-dynamo-runtime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "kubernetes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "msgpack", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "msgspec", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "prometheus-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyzmq", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "transformers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/7d/b99564ff88262e8ba377bb592c17ba84db9ae1b4993cbdeb532484dc034e/ai_dynamo-1.1.1-py3-none-any.whl", hash = "sha256:fd6a811a6d5ef36537a5f32af88dda722c0cf6b7c4f3e31a7671e1e59dc36777", size = 1762603, upload-time = "2026-05-09T02:43:51.382Z" },
+]
+
+[[package]]
+name = "ai-dynamo-runtime"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvloop", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0a/e2/47cc654088fc4b17f53fbf147020ca497afb9fdd3955997d667c285ba1bb/ai_dynamo_runtime-1.1.1-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2c96c93195c87be5377f42e99151536cde9629bf7e8d5af8574d30e9fbcb8bb3", size = 34753711, upload-time = "2026-05-09T02:41:45.993Z" },
+    { url = "https://files.pythonhosted.org/packages/00/68/acfdb9d6b0b9a4d25f991bae781992893b845fcd0981907189fbb27aa81b/ai_dynamo_runtime-1.1.1-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a7378079c81f2d81ca3363345111f04d9292aa158271b23e0236670f501e2b1d", size = 35042939, upload-time = "2026-05-09T02:40:41.554Z" },
+]
+
 [[package]]
 name = "aime2024"
 version = "0.1.18"
@@ -1137,6 +1169,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" },
 ]
 
+[[package]]
+name = "google-auth"
+version = "2.50.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyasn1-modules", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5f/18/238d7021d151bdab868f23433817b027dd759135202f4dfce0670d1230ca/google_auth-2.50.0.tar.gz", hash = "sha256:f35eafb191195328e8ce10a7883970877e7aeb49c2bfaa54aa0e394316d353d0", size = 336523, upload-time = "2026-04-30T21:19:29.659Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/37/cf/4880c2137c14280b2f59975cdf12cc442bc0ae1f9ea473a26eaa0c146786/google_auth-2.50.0-py3-none-any.whl", hash = "sha256:04382175e28b94f49694977f0a792688b59a668def1499e9d8de996dc9ce5b15", size = 246495, upload-time = "2026-04-30T21:19:27.664Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.72.0"
@@ -1588,11 +1633,13 @@ wheels = [
 
 [[package]]
 name = "kubernetes"
-version = "35.0.0"
+version = "32.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "certifi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "durationpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "google-auth", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "oauthlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "python-dateutil", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -1601,9 +1648,9 @@ dependencies = [
     { name = "urllib3", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "websocket-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/2c/8f/85bf51ad4150f64e8c665daf0d9dfe9787ae92005efb9a4d1cba592bd79d/kubernetes-35.0.0.tar.gz", hash = "sha256:3d00d344944239821458b9efd484d6df9f011da367ecb155dadf9513f05f09ee", size = 1094642, upload-time = "2026-01-16T01:05:27.76Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0598f0e8b4af37cd9b10d8b87386cf3173cb8045d834ab5f6ec347a758b3/kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28", size = 946691, upload-time = "2025-02-18T21:06:34.148Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0c/70/05b685ea2dffcb2adbf3cdcea5d8865b7bc66f67249084cf845012a0ff13/kubernetes-35.0.0-py2.py3-none-any.whl", hash = "sha256:39e2b33b46e5834ef6c3985ebfe2047ab39135d41de51ce7641a7ca5b372a13d", size = 2017602, upload-time = "2026-01-16T01:05:25.991Z" },
+    { url = "https://files.pythonhosted.org/packages/08/10/9f8af3e6f569685ce3af7faab51c8dd9d93b9c38eba339ca31c746119447/kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998", size = 1988070, upload-time = "2025-02-18T21:06:31.391Z" },
 ]
 
 [[package]]
@@ -2802,6 +2849,10 @@ disagg = [
     { name = "nixl-cu12", version = "0.10.1", source = { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/nixl_cu12-0.10.1-cp312-cp312-linux_x86_64.whl" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "vllm-router", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
 ]
+dynamo = [
+    { name = "ai-dynamo", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "blake3", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
 envs = [
     { name = "aime2024", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "aime2025", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -2848,11 +2899,13 @@ mamba-ssm = [
 
 [package.metadata]
 requires-dist = [
+    { name = "ai-dynamo", marker = "extra == 'dynamo'", specifier = "==1.1.1" },
     { name = "aime2024", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
     { name = "aime2025", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
     { name = "aiolimiter", specifier = ">=1.2.1" },
     { name = "alphabet-sort", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
     { name = "beartype", specifier = ">=0.21.0" },
+    { name = "blake3", marker = "extra == 'dynamo'", specifier = ">=1.0.0,<2.0.0" },
     { name = "code-env", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
     { name = "color-codeword", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
     { name = "datasets", specifier = ">=4.0.0" },
@@ -2910,7 +2963,7 @@ requires-dist = [
     { name = "wandb", specifier = ">=0.26.1" },
     { name = "wiki-search", marker = "extra == 'envs'", index = "https://hub.primeintellect.ai/primeintellect/simple/" },
 ]
-provides-extras = ["flash-attn", "flash-attn-3", "flash-attn-cute", "envs", "disagg", "gpt-oss", "quack", "all"]
+provides-extras = ["flash-attn", "flash-attn-3", "flash-attn-cute", "envs", "disagg", "gpt-oss", "quack", "dynamo", "all"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -2974,11 +3027,11 @@ wheels = [
 
 [[package]]
 name = "prometheus-client"
-version = "0.22.1"
+version = "0.25.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5e/cf/40dde0a2be27cc1eb41e333d1a674a74ce8b8b0457269cc640fd42b07cf7/prometheus_client-0.22.1.tar.gz", hash = "sha256:190f1331e783cf21eb60bca559354e0a4d4378facecf78f5428c39b675d20d28", size = 69746, upload-time = "2025-06-02T14:29:01.152Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/fb/d9aa83ffe43ce1f19e557c0971d04b90561b0cfd50762aafb01968285553/prometheus_client-0.25.0.tar.gz", hash = "sha256:5e373b75c31afb3c86f1a52fa1ad470c9aace18082d39ec0d2f918d11cc9ba28", size = 86035, upload-time = "2026-04-09T19:53:42.359Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/32/ae/ec06af4fe3ee72d16973474f122541746196aaa16cea6f66d18b963c6177/prometheus_client-0.22.1-py3-none-any.whl", hash = "sha256:cca895342e308174341b2cbf99a56bef291fbc0ef7b9e5412a0f26d653ba7094", size = 58694, upload-time = "2025-06-02T14:29:00.068Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/9b/d4b1e644385499c8346fa9b622a3f030dce14cd6ef8a1871c221a17a67e7/prometheus_client-0.25.0-py3-none-any.whl", hash = "sha256:d5aec89e349a6ec230805d0df882f3807f74fd6c1a2fa86864e3c2279059fed1", size = 64154, upload-time = "2026-04-09T19:53:41.324Z" },
 ]
 
 [[package]]
@@ -3079,6 +3132,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" },
 ]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyasn1", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
 [[package]]
 name = "pybase64"
 version = "1.4.2"