diff --git a/deps/renderers b/deps/renderers index 8704f9d502..3ae276c446 160000 --- a/deps/renderers +++ b/deps/renderers @@ -1 +1 @@ -Subproject commit 8704f9d50252692a4a677177eb98d274f8d3ac5d +Subproject commit 3ae276c44683f8b11115b0c9f365abbb4beb850c diff --git a/deps/verifiers b/deps/verifiers index 58b119fa1b..521d436c55 160000 --- a/deps/verifiers +++ b/deps/verifiers @@ -1 +1 @@ -Subproject commit 58b119fa1b24eff85b74a75ccf3e132523b3c6c3 +Subproject commit 521d436c551b9a706cd3bcebd7200ae7e8907abc diff --git a/packages/prime-rl-configs/src/prime_rl/configs/rl.py b/packages/prime-rl-configs/src/prime_rl/configs/rl.py index 7a5809c1c6..45f930f8f7 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/rl.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/rl.py @@ -484,6 +484,19 @@ def auto_setup_router_replay(self): ) return self + @model_validator(mode="after") + def validate_router_replay_without_kv_offload(self): + if ( + self.trainer.enable_router_replay + and self.inference is not None + and self.inference.kv_cache_offload is not None + ): + raise ValueError( + "Router replay with inference.kv_cache_offload is not supported. " + "External KV cache hits do not carry routed-expert decisions." + ) + return self + @model_validator(mode="after") def auto_setup_deployment(self): if self.deployment.type == "single_node": # single-node diff --git a/pyproject.toml b/pyproject.toml index b8c1500971..6a4bae2629 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "tilelang>=0.1.8", "flash-linear-attention", "nvidia-ml-py>=12.575.51", + "pybase64>=1.4.2", ] [project.scripts] @@ -178,6 +179,7 @@ override-dependencies = [ # we want latest vllm, remove next patch vllm = false tokenspeed-mla = false +fastokens = false flash_attn_3 = false # PrimeIntellect-published on PyPI (trusted publisher) prime = false @@ -230,9 +232,9 @@ dion = { git = "https://github.com/samsja/dion.git", rev = "d891eeb" } transformers = { git = "https://github.com/huggingface/transformers.git", rev = "c1c3424" } flash-attn-4 = { git = "https://github.com/Dao-AILab/flash-attention.git", subdirectory = "flash_attn/cute", rev = "96bd151" } prime-pydantic-config = { workspace = true } -vllm-router = { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.22/vllm_router-0.1.22-cp38-abi3-manylinux_2_28_x86_64.whl" } +vllm-router = { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.25/vllm_router-0.1.25-cp38-abi3-manylinux_2_28_x86_64.whl" } vllm = [ - { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl", marker = "platform_machine == 'x86_64'" }, + { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl", marker = "platform_machine == 'x86_64'" }, { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_aarch64.whl", marker = "platform_machine == 'aarch64'" }, ] deep-ep = { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/deep_ep-1.2.1+29d31c0-cp312-cp312-linux_x86_64.whl" } diff --git a/src/prime_rl/inference/patches.py b/src/prime_rl/inference/patches.py index e8a4e8c4bf..35b2d1d26c 100644 --- a/src/prime_rl/inference/patches.py +++ b/src/prime_rl/inference/patches.py @@ -21,6 +21,51 @@ def transformers_v5_compat(): monkey_patch_dp_engine_core_pause_resume_deadlock() monkey_patch_vllm_layerwise_reload_alias_buffers() monkey_patch_vllm_padded_input_scrub() + monkey_patch_return_routed_experts_with_nixl_connector() + + +def monkey_patch_return_routed_experts_with_nixl_connector(): + from vllm import envs + from vllm.config.vllm import VllmConfig + from vllm.logger import init_logger + + logger = init_logger(__name__) + original_post_init = VllmConfig.__post_init__ + + if getattr(original_post_init, "_prime_rl_allows_nixl_routed_experts", False): + return + + def _is_nixl_routed_experts_pd_config(config: VllmConfig) -> bool: + kv_transfer_config = config.kv_transfer_config + return ( + config.model_config is not None + and config.model_config.enable_return_routed_experts + and kv_transfer_config is not None + and kv_transfer_config.kv_connector == "NixlConnector" + and kv_transfer_config.is_kv_transfer_instance + ) + + def _post_init(config: VllmConfig): + if not _is_nixl_routed_experts_pd_config(config): + return original_post_init(config) + + if config.parallel_config.pipeline_parallel_size > 1: + raise ValueError("--enable-return-routed-experts is incompatible with pipeline parallelism (PP > 1).") + if envs.VLLM_USE_V2_MODEL_RUNNER: + raise ValueError("VLLM_USE_V2_MODEL_RUNNER does not yet support: routed experts capture") + + # vLLM rejects every KV connector, but our P/D path uses NIXL and + # stitches prefill/decode routed experts in the router. CPU KV offload + # remains rejected by prime-rl config validation. + config.model_config.enable_return_routed_experts = False + try: + return original_post_init(config) + finally: + config.model_config.enable_return_routed_experts = True + + _post_init._prime_rl_allows_nixl_routed_experts = True + VllmConfig.__post_init__ = _post_init + logger.warning("Enabled vLLM routed-experts capture with NIXL connector patch.") def monkey_patch_vllm_layerwise_reload_alias_buffers(): diff --git a/src/prime_rl/inference/vllm/routed_experts.py b/src/prime_rl/inference/vllm/routed_experts.py new file mode 100644 index 0000000000..cad97e8574 --- /dev/null +++ b/src/prime_rl/inference/vllm/routed_experts.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any + +import numpy as np +import pybase64 +from vllm.outputs import RequestOutput + + +def serialize_routed_experts(routed_experts: Any) -> dict[str, Any] | None: + if routed_experts is None: + return None + + array = np.asarray(routed_experts) + assert array.ndim == 3 + assert np.issubdtype(array.dtype, np.integer) + if array.size: + assert array.min() >= 0 + assert array.max() <= np.iinfo(np.uint8).max + + compact = np.ascontiguousarray(array.astype(np.uint8, copy=False)) + return { + "data": pybase64.b64encode(memoryview(compact)).decode("ascii"), + "shape": list(compact.shape), + } + + +class RoutedExpertsCapture: + def __init__(self, generator: AsyncIterator[RequestOutput]): + self._generator = generator + self.routed_experts: dict[int, dict[str, Any]] = {} + + async def __aiter__(self): + async for request_output in self._generator: + for output in request_output.outputs: + encoded = serialize_routed_experts(getattr(output, "routed_experts", None)) + if encoded is not None: + self.routed_experts[output.index] = encoded + yield request_output diff --git a/src/prime_rl/inference/vllm/serving_tokens.py b/src/prime_rl/inference/vllm/serving_tokens.py index 932ebeaa60..afaabef0e6 100644 --- a/src/prime_rl/inference/vllm/serving_tokens.py +++ b/src/prime_rl/inference/vllm/serving_tokens.py @@ -10,9 +10,9 @@ header and forwarded to ``engine_client.generate``. The DP-replicated inference servers prime-RL runs need this to target a specific replica. -2. ``routed_experts`` per-token export — when the engine emits routing - decisions (``enable_return_routed_experts``), surface them on each choice. - This is what the trainer's router-replay path consumes. +2. Compact ``routed_experts`` export — when the engine emits routing + decisions, surface them as base64 raw-byte payloads without requiring a vLLM + source fork. 3. Server-side ``max_tokens`` defaulting — ``ServingTokens`` hands the client-supplied ``SamplingParams`` to the engine verbatim, and @@ -30,13 +30,11 @@ from __future__ import annotations -import base64 from collections.abc import AsyncGenerator from functools import cached_property +from typing import Any -import numpy as np from fastapi import Request -from pydantic import Field from vllm.entrypoints.openai.engine.protocol import ErrorResponse, RequestResponseMetadata from vllm.entrypoints.serve.disagg.protocol import ( GenerateRequest, @@ -48,64 +46,29 @@ from vllm.outputs import RequestOutput from vllm.sampling_params import RequestOutputKind, SamplingParams +from prime_rl.inference.vllm.routed_experts import RoutedExpertsCapture + class PrimeRlGenerateResponseChoice(GenerateResponseChoice): - routed_experts: dict | None = Field( - default=None, - description=( - "Per-token expert routing decisions (base85-encoded int32 array + shape). " - "Populated only when the engine was launched with " - "``enable_return_routed_experts=True``; otherwise ``None``." - ), - ) + routed_experts: dict[str, Any] | None = None class PrimeRlGenerateResponse(GenerateResponse): choices: list[PrimeRlGenerateResponseChoice] -def encode_routed_experts(arr: np.ndarray) -> dict: - return { - "data": base64.b85encode(arr.tobytes()).decode("ascii"), - "shape": list(arr.shape), - } - - -class _RoutedExpertsCaptureBase: - """Wraps the engine result generator and accumulates a - ``{output_index: encoded_experts}`` map as outputs stream. Subclasses - implement ``post_process`` to fold the captured map into the response - in whatever shape the endpoint returns (in-place vs rebuilt).""" - - def __init__(self, generator: AsyncGenerator[RequestOutput, None]): - self._generator = generator - self.routed_experts: dict[int, dict] = {} - - async def __aiter__(self): - async for request_output in self._generator: - for output in request_output.outputs: - if output.routed_experts is not None: - self.routed_experts[output.index] = encode_routed_experts(output.routed_experts) - yield request_output - - -class _RoutedExpertsCapture(_RoutedExpertsCaptureBase): - """Generate-endpoint variant: rebuilds the response with - ``PrimeRlGenerateResponseChoice`` because upstream's - ``GenerateResponseChoice`` isn't ``extra='allow'``, so an attribute - set after construction wouldn't survive serialization.""" - +class _GenerateRoutedExpertsCapture(RoutedExpertsCapture): def post_process(self, response: GenerateResponse) -> PrimeRlGenerateResponse: - new_choices = [ + choices = [ PrimeRlGenerateResponseChoice( - **choice.model_dump(), + **choice.model_dump(exclude={"routed_experts"}), routed_experts=self.routed_experts.get(choice.index), ) for choice in response.choices ] return PrimeRlGenerateResponse( request_id=response.request_id, - choices=new_choices, + choices=choices, prompt_logprobs=response.prompt_logprobs, kv_transfer_params=response.kv_transfer_params, ) @@ -135,7 +98,7 @@ async def _client_set_max_tokens(raw_request: Request | None) -> bool: class PrimeRlServingTokens(ServingTokens): - """ServingTokens + DP-rank routing + routed_experts export + max_tokens defaulting.""" + """ServingTokens + DP-rank routing + compact routed experts + max_tokens defaulting.""" @cached_property def _max_tokens_defaults(self) -> tuple[dict, int | None]: @@ -298,15 +261,13 @@ async def serve_tokens_full_generator( # type: ignore[override] model_name: str, request_metadata: RequestResponseMetadata, ) -> ErrorResponse | GenerateResponse: - # Wrap the result generator to capture routed_experts as it streams, - # defer the rest to upstream, then post-process the response into our - # PrimeRlGenerateResponse subclass so the encoded experts surface in - # the JSON. Skipping the wrapper when the engine isn't producing routed - # experts keeps us a no-op subclass on the common path. - capture: _RoutedExpertsCapture | None = None + # Capture routed_experts as vLLM streams request outputs, then post-process + # the final response into our GenerateResponse subclass so the encoded + # experts surface in the JSON. + capture: _GenerateRoutedExpertsCapture | None = None if self.model_config.enable_return_routed_experts: - capture = _RoutedExpertsCapture(result_generator) - result_generator = capture # type: ignore[assignment] + capture = _GenerateRoutedExpertsCapture(result_generator) + result_generator = capture response = await super().serve_tokens_full_generator( request, result_generator, request_id, model_name, request_metadata diff --git a/src/prime_rl/orchestrator/trajectories.py b/src/prime_rl/orchestrator/trajectories.py index a1e0ff9001..5e693b0a76 100644 --- a/src/prime_rl/orchestrator/trajectories.py +++ b/src/prime_rl/orchestrator/trajectories.py @@ -3,11 +3,13 @@ from pathlib import Path from typing import Any +import numpy as np +import pybase64 import torch import verifiers as vf from transformers.tokenization_utils import PreTrainedTokenizer -from prime_rl.transport import TrainingSample +from prime_rl.transport import RoutedExperts, TrainingSample from prime_rl.utils.chat_template import ( common_prefix_len, deserialize_tool_calls, @@ -21,25 +23,26 @@ # primitives are immutable. mm_kwargs payloads are not mutated after creation. -def _align_routed_experts( - routed_experts: list[list[list[int]]] | None, +def align_routed_experts( + routed_experts: np.ndarray | None, expected_len: int, -) -> list[list[list[int]]] | None: +) -> np.ndarray | None: """Align routed_experts length with the expected token count. VLLM's capturer uses `num_tokens - 1` slot mappings because the final generated token was never fed as input to a forward pass and has no routing decision. Append zero-filled entries for the missing positions. """ - if routed_experts is None or not routed_experts: + if routed_experts is None: return routed_experts - deficit = expected_len - len(routed_experts) + assert routed_experts.ndim == 3 + if routed_experts.shape[0] > expected_len: + return np.ascontiguousarray(routed_experts[:expected_len]) + deficit = expected_len - routed_experts.shape[0] if deficit <= 0: return routed_experts - num_layers = len(routed_experts[0]) - topk = len(routed_experts[0][0]) - zero_entry = [[0] * topk for _ in range(num_layers)] - return routed_experts + [zero_entry for _ in range(deficit)] + padding = np.zeros((deficit, routed_experts.shape[1], routed_experts.shape[2]), dtype=routed_experts.dtype) + return np.concatenate((routed_experts, padding), axis=0) def _common_prefix_len(a: list[int], b: list[int]) -> int: @@ -236,13 +239,21 @@ def interleave_rollout( def prepare_step_tokens(step: vf.TrajectoryStep, step_idx: int) -> dict[str, Any] | None: tokens = step["tokens"] if tokens is not None: + routed_experts_payload = tokens.get("routed_experts") + routed_experts = None + if routed_experts_payload is not None: + decoded_routed_experts = pybase64.b64decode_as_bytearray(routed_experts_payload["data"]) + routed_experts = np.frombuffer(decoded_routed_experts, dtype=np.uint8).reshape( + routed_experts_payload["shape"] + ) + return { "prompt_ids": list(tokens["prompt_ids"]), "prompt_mask": [bool(i) for i in tokens["prompt_mask"]], "completion_ids": list(tokens["completion_ids"]), "completion_mask": [bool(i) for i in tokens["completion_mask"]], "completion_logprobs": list(tokens["completion_logprobs"]), - "routed_experts": tokens.get("routed_experts"), + "routed_experts": routed_experts, # Renderer-emitted multimodal sidecar (placeholders + per-item # processed tensors). Populated when the rollout went through # a multimodal-aware renderer (e.g. Qwen3VLRenderer); absent @@ -268,12 +279,21 @@ def make_sample(tokens: dict[str, Any]) -> TrainingSample: completion_mask = [bool(i) for i in tokens["completion_mask"]] completion_ids = list(tokens["completion_ids"]) - routed_experts = _align_routed_experts( + routed_experts = align_routed_experts( tokens.get("routed_experts"), len(tokens["prompt_ids"]) + len(tokens["completion_ids"]), ) + packed_routed_experts = None + if routed_experts is not None: + routed_experts = np.ascontiguousarray(routed_experts) + packed_routed_experts = RoutedExperts( + data=routed_experts.tobytes(), + shape=list(routed_experts.shape), + dtype=str(routed_experts.dtype), + ) + prompt_ids = list(tokens["prompt_ids"]) - return TrainingSample( + sample = TrainingSample( prompt_ids=prompt_ids, prompt_mask=[bool(i) for i in tokens["prompt_mask"]], completion_ids=completion_ids, @@ -283,11 +303,17 @@ def make_sample(tokens: dict[str, Any]) -> TrainingSample: teacher_logprobs=None, advantage=None, env_name=output["env_name"], - routed_experts=routed_experts, mm_token_type_ids=None, + routed_experts=packed_routed_experts, ) - - def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> None: + return sample, routed_experts + + def extend_sample( + sample: TrainingSample, + sample_routed_experts: np.ndarray | None, + prefix_len: int, + step_idx: int, + ) -> np.ndarray | None: """Extend an existing sample with a new trajectory step (extension property holds).""" tokens = prepared_steps[step_idx] @@ -308,26 +334,35 @@ def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> Non sample.completion_logprobs.extend(tokens["completion_logprobs"]) sample.completion_temperatures.extend([temperature] * len(completion_ids)) - if tokens.get("routed_experts") is not None and sample.routed_experts is not None: + if tokens.get("routed_experts") is not None and sample_routed_experts is not None: step_routed = tokens["routed_experts"] - # The previous step's last routing entry was zero-padded by _align_routed_experts + # The previous step's last routing entry was zero-padded by align_routed_experts # (vLLM only captures num_tokens-1 routings per request). This step actually # processed that boundary token as part of its prompt, so replace the zero-fill # with the real routing decision before appending new entries. - if prefix_len > 0 and prefix_len <= len(step_routed): - sample.routed_experts[prefix_len - 1] = step_routed[prefix_len - 1] - sample.routed_experts.extend(step_routed[prefix_len:]) + if prefix_len > 0 and prefix_len <= step_routed.shape[0]: + sample_routed_experts[prefix_len - 1] = step_routed[prefix_len - 1] + sample_routed_experts = np.concatenate((sample_routed_experts, step_routed[prefix_len:]), axis=0) expected_len = len(sample.prompt_ids) + len(sample.completion_ids) - sample.routed_experts = _align_routed_experts(sample.routed_experts, expected_len) + sample_routed_experts = align_routed_experts(sample_routed_experts, expected_len) + sample_routed_experts = np.ascontiguousarray(sample_routed_experts) + packed_routed_experts = RoutedExperts( + data=sample_routed_experts.tobytes(), + shape=list(sample_routed_experts.shape), + dtype=str(sample_routed_experts.dtype), + ) + sample.routed_experts = packed_routed_experts + return sample_routed_experts # Track (prefix_tokens, sample, step_indices) per active sample. step_indices # is the explicit list of prepared_steps positions merged into this sample — # non-contiguous when other agents' steps interleave. - active_samples: list[tuple[list[int], TrainingSample, list[int]]] = [] + active_samples: list[tuple[list[int], TrainingSample, list[int], np.ndarray | None]] = [] first_tokens = prepared_steps[0] first_prefix = first_tokens["prompt_ids"] + first_tokens["completion_ids"] - active_samples.append((first_prefix, make_sample(first_tokens), [0])) + first_sample, first_routed_experts = make_sample(first_tokens) + active_samples.append((first_prefix, first_sample, [0], first_routed_experts)) for step_idx, _step in enumerate(trajectory[1:], start=1): tokens = prepared_steps[step_idx] @@ -335,19 +370,20 @@ def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> Non # Check if this step extends ANY active prefix matched_idx = None - for idx, (prefix_tokens, _, _) in enumerate(active_samples): + for idx, (prefix_tokens, _, _, _) in enumerate(active_samples): if step_prompt_ids[: len(prefix_tokens)] == prefix_tokens: matched_idx = idx break if matched_idx is not None: # Extension holds - merge into matched sample - prefix_tokens, sample, step_indices = active_samples[matched_idx] - extend_sample(sample, len(prefix_tokens), step_idx=step_idx) + prefix_tokens, sample, step_indices, sample_routed_experts = active_samples[matched_idx] + sample_routed_experts = extend_sample(sample, sample_routed_experts, len(prefix_tokens), step_idx=step_idx) active_samples[matched_idx] = ( tokens["prompt_ids"] + tokens["completion_ids"], sample, step_indices + [step_idx], + sample_routed_experts, ) else: # No prefix matches - start a new sample @@ -356,7 +392,8 @@ def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> Non f"Starting new sample (active_prefixes={len(active_samples)}, step_prompt_len={len(step_prompt_ids)})." ) new_prefix = tokens["prompt_ids"] + tokens["completion_ids"] - active_samples.append((new_prefix, make_sample(tokens), [step_idx])) + sample, routed_experts = make_sample(tokens) + active_samples.append((new_prefix, sample, [step_idx], routed_experts)) # Attach images by concatenating mm_items across every step the # sample covers. verifiers' ``state_to_output`` ships per-step @@ -365,7 +402,7 @@ def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> Non # reading the last step alone would miss every earlier-turn image. # Concat in step order recovers the per-sample cumulative set; # deduping again here would drop legitimate duplicate placeholders. - for _, sample, step_indices in active_samples: + for _, sample, step_indices, _ in active_samples: renderer_mm = _union_step_mm_data(prepared_steps, step_indices) if renderer_mm is not None: mm_kwargs = _pack_mm_kwargs_from_renderer(renderer_mm) @@ -380,7 +417,7 @@ def extend_sample(sample: TrainingSample, prefix_len: int, step_idx: int) -> Non for token_id in sample.prompt_ids + sample.completion_ids ] - return [sample for _, sample, _ in active_samples] + return [sample for _, sample, _, _ in active_samples] def _union_step_mm_data( diff --git a/src/prime_rl/trainer/batch.py b/src/prime_rl/trainer/batch.py index 9db4aefd74..ea99859a35 100644 --- a/src/prime_rl/trainer/batch.py +++ b/src/prime_rl/trainer/batch.py @@ -1,6 +1,52 @@ import copy -from prime_rl.transport.types import MicroBatch, TrainingSample +from prime_rl.transport.types import MicroBatch, RoutedExperts, TrainingSample + +ROUTED_EXPERTS_DTYPE_ITEMSIZE = { + "uint8": 1, + "int16": 2, + "int32": 4, +} + + +def _copy_routed_experts(routed_experts: RoutedExperts) -> RoutedExperts: + return RoutedExperts( + data=routed_experts.data, + shape=list(routed_experts.shape), + dtype=routed_experts.dtype, + ) + + +def _routed_experts_row_size(routed_experts: RoutedExperts) -> int: + return routed_experts.shape[1] * routed_experts.shape[2] * ROUTED_EXPERTS_DTYPE_ITEMSIZE[routed_experts.dtype] + + +def _slice_routed_experts(routed_experts: RoutedExperts, seq_len: int) -> RoutedExperts: + row_size = _routed_experts_row_size(routed_experts) + return RoutedExperts( + data=routed_experts.data[: seq_len * row_size], + shape=[seq_len, routed_experts.shape[1], routed_experts.shape[2]], + dtype=routed_experts.dtype, + ) + + +def _append_routed_experts(dst: MicroBatch, src: MicroBatch) -> None: + dst_routed = dst.routed_experts + src_routed = src.routed_experts + assert dst_routed is not None + assert src_routed is not None + assert dst_routed.dtype == src_routed.dtype + assert dst_routed.shape[1:] == src_routed.shape[1:] + dst_routed.data += src_routed.data + dst_routed.shape[0] += src_routed.shape[0] + + +def _pad_routed_experts(micro_batch: MicroBatch, padding_size: int) -> None: + routed_experts = micro_batch.routed_experts + assert routed_experts is not None + row_size = _routed_experts_row_size(routed_experts) + routed_experts.data += b"\0" * (padding_size * row_size) + routed_experts.shape[0] += padding_size def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch: @@ -27,7 +73,9 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch # Teacher logprobs already cover the full sequence (prompt + completion), # computed via prefill in the orchestrator when a teacher model is configured teacher_logprobs = training_example.teacher_logprobs - routed_experts = training_example.routed_experts + routed_experts = ( + _copy_routed_experts(training_example.routed_experts) if training_example.routed_experts is not None else None + ) if len(input_ids) > seq_len: input_ids = input_ids[:seq_len] @@ -40,7 +88,7 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch if teacher_logprobs is not None: teacher_logprobs = teacher_logprobs[:seq_len] if routed_experts is not None: - routed_experts = routed_experts[:seq_len] + routed_experts = _slice_routed_experts(routed_experts, seq_len) if mm_token_type_ids is not None: mm_token_type_ids = mm_token_type_ids[:seq_len] env_names = env_names[:seq_len] @@ -60,9 +108,10 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch assert len(teacher_logprobs) == len(input_ids), f"teacher_logprobs: {len(teacher_logprobs)}" if routed_experts is not None: - assert len(routed_experts) == len(input_ids), ( - f"routed_experts: {len(routed_experts)}, input_ids: {len(input_ids)}" + assert routed_experts.shape[0] == len(input_ids), ( + f"routed_experts: {routed_experts.shape}, input_ids: {len(input_ids)}" ) + assert len(routed_experts.data) == len(input_ids) * _routed_experts_row_size(routed_experts) if mm_token_type_ids is not None: assert len(mm_token_type_ids) == len(input_ids), ( @@ -143,10 +192,12 @@ def packed_samples_into_micro_bs( if bin_content.teacher_logprobs is None: bin_content.teacher_logprobs = [] bin_content.teacher_logprobs.extend(sample.teacher_logprobs) + assert (bin_content.routed_experts is None) == (sample.routed_experts is None) if sample.routed_experts is not None: if bin_content.routed_experts is None: - bin_content.routed_experts = [] - bin_content.routed_experts.extend(sample.routed_experts) + bin_content.routed_experts = _copy_routed_experts(sample.routed_experts) + else: + _append_routed_experts(bin_content, sample) if sample.mm_token_type_ids is not None: if bin_content.mm_token_type_ids is None: bin_content.mm_token_type_ids = [] @@ -201,6 +252,8 @@ def pad_micro_batch(micro_batch: MicroBatch, pad_to_multiple_of: int) -> MicroBa ) if micro_batch.mm_token_type_ids is not None: micro_batch.mm_token_type_ids.extend([0] * padding_size) + if micro_batch.routed_experts is not None: + _pad_routed_experts(micro_batch, padding_size) micro_batch.env_names.extend([""] * padding_size) return micro_batch diff --git a/src/prime_rl/trainer/rl/data.py b/src/prime_rl/trainer/rl/data.py index 73e35159af..45acdcb5c0 100644 --- a/src/prime_rl/trainer/rl/data.py +++ b/src/prime_rl/trainer/rl/data.py @@ -210,6 +210,18 @@ def _micro_batch_to_tensor(self, micro_batch: MicroBatch) -> TensorMicroBatch: key: torch.frombuffer(bytearray(payload.data), dtype=_torch_dtype(payload.dtype)).reshape(payload.shape) for key, payload in micro_batch.mm_kwargs.items() } + routed_experts = None + packed_routed_experts = micro_batch.routed_experts + if packed_routed_experts is not None: + routed_experts = ( + torch.frombuffer( + packed_routed_experts.data, + dtype=_torch_dtype(packed_routed_experts.dtype), + ) + .reshape(packed_routed_experts.shape) + .to(torch.int32) + .unsqueeze(0) + ) return TensorMicroBatch( input_ids=torch.tensor(micro_batch.input_ids, dtype=torch.long).unsqueeze(0), position_ids=torch.tensor(micro_batch.position_ids, dtype=torch.long).unsqueeze(0), @@ -229,11 +241,7 @@ def _micro_batch_to_tensor(self, micro_batch: MicroBatch) -> TensorMicroBatch: mm_token_type_ids=torch.tensor(micro_batch.mm_token_type_ids, dtype=torch.long).unsqueeze(0) if micro_batch.mm_token_type_ids is not None else None, - routed_experts=torch.tensor(micro_batch.routed_experts, dtype=torch.int32).unsqueeze( - 0 - ) # [1, seq_len, layers, topk] - if micro_batch.routed_experts is not None - else None, + routed_experts=routed_experts, training_mode=micro_batch.training_mode, ) diff --git a/src/prime_rl/transport/__init__.py b/src/prime_rl/transport/__init__.py index e4c3153dc7..bad9d6c806 100644 --- a/src/prime_rl/transport/__init__.py +++ b/src/prime_rl/transport/__init__.py @@ -8,7 +8,7 @@ FileSystemTrainingBatchReceiver, FileSystemTrainingBatchSender, ) -from prime_rl.transport.types import MicroBatch, TrainingBatch, TrainingSample +from prime_rl.transport.types import MicroBatch, RoutedExperts, TrainingBatch, TrainingSample from prime_rl.transport.zmq import ( ZMQMicroBatchReceiver, ZMQMicroBatchSender, @@ -67,6 +67,7 @@ def setup_micro_batch_receiver( "TrainingSample", "TrainingBatch", "MicroBatch", + "RoutedExperts", "setup_training_batch_sender", "setup_training_batch_receiver", "setup_micro_batch_sender", diff --git a/src/prime_rl/transport/types.py b/src/prime_rl/transport/types.py index d4c947224f..1bb31c9325 100644 --- a/src/prime_rl/transport/types.py +++ b/src/prime_rl/transport/types.py @@ -14,6 +14,14 @@ class EncodedTensor(msgspec.Struct, array_like=True, gc=False): data: bytes +# Routed experts are large per-token arrays. tolist() is too expensive, so we +# send raw bytes through msgpack and carry the shape/dtype needed to rebuild. +class RoutedExperts(msgspec.Struct, array_like=True, gc=False, omit_defaults=True): + data: bytes + shape: list[int] # [seq_len, layers, topk] + dtype: str + + # Orchestrator -> Packer class TrainingSample(msgspec.Struct, array_like=True, gc=False, omit_defaults=True): """A single training example.""" @@ -39,7 +47,7 @@ class TrainingSample(msgspec.Struct, array_like=True, gc=False, omit_defaults=Tr # touching this transport. mm_kwargs: dict[str, EncodedTensor] | None = None - routed_experts: list[list[list[int]]] | None = None # [seq_len, layers, topk] + routed_experts: RoutedExperts | None = None # mm_token_type_ids: token type ids per token [batch seq], int64 (0=text, 1=image, 2=video) mm_token_type_ids: list[int] | None = None @@ -70,7 +78,7 @@ class MicroBatch(msgspec.Struct, array_like=True, gc=False, omit_defaults=True): env_names: list[str] teacher_logprobs: list[float] | None = None lora_num_tokens: list[int] | None = None - routed_experts: list[list[list[int]]] | None = None + routed_experts: RoutedExperts | None = None # See TrainingSample.mm_kwargs. mm_kwargs: dict[str, EncodedTensor] | None = None diff --git a/tests/unit/inference/test_serving_tokens.py b/tests/unit/inference/test_serving_tokens.py index ac5b52b3d4..1882e57e55 100644 --- a/tests/unit/inference/test_serving_tokens.py +++ b/tests/unit/inference/test_serving_tokens.py @@ -3,8 +3,7 @@ The full happy-path is owned upstream by vLLM 0.20's ``vllm/entrypoints/serve/disagg`` test suite. We only cover the prime-RL deltas here: - * ``encode_routed_experts`` round-trips a numpy array as expected. - * ``PrimeRlGenerateResponseChoice`` accepts the optional field. + * ``serialize_routed_experts`` round-trips a compact raw-byte payload. * The subclass attaches its overrides without monkey-patching the parent. * ``_client_set_max_tokens`` distinguishes raw-body shapes correctly. """ @@ -12,19 +11,26 @@ from __future__ import annotations import asyncio -import base64 import numpy as np +import pybase64 +from vllm.entrypoints.serve.disagg.protocol import GenerateResponse, GenerateResponseChoice +from prime_rl.inference.vllm.routed_experts import serialize_routed_experts from prime_rl.inference.vllm.serving_tokens import ( - PrimeRlGenerateResponse, - PrimeRlGenerateResponseChoice, PrimeRlServingTokens, _client_set_max_tokens, - encode_routed_experts, + _GenerateRoutedExpertsCapture, ) +def _decode_routed_experts(encoded: dict) -> np.ndarray: + return np.frombuffer( + pybase64.b64decode_as_bytearray(encoded["data"]), + dtype=np.uint8, + ).reshape(encoded["shape"]) + + class _FakeRawRequest: def __init__(self, body): self._body = body @@ -36,50 +42,54 @@ async def json(self): return self._body -def test_encode_routed_experts_roundtrip(): - arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32) - encoded = encode_routed_experts(arr) - - assert encoded["shape"] == [2, 3] - decoded = np.frombuffer(base64.b85decode(encoded["data"]), dtype=np.int32).reshape(encoded["shape"]) - np.testing.assert_array_equal(decoded, arr) - +async def _empty_request_outputs(): + if False: + yield -def test_routed_experts_choice_accepts_none_and_dict(): - no_re = PrimeRlGenerateResponseChoice(index=0, finish_reason="stop", token_ids=[1, 2]) - assert no_re.routed_experts is None - encoded = encode_routed_experts(np.zeros((1, 1), dtype=np.int32)) - with_re = PrimeRlGenerateResponseChoice(index=0, finish_reason="stop", token_ids=[1], routed_experts=encoded) - assert with_re.routed_experts == encoded - - -def test_response_only_serializes_declared_fields(): - # Upstream silently drops id=/created=/model=/usage= because they're not - # declared on GenerateResponse. Our subclass adds nothing to that surface - # — it only widens the choices type — so the JSON shape stays slim. - resp = PrimeRlGenerateResponse( - request_id="gen-x", - choices=[PrimeRlGenerateResponseChoice(index=0, finish_reason="stop", token_ids=[7])], - ) - dumped = resp.model_dump() - assert set(dumped.keys()) == { - "request_id", - "choices", - "prompt_logprobs", - "kv_transfer_params", - } - assert dumped["choices"][0]["routed_experts"] is None - - -def test_subclass_inherits_serve_tokens_full_generator(): - # The subclass adds an override; make sure we didn't accidentally rebind - # ``serve_tokens`` to a parent attribute via __dict__-update tricks later. +def test_subclass_only_overrides_serve_tokens(): + assert PrimeRlServingTokens.serve_tokens is not PrimeRlServingTokens.__mro__[1].serve_tokens assert ( PrimeRlServingTokens.serve_tokens_full_generator is not PrimeRlServingTokens.__mro__[1].serve_tokens_full_generator ) - assert PrimeRlServingTokens.serve_tokens is not PrimeRlServingTokens.__mro__[1].serve_tokens + + +def test_serialize_routed_experts_uses_compact_raw_payload(): + routed_experts = np.array( + [ + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + ], + dtype=np.int64, + ) + + encoded = serialize_routed_experts(routed_experts) + assert encoded is not None + + decoded = _decode_routed_experts(encoded) + assert decoded.dtype == np.uint8 + np.testing.assert_array_equal(decoded, routed_experts) + + +def test_generate_response_post_process_replaces_upstream_routed_experts(): + compact_routed_experts = {"data": "AQID", "shape": [1, 1, 3]} + capture = _GenerateRoutedExpertsCapture(_empty_request_outputs()) + capture.routed_experts[0] = compact_routed_experts + response = GenerateResponse( + request_id="request-id", + choices=[ + GenerateResponseChoice( + index=0, + token_ids=[1, 2, 3], + routed_experts="upstream-npy-payload", + ) + ], + ) + + processed = capture.post_process(response) + + assert processed.choices[0].routed_experts == compact_routed_experts def test_client_set_max_tokens_recognizes_explicit_value(): diff --git a/tests/unit/orchestrator/test_batch.py b/tests/unit/orchestrator/test_batch.py index e01089ccf4..7531423c72 100644 --- a/tests/unit/orchestrator/test_batch.py +++ b/tests/unit/orchestrator/test_batch.py @@ -1,7 +1,17 @@ +import numpy as np import pytest from prime_rl.trainer.batch import prepare_batch, prepare_sample -from prime_rl.transport.types import TrainingSample +from prime_rl.transport.types import RoutedExperts, TrainingSample + + +def _routed_experts(data, dtype=np.uint8): + routed_experts = np.asarray(data, dtype=dtype) + return RoutedExperts( + data=routed_experts.tobytes(), + shape=list(routed_experts.shape), + dtype=str(routed_experts.dtype), + ) @pytest.fixture @@ -128,6 +138,7 @@ def test_prepare_sample_with_routed_experts(): """Routed experts are passed through prepare_sample and match input_ids length.""" # 2 prompt + 2 completion = 4 tokens, 2 layers, topk=2 routed_experts = [[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[0, 2], [1, 3]], [[1, 0], [3, 2]]] + routed_payload = _routed_experts(routed_experts) sample = TrainingSample( prompt_ids=[1, 2], prompt_mask=[False, False], @@ -137,18 +148,19 @@ def test_prepare_sample_with_routed_experts(): completion_temperatures=[1.0, 1.0], advantage=1.0, env_name="test-env", - routed_experts=routed_experts, + routed_experts=routed_payload, ) micro_batch = prepare_sample(sample, seq_len=8) assert micro_batch.routed_experts is not None - assert len(micro_batch.routed_experts) == 4 - assert micro_batch.routed_experts == routed_experts + assert micro_batch.routed_experts == routed_payload def test_prepare_sample_truncates_routed_experts(): """Routed experts are truncated to seq_len when input exceeds it.""" routed_experts = [[[0, 1]], [[2, 3]], [[4, 5]], [[6, 7]]] + routed_payload = _routed_experts(routed_experts) + expected_payload = _routed_experts(routed_experts[:3]) sample = TrainingSample( prompt_ids=[1, 2], prompt_mask=[False, False], @@ -158,13 +170,12 @@ def test_prepare_sample_truncates_routed_experts(): completion_temperatures=[1.0, 1.0], advantage=1.0, env_name="test-env", - routed_experts=routed_experts, + routed_experts=routed_payload, ) micro_batch = prepare_sample(sample, seq_len=3) assert micro_batch.routed_experts is not None - assert len(micro_batch.routed_experts) == 3 - assert micro_batch.routed_experts == routed_experts[:3] + assert micro_batch.routed_experts == expected_payload assert micro_batch.env_names == ["test-env"] * 3 diff --git a/tests/unit/orchestrator/test_qwen3_vl_e2e.py b/tests/unit/orchestrator/test_qwen3_vl_e2e.py index a08fa30fae..ffbdc45457 100644 --- a/tests/unit/orchestrator/test_qwen3_vl_e2e.py +++ b/tests/unit/orchestrator/test_qwen3_vl_e2e.py @@ -14,10 +14,12 @@ from __future__ import annotations import asyncio +import json from pathlib import Path from typing import Any from unittest.mock import MagicMock +import httpx import pytest _HF_CACHE = Path("~/.cache/huggingface/hub").expanduser() @@ -54,7 +56,7 @@ async def post(self, path, *, cast_to=dict, body=None, options=None): self.calls.append({"path": path, "body": body, "options": options}) # Reply with two sampled tokens + <|im_end|>. The renderer's # parse_response slices the content tokens. - return { + payload = { "request_id": "qwen-vl-e2e", "choices": [ { @@ -71,6 +73,7 @@ async def post(self, path, *, cast_to=dict, body=None, options=None): }, ], } + return httpx.Response(200, content=json.dumps(payload).encode()) def test_renderer_client_qwen3_vl_e2e_features_payload_roundtrips_through_vllm(): diff --git a/tests/unit/orchestrator/test_trajectories.py b/tests/unit/orchestrator/test_trajectories.py index c29e80976c..36c9ef1008 100644 --- a/tests/unit/orchestrator/test_trajectories.py +++ b/tests/unit/orchestrator/test_trajectories.py @@ -1,12 +1,13 @@ from unittest.mock import MagicMock import numpy as np +import pybase64 import pytest import verifiers as vf from prime_rl.orchestrator.trajectories import ( - _align_routed_experts, _deserialize_tool_calls, + align_routed_experts, interleave_rollout, ) @@ -30,6 +31,21 @@ def _decode_mm_thw(sample) -> list: return np.frombuffer(g.data, dtype=np.dtype(g.dtype)).reshape(g.shape).tolist() +def _routed_experts_payload(data) -> dict: + arr = np.asarray(data, dtype=np.uint8) + return { + "data": pybase64.b64encode(memoryview(np.ascontiguousarray(arr))).decode("ascii"), + "shape": list(arr.shape), + } + + +def _sample_routed_experts(sample) -> np.ndarray: + assert sample.routed_experts is not None + return np.frombuffer(sample.routed_experts.data, dtype=np.dtype(sample.routed_experts.dtype)).reshape( + sample.routed_experts.shape + ) + + def test_deserialize_tool_calls_does_not_inject_missing_key(): messages = [{"role": "assistant", "content": "hello"}] @@ -807,44 +823,47 @@ def test_interleave_rollout_error_masks_all_false(): def test_align_routed_experts_none(): - assert _align_routed_experts(None, 10) is None + assert align_routed_experts(None, 10) is None def test_align_routed_experts_empty(): - result = _align_routed_experts([], 10) - assert result == [] + experts = np.empty((0, 2, 2), dtype=np.uint8) + result = align_routed_experts(experts, 10) + assert result is not None + assert result.shape == (10, 2, 2) + assert np.all(result == 0) def test_align_routed_experts_no_deficit(): # 3 tokens, 2 layers, topk=2 - experts = [[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[0, 2], [1, 3]]] - result = _align_routed_experts(experts, expected_len=3) - assert result == experts + experts = np.asarray([[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[0, 2], [1, 3]]], dtype=np.uint8) + result = align_routed_experts(experts, expected_len=3) + np.testing.assert_array_equal(result, experts) def test_align_routed_experts_with_deficit(): # 2 tokens but expected 4 (deficit of 2) - experts = [[[1, 2], [3, 4]], [[5, 6], [7, 0]]] - result = _align_routed_experts(experts, expected_len=4) - assert len(result) == 4 - assert result[:2] == experts + experts = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 0]]], dtype=np.uint8) + result = align_routed_experts(experts, expected_len=4) + assert result is not None + assert result.shape == (4, 2, 2) + np.testing.assert_array_equal(result[:2], experts) # Padded entries should be zero-filled with same shape [layers=2, topk=2] - assert result[2] == [[0, 0], [0, 0]] - assert result[3] == [[0, 0], [0, 0]] + np.testing.assert_array_equal(result[2], [[0, 0], [0, 0]]) + np.testing.assert_array_equal(result[3], [[0, 0], [0, 0]]) def test_align_routed_experts_excess_length(): - experts = [[[1, 2]], [[3, 4]], [[5, 6]]] - result = _align_routed_experts(experts, expected_len=2) - # No truncation, just returns as-is - assert result == experts + experts = np.asarray([[[1, 2]], [[3, 4]], [[5, 6]]], dtype=np.uint8) + result = align_routed_experts(experts, expected_len=2) + np.testing.assert_array_equal(result, experts[:2]) def test_interleave_rollout_single_step_with_routed_experts(): """Routed experts are aligned and passed through for a single-step trajectory.""" # prompt_ids=[1,2], completion_ids=[3,4] -> total 4 tokens # vLLM returns num_tokens-1 = 3 routed expert entries - routed_experts_from_vllm = [[[0, 1]], [[2, 3]], [[4, 5]]] # 3 entries, 1 layer, topk=2 + routed_experts_from_vllm = np.asarray([[[0, 1]], [[2, 3]], [[4, 5]]], dtype=np.uint8) output = vf.RolloutOutput( example_id=0, trajectory=[ @@ -860,7 +879,7 @@ def test_interleave_rollout_single_step_with_routed_experts(): completion_logprobs=[-0.1, -0.2], overlong_prompt=False, is_truncated=False, - routed_experts=routed_experts_from_vllm, + routed_experts=_routed_experts_payload(routed_experts_from_vllm), ), reward=None, advantage=None, @@ -880,18 +899,19 @@ def test_interleave_rollout_single_step_with_routed_experts(): # Should be aligned to 4 tokens (2 prompt + 2 completion) assert sample.routed_experts is not None - assert len(sample.routed_experts) == 4 + routed_experts = _sample_routed_experts(sample) + assert routed_experts.shape == (4, 1, 2) # First 3 are original, last one is zero-padded - assert sample.routed_experts[:3] == routed_experts_from_vllm - assert sample.routed_experts[3] == [[0, 0]] + np.testing.assert_array_equal(routed_experts[:3], routed_experts_from_vllm) + np.testing.assert_array_equal(routed_experts[3], [[0, 0]]) def test_interleave_rollout_multi_step_with_routed_experts(): """Routed experts are extended and aligned across multi-step trajectories.""" # Step 1: prompt=[1,2], completion=[3,4] -> 4 tokens, vLLM returns 3 - step1_experts = [[[1, 2]], [[3, 4]], [[5, 6]]] + step1_experts = np.asarray([[[1, 2]], [[3, 4]], [[5, 6]]], dtype=np.uint8) # Step 2: prompt=[1,2,3,4,5,6], completion=[7,8] -> 8 tokens, vLLM returns 7 - step2_experts = [[[1, 0]], [[2, 0]], [[3, 0]], [[4, 0]], [[5, 0]], [[6, 0]], [[7, 0]]] + step2_experts = np.asarray([[[1, 0]], [[2, 0]], [[3, 0]], [[4, 0]], [[5, 0]], [[6, 0]], [[7, 0]]], dtype=np.uint8) output = vf.RolloutOutput( example_id=0, @@ -908,7 +928,7 @@ def test_interleave_rollout_multi_step_with_routed_experts(): completion_logprobs=[-0.1, -0.2], overlong_prompt=False, is_truncated=False, - routed_experts=step1_experts, + routed_experts=_routed_experts_payload(step1_experts), ), reward=None, advantage=None, @@ -932,7 +952,7 @@ def test_interleave_rollout_multi_step_with_routed_experts(): completion_logprobs=[-0.3, -0.4], overlong_prompt=False, is_truncated=False, - routed_experts=step2_experts, + routed_experts=_routed_experts_payload(step2_experts), ), reward=None, advantage=None, @@ -953,7 +973,7 @@ def test_interleave_rollout_multi_step_with_routed_experts(): # Merged sample: prompt=[1,2], completion=[3,4,5,6,7,8] -> 8 tokens total assert len(sample.prompt_ids) + len(sample.completion_ids) == 8 assert sample.routed_experts is not None - assert len(sample.routed_experts) == 8 + assert _sample_routed_experts(sample).shape == (8, 1, 2) def test_interleave_rollout_none_routed_experts_stays_none(): diff --git a/uv.lock b/uv.lock index b85ead13d3..0aab78a784 100644 --- a/uv.lock +++ b/uv.lock @@ -15,7 +15,7 @@ conflicts = [[ ]] [options] -exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. +exclude-newer = "2026-05-14T14:58:09.755544055Z" exclude-newer-span = "P7D" [options.exclude-newer-package] @@ -23,6 +23,7 @@ vllm = false vllm-router = false dion = false tokenspeed-mla = false +prime = false nixl-cu12 = false deep-ep = false flash-attn-3 = false @@ -30,7 +31,7 @@ prime-sandboxes = false prime-tunnel = false deep-gemm = false prime-evals = false -prime = false +fastokens = false [manifest] members = [ @@ -1309,14 +1310,14 @@ wheels = [ [[package]] name = "fastokens" -version = "0.1.2" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/bd/e65b2989eb045863e1d4b1d161d122f69c8d3b8e23fa287e2a8f1eb4c8ab/fastokens-0.1.2.tar.gz", hash = "sha256:71da0dd9b198d3a00c1cdfae06aff7a616513bced4ba6b2ab0da63b688302c0d", size = 675220, upload-time = "2026-05-07T14:34:31.372Z" } +sdist = { url = "https://files.pythonhosted.org/packages/14/8e/7e88ec1d48db5a6e8d8d44318ce285e38c04b81508bdc2a60e17045a116f/fastokens-0.2.0.tar.gz", hash = "sha256:ef0e175de5c8cb1b616b3210d75dce1fab78e35fc02f77f03f7847d4678be686", size = 675822, upload-time = "2026-05-17T10:32:55.642Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/79/fd6f087929423289df4cf11c5c05d0c13f5274b6f1ff187d322b15ee35bc/fastokens-0.1.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07737f126ea0c6b92123f13c6aef9fada45923d37efdcf3d6bb23e677ec782a6", size = 3304086, upload-time = "2026-05-07T14:34:13.13Z" }, - { url = "https://files.pythonhosted.org/packages/3c/9d/393fa72d1d9a4e251221e077e42bdccce736f86636563b785d8460d655d1/fastokens-0.1.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:cd5c630f190a29492d86da7fcc53dd642f59dec4bf3eb204a378a32131003970", size = 3252648, upload-time = "2026-05-07T14:34:01.934Z" }, - { url = "https://files.pythonhosted.org/packages/70/8e/728c46b32fd6c10088a6a1b268732f50c03b0efb035bbea7d3f22b8de47e/fastokens-0.1.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:62b3bbbeb4e0ec72ff895b15e4b0ad04a791c87caa0edb1f63bd9d7c9896c86e", size = 3335929, upload-time = "2026-05-07T14:34:21.548Z" }, - { url = "https://files.pythonhosted.org/packages/04/3d/4ccb53de21bfb87ec13f1dfddc7567cb01732f5c755a083a7cc9e6eebfec/fastokens-0.1.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8e1cb2331e4ac377a636411d722e7a0a2a12c00a46c2d64ab6522004a38c8918", size = 3598235, upload-time = "2026-05-07T14:34:30.102Z" }, + { url = "https://files.pythonhosted.org/packages/b4/54/e0e4318ee1ad0b5196df72cf93615bba0b81f7869d659a44ccc475969151/fastokens-0.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:160253f8d30747cf66e7ed895c513e16f7b173dd9e644fa641e2eecbd43a616a", size = 3303534, upload-time = "2026-05-17T10:32:37.462Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/bfff90e4b1a43c17edf7305dafbd56dc992bbe832cc08da78f1f50104c2d/fastokens-0.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b61b9fe5b41e0bb36ad86e7551dc53293c9833909ef07b1cdbaa2055b06c3b3e", size = 3254096, upload-time = "2026-05-17T10:32:28.489Z" }, + { url = "https://files.pythonhosted.org/packages/05/bf/1cad7f0e8d03f5f5b2b417cda8859e4d968d2eebdca0cd336b23d7dbbdbb/fastokens-0.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:01b9bdba818d7b2c67d57d9917faf7a1dad32ece0734440130de94ad768b819f", size = 3336689, upload-time = "2026-05-17T10:32:46.21Z" }, + { url = "https://files.pythonhosted.org/packages/97/d7/f5fb2564e16b1f5733e05c41b090f95a3fe767f6b888ba7d864193bc5447/fastokens-0.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d068bc50082ad67d5d542847075f1f7b8d10f703274e56e241312f18b4d9e772", size = 3598064, upload-time = "2026-05-17T10:32:54.109Z" }, ] [[package]] @@ -3903,6 +3904,7 @@ dependencies = [ { name = "prime", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "prime-rl-configs", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "pyarrow", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pybase64", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "pyzmq", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "renderers", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "rich", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, @@ -3919,7 +3921,7 @@ dependencies = [ { name = "uvloop", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "verifiers", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "vllm", version = "0.21.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, - { name = "vllm", version = "0.21.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "vllm", version = "0.21.0+cu129.r42434.pr39568.a106aa6", source = { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "wandb", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] @@ -4050,6 +4052,7 @@ requires-dist = [ { name = "prime-rl", extras = ["quack"], marker = "extra == 'all'" }, { name = "prime-rl-configs", editable = "packages/prime-rl-configs" }, { name = "pyarrow", specifier = ">=21.0.0" }, + { name = "pybase64", specifier = ">=1.4.2" }, { name = "pyzmq", specifier = ">=27.1.0" }, { name = "quack-kernels", marker = "extra == 'quack'", specifier = ">=0.4.1" }, { name = "renderers", editable = "deps/renderers" }, @@ -4072,8 +4075,8 @@ requires-dist = [ { name = "verifiers", editable = "deps/verifiers" }, { name = "vllm", marker = "platform_machine != 'aarch64' and platform_machine != 'x86_64'", specifier = ">=0.21.0" }, { name = "vllm", marker = "platform_machine == 'aarch64'", url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_aarch64.whl" }, - { name = "vllm", marker = "platform_machine == 'x86_64'", url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl" }, - { name = "vllm-router", marker = "platform_machine == 'x86_64' and extra == 'disagg'", url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.22/vllm_router-0.1.22-cp38-abi3-manylinux_2_28_x86_64.whl" }, + { name = "vllm", marker = "platform_machine == 'x86_64'", url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl" }, + { name = "vllm-router", marker = "platform_machine == 'x86_64' and extra == 'disagg'", url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.25/vllm_router-0.1.25-cp38-abi3-manylinux_2_28_x86_64.whl" }, { name = "wandb", specifier = ">=0.26.1" }, { name = "wiki-search", marker = "extra == 'envs'", editable = "deps/verifiers/environments/wiki_search" }, ] @@ -4756,7 +4759,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastokens", specifier = ">=0.1.1" }, + { name = "fastokens", specifier = ">=0.2.0" }, { name = "jinja2" }, { name = "numpy" }, { name = "openai", specifier = ">=1.108.1" }, @@ -5893,6 +5896,7 @@ dependencies = [ { name = "numpy", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "openai", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "openai-agents", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "prime-pydantic-config", extra = ["toml"], marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "prime-sandboxes", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "prime-tunnel", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "pydantic", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, @@ -5931,7 +5935,7 @@ rl = [ { name = "torch", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "transformers", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "vllm", version = "0.21.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, - { name = "vllm", version = "0.21.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "vllm", version = "0.21.0+cu129.r42434.pr39568.a106aa6", source = { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "wandb", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] ta = [ @@ -5985,6 +5989,7 @@ requires-dist = [ { name = "openai-agents", specifier = ">=0.0.7" }, { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" }, { name = "peft", marker = "extra == 'rl'" }, + { name = "prime-pydantic-config", extras = ["toml"], editable = "deps/pydantic-config" }, { name = "prime-sandboxes", specifier = ">=0.2.25" }, { name = "prime-tunnel", specifier = ">=0.1.6" }, { name = "pydantic", specifier = ">=2.11.9" }, @@ -6007,7 +6012,7 @@ requires-dist = [ { name = "typing-extensions", marker = "python_full_version < '3.12'" }, { name = "vllm", marker = "platform_machine == 'aarch64' and extra == 'rl'", url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_aarch64.whl" }, { name = "vllm", marker = "platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'rl'", specifier = ">=0.10.0,<0.11.0" }, - { name = "vllm", marker = "platform_machine == 'x86_64' and extra == 'rl'", url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl" }, + { name = "vllm", marker = "platform_machine == 'x86_64' and extra == 'rl'", url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl" }, { name = "wandb", marker = "extra == 'rl'" }, ] provides-extras = ["browser", "openenv", "renderers", "rg", "rl", "ta"] @@ -6227,8 +6232,8 @@ provides-extras = ["zen", "bench", "tensorizer", "fastsafetensors", "instanttens [[package]] name = "vllm" -version = "0.21.0+cu129" -source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl" } +version = "0.21.0+cu129.r42434.pr39568.a106aa6" +source = { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl" } resolution-markers = [ "platform_machine == 'x86_64' and sys_platform == 'linux'", ] @@ -6304,7 +6309,7 @@ dependencies = [ { name = "xgrammar", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] wheels = [ - { url = "https://github.com/vllm-project/vllm/releases/download/v0.21.0/vllm-0.21.0+cu129-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:920777691e340df7a8328adfb1e57b9996dbb537edfb654dd32f70844f5f423d" }, + { url = "https://github.com/PrimeIntellect-ai/prime-rl/releases/download/v0.5.0/vllm-0.21.0+cu129.r42434.pr39568.a106aa6-cp38-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:80dbe20d6df474df0d9f87c4b82a68de2c96c36d9a1a5e55620e69d3f306fd4b" }, ] [package.metadata] @@ -6404,8 +6409,8 @@ provides-extras = ["zen", "bench", "tensorizer", "fastsafetensors", "instanttens [[package]] name = "vllm-router" -version = "0.1.22" -source = { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.22/vllm_router-0.1.22-cp38-abi3-manylinux_2_28_x86_64.whl" } +version = "0.1.25" +source = { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.25/vllm_router-0.1.25-cp38-abi3-manylinux_2_28_x86_64.whl" } dependencies = [ { name = "aiohttp", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "fastapi", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, @@ -6415,7 +6420,7 @@ dependencies = [ { name = "uvicorn", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] wheels = [ - { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.22/vllm_router-0.1.22-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6361a0387241e56932f3ba2e51af27f58d11a462e3187e58286b2f96056e4d15" }, + { url = "https://github.com/PrimeIntellect-ai/router/releases/download/v0.1.25/vllm_router-0.1.25-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:e84e731a0779f820bfe3cf4ce78cea2d09993c0a6501c63bcda93826bcd21fd0" }, ] [package.metadata]