Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions safetytooling/apis/inference/openrouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ async def __call__(
generated_content = []
duration = None
api_duration = None
reasoning = None

error_list = []
for i in range(max_attempts):
Expand All @@ -295,6 +296,13 @@ async def __call__(

api_duration = time.time() - api_start

# Extract reasoning trace if present (for reasoning models like DeepSeek-R1, o1)
reasoning = None
if response_data.choices and response_data.choices[0].message:
msg = response_data.choices[0].message
if hasattr(msg, "reasoning") and msg.reasoning:
reasoning = msg.reasoning

if (
response_data.choices is None
or len(response_data.choices) == 0
Expand All @@ -314,6 +322,7 @@ async def __call__(
LLMResponse(
model_id=model_id,
completion="",
reasoning=reasoning,
generated_content=[],
stop_reason="stop_sequence",
api_duration=api_duration,
Expand Down Expand Up @@ -356,10 +365,15 @@ async def __call__(
assert not tools, "Multiple choices not supported with tools"
responses = []
for choice in response_data.choices:
# Extract per-choice reasoning if available
choice_reasoning = None
if hasattr(choice.message, "reasoning") and choice.message.reasoning:
choice_reasoning = choice.message.reasoning
responses.append(
LLMResponse(
model_id=model_id,
completion=choice.message.content or "",
reasoning=choice_reasoning,
generated_content=[self._convert_message_to_chat_message(choice.message)],
stop_reason=choice.finish_reason,
api_duration=api_duration,
Expand All @@ -378,6 +392,7 @@ async def __call__(
LLMResponse(
model_id=model_id,
completion=completion,
reasoning=reasoning,
generated_content=generated_content,
stop_reason=response_data.choices[0].finish_reason,
api_duration=api_duration,
Expand Down
1 change: 1 addition & 0 deletions safetytooling/data_models/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class LLMResponse(pydantic.BaseModel):
completion: str
stop_reason: StopReason | GeminiStopReason | GeminiBlockReason
cost: float = 0
reasoning: str | None = None # Reasoning trace from reasoning models (e.g., DeepSeek-R1, o1)
generated_content: List[ChatMessage] | None = None
audio_out: str | Path | None = None
duration: float | None = None
Expand Down