Skip to content

Commit 5b26fd9

Browse files
committed
fix: strip logprobs from trajectories before sending to RULER judge
1 parent 44f8d3f commit 5b26fd9

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/art/rewards/ruler.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
 14  14   from typing import List
 15  15
 16  16   from litellm import acompletion
     17 +
     18 + from art.utils.strip_logprobs import strip_logprobs
 17  19   from litellm.types.utils import ModelResponse
 18  20   from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
 19  21   from pydantic import BaseModel, Field
@@ -287,9 +289,10 @@ async def ruler_score_group(
287 289   new_trajectories.append(new_traj)
288 290
289 291   # Extract message lists and preserve original rewards for comparison
    292 + # Strip logprobs to avoid sending huge token probability data to the judge
290 293   message_lists: list[list[ChatCompletionMessageParam]] = []
291 294   for traj in new_trajectories:
292     - message_lists.append(traj.messages())
    295 + message_lists.append(strip_logprobs(traj.messages()))
293 296   traj.metrics["independent_reward"] = traj.reward
294 297
295 298   try:

0 commit comments

Comments
 (0)