File tree Expand file tree Collapse file tree 1 file changed +4
-1
lines changed Expand file tree Collapse file tree 1 file changed +4
-1
lines changed Original file line number Diff line number Diff line change 1414from typing import List
1515
1616from litellm import acompletion
17+
18+ from art .utils .strip_logprobs import strip_logprobs
1719from litellm .types .utils import ModelResponse
1820from openai .types .chat .chat_completion_message_param import ChatCompletionMessageParam
1921from pydantic import BaseModel , Field
@@ -287,9 +289,10 @@ async def ruler_score_group(
287289 new_trajectories .append (new_traj )
288290
289291 # Extract message lists and preserve original rewards for comparison
292+ # Strip logprobs to avoid sending huge token probability data to the judge
290293 message_lists : list [list [ChatCompletionMessageParam ]] = []
291294 for traj in new_trajectories :
292- message_lists .append (traj .messages ())
295+ message_lists .append (strip_logprobs ( traj .messages () ))
293296 traj .metrics ["independent_reward" ] = traj .reward
294297
295298 try :
You can’t perform that action at this time.
0 commit comments