fix: strip logprobs from trajectories before sending to RULER judge

JRMeyer · JRMeyer · commit 5b26fd9d385a · 2025-12-01T12:30:54.000-05:00
diff --git a/src/art/rewards/ruler.py b/src/art/rewards/ruler.py
@@ -14,6 +14,8 @@
 from typing import List
 
 from litellm import acompletion
+
+from art.utils.strip_logprobs import strip_logprobs
 from litellm.types.utils import ModelResponse
 from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
 from pydantic import BaseModel, Field
@@ -287,9 +289,10 @@ async def ruler_score_group(
         new_trajectories.append(new_traj)
 
     # Extract message lists and preserve original rewards for comparison
+    # Strip logprobs to avoid sending huge token probability data to the judge
     message_lists: list[list[ChatCompletionMessageParam]] = []
     for traj in new_trajectories:
-        message_lists.append(traj.messages())
+        message_lists.append(strip_logprobs(traj.messages()))
         traj.metrics["independent_reward"] = traj.reward
 
     try: