Skip to content

Commit

Permalink
fix reward score
Browse files Browse the repository at this point in the history
  • Loading branch information
Tong Li committed Mar 11, 2025
1 parent 71a0181 commit abca66e
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions applications/ColossalChat/coati/distributed/reward/reward_fn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@


def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
+ format_score = 1.0
+ acc_score = 9.0
tokenizer = kwargs["tokenizer"]
reward = torch.tensor(0.0)
format_reward = torch.tensor(0.0)
Expand All @@ -20,16 +22,16 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):

# Check format accuracy
if format_valid:
- format_reward += 1.0
- reward += 1.0
+ format_reward += format_score
+ reward += format_score

# Check answer accuracy
if (
final_answer is not None
and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
):
- acc_reward += 5.0
- reward += 5.0
+ acc_reward += acc_score
+ reward += acc_score

return torch.tensor([reward, format_reward, acc_reward]).to(input_ids.device)

Expand Down

0 comments on commit abca66e

Please sign in to comment.