Skip to content

Commit abca66e

Browse files
author
Tong Li
committed
fix reward score
1 parent 71a0181 commit abca66e

File tree

1 file changed

+6
-4
lines changed
  • applications/ColossalChat/coati/distributed/reward

1 file changed

+6
-4
lines changed

applications/ColossalChat/coati/distributed/reward/reward_fn.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44

55

66
def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
7+
format_score = 1.0
8+
acc_score = 9.0
79
tokenizer = kwargs["tokenizer"]
810
reward = torch.tensor(0.0)
911
format_reward = torch.tensor(0.0)
@@ -20,16 +22,16 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
2022

2123
# Check format accuracy
2224
if format_valid:
23-
format_reward += 1.0
24-
reward += 1.0
25+
format_reward += format_score
26+
reward += format_score
2527

2628
# Check answer accuracy
2729
if (
2830
final_answer is not None
2931
and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
3032
):
31-
acc_reward += 5.0
32-
reward += 5.0
33+
acc_reward += acc_score
34+
reward += acc_score
3335

3436
return torch.tensor([reward, format_reward, acc_reward]).to(input_ids.device)
3537

0 commit comments

Comments
 (0)