diff --git a/lm_eval/tasks/gsm8k.py b/lm_eval/tasks/gsm8k.py index 4254e27a3b..1829a8c0e5 100644 --- a/lm_eval/tasks/gsm8k.py +++ b/lm_eval/tasks/gsm8k.py @@ -79,7 +79,7 @@ def construct_requests(self, doc, ctx): """ # NOTE: The paper implements "verifiers" that assign a score to multiple # solutions and output the highest ranked solution. - completion = rf.greedy_until(ctx, {"until": [":", "Question:", "Question"]}) + completion = rf.greedy_until(ctx, {"until": ["Question:", "Question"]}) return completion def _extract_answer(self, completion):