diff --git a/lm_eval/tasks/gsm8k.py b/lm_eval/tasks/gsm8k.py
index 4254e27a3b..1829a8c0e5 100644
--- a/lm_eval/tasks/gsm8k.py
+++ b/lm_eval/tasks/gsm8k.py
@@ -79,7 +79,7 @@ def construct_requests(self, doc, ctx):
         """
         # NOTE: The paper implements "verifiers" that assign a score to multiple
         # solutions and output the highest ranked solution.
-        completion = rf.greedy_until(ctx, {"until": [":", "Question:", "Question"]})
+        completion = rf.greedy_until(ctx, {"until": ["Question:", "Question"]})
         return completion
 
     def _extract_answer(self, completion):