From e796e018f19de7019853f7d7e4075e16b8a00b9c Mon Sep 17 00:00:00 2001 From: JonasElburgUVA Date: Tue, 21 Jan 2025 14:52:06 +0100 Subject: [PATCH 1/2] Add use_effective_order parameter Original BLEU fails on answers shorter than four tokens, since the number of 4-grams is zero. Adding the use_effective_order parameter allows the user to still obtain scores by considering only n-grams up to the number of tokens in the answer when the answer has fewer than four tokens. --- src/ragas/metrics/_bleu_score.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ragas/metrics/_bleu_score.py b/src/ragas/metrics/_bleu_score.py index 71505a4bf..fd2f92386 100644 --- a/src/ragas/metrics/_bleu_score.py +++ b/src/ragas/metrics/_bleu_score.py @@ -15,6 +15,7 @@ class BleuScore(SingleTurnMetric): default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}} ) language: str = "english" + use_effective_order: bool=False def __post_init__(self): try: From 9c7aa4f9e1aa56aaaf847f1f9862ef9508f99323 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Tue, 21 Jan 2025 11:08:51 -0800 Subject: [PATCH 2/2] added kwargs --- src/ragas/metrics/_bleu_score.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ragas/metrics/_bleu_score.py b/src/ragas/metrics/_bleu_score.py index fd2f92386..a938dd0d6 100644 --- a/src/ragas/metrics/_bleu_score.py +++ b/src/ragas/metrics/_bleu_score.py @@ -15,7 +15,7 @@ class BleuScore(SingleTurnMetric): default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}} ) language: str = "english" - use_effective_order: bool=False + kwargs: t.Dict[str, t.Any] = field(default_factory=dict) def __post_init__(self): try: @@ -42,7 +42,7 @@ async def _single_turn_ascore( reference = [[reference] for reference in reference_sentences] response = response_sentences - score = self.corpus_bleu(response, reference).score / 100 + score = self.corpus_bleu(response, reference, **self.kwargs).score / 100 assert isinstance(score, float), "Expecting a float" return score