Skip to content

Commit

Permalink
add rag judges that use mistral-large-instruct
Browse files Browse the repository at this point in the history
Signed-off-by: lilacheden <[email protected]>
  • Loading branch information
lilacheden committed Feb 11, 2025
1 parent 16ebe42 commit 56347a8
Show file tree
Hide file tree
Showing 23 changed files with 272 additions and 0 deletions.
2 changes: 2 additions & 0 deletions prepare/metrics/llm_as_judge/rag_judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def get_prediction_field(metric_type):
"llama_3_3_70b_instruct_watsonx": "engines.classification.llama_3_3_70b_instruct_watsonx",
"llama_3_3_70b_instruct_rits": "engines.classification.llama_3_3_70b_instruct_rits",
"gpt_4o_azure": "engines.classification.gpt_4o_2024_08_06_azure_openai",
"mistral_large_instruct_watsonx": "engines.classification.mistral_large_watsonx",
"mistral_large_instruct_rits": "engines.classification.mistral_large_instruct_2407_rits",
generic_engine_label: GenericInferenceEngine(),
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
"task": "tasks.rag_eval.context_relevance.binary",
"format": null,
"main_score": "context_relevance_judge",
"prediction_field": "contexts",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
"task": "tasks.rag_eval.context_relevance.binary",
"format": null,
"main_score": "context_relevance_judge",
"prediction_field": "contexts",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
"task": "tasks.rag_eval.context_relevance.binary",
"format": null,
"main_score": "context_relevance_judge",
"prediction_field": "contexts",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
"task": "tasks.rag_eval.context_relevance.binary",
"format": null,
"main_score": "context_relevance_judge",
"prediction_field": "contexts",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
"task": "tasks.rag_eval.answer_correctness.binary",
"format": null,
"main_score": "answer_correctness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
"task": "tasks.rag_eval.answer_relevance.binary",
"format": null,
"main_score": "answer_relevance_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_instruct_2407_rits",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"__type__": "task_based_ll_mas_judge",
"inference_model": "engines.classification.mistral_large_watsonx",
"template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
"task": "tasks.rag_eval.faithfulness.binary",
"format": null,
"main_score": "faithfulness_judge",
"prediction_field": "answer",
"infer_log_probs": false,
"judge_to_generator_fields_mapping": {
"ground_truths": "reference_answers"
}
}

0 comments on commit 56347a8

Please sign in to comment.