diff --git a/prepare/cards/ragbench_faithfulness.py b/prepare/cards/ragbench_faithfulness.py
new file mode 100644
index 0000000000..0e718cd2a3
--- /dev/null
+++ b/prepare/cards/ragbench_faithfulness.py
@@ -0,0 +1,57 @@
+"""Register RAGBench faithfulness cards in the unitxt catalog.
+
+One card is added per RAGBench subset. Each card loads the ``test`` split of
+``rungalileo/ragbench`` and targets ``tasks.rag_eval.faithfulness.binary``.
+"""
+
+from unitxt import add_to_catalog
+from unitxt.blocks import (
+    LoadHF,
+    TaskCard,
+)
+from unitxt.operators import Copy, ExecuteExpression
+from unitxt.templates import NullTemplate
+
+# Configurations of rungalileo/ragbench; one card is registered for each.
+ragbench_subsets = [
+    "covidqa",
+    "cuad",
+    "delucionqa",
+    "emanual",
+    "expertqa",
+    "finqa",
+    "hagrid",
+    "hotpotqa",
+    "msmarco",
+    "pubmedqa",
+    "tatqa",
+    "techqa",
+]
+
+for subset in ragbench_subsets:
+    # Catalog name under which this subset's card is stored.
+    card_name = f"cards.rag_eval.faithfulness.ragbench.{subset}"
+
+    card = TaskCard(
+        loader=LoadHF(path="rungalileo/ragbench", name=subset, split="test"),
+        preprocess_steps=[
+            # Expose the "response" and "documents" columns as "answer" and
+            # "contexts" (presumably the names the task expects; confirm).
+            Copy(field="response", to_field="answer"),
+            Copy(field="documents", to_field="contexts"),
+            # Integer form of adherence_score.
+            ExecuteExpression(
+                expression="int(adherence_score)",
+                to_field="number_val",
+            ),
+            # One-element list: "yes" when adherence_score is truthy, else "no".
+            ExecuteExpression(
+                expression="['yes' if adherence_score else 'no']",
+                to_field="is_faithful",
+            ),
+        ],
+        task="tasks.rag_eval.faithfulness.binary",
+        # NullTemplate is the only template registered, under "default".
+        templates={"default": NullTemplate()},
+    )
+    add_to_catalog(card, card_name, overwrite=True)
diff --git a/prepare/engines/classification/classification_engines.py b/prepare/engines/classification/classification_engines.py
index 635b203319..e996a03b1d 100644
--- a/prepare/engines/classification/classification_engines.py
+++ b/prepare/engines/classification/classification_engines.py
@@ -6,6 +6,7 @@
 )
 
 model_names_to_provider = {
+    "mistral-large-instruct": ["watsonx", "rits"],
     "llama-3-3-70b-instruct": ["watsonx", "rits"],
     "llama-3-1-70b-instruct": ["watsonx", "rits"],
     "gpt-4o": ["open-ai"],
diff --git a/prepare/metrics/llm_as_judge/rag_judge.py b/prepare/metrics/llm_as_judge/rag_judge.py
index aa418de2d1..ded8353f12 100644
--- a/prepare/metrics/llm_as_judge/rag_judge.py
+++ b/prepare/metrics/llm_as_judge/rag_judge.py
@@ -102,6 +102,8 @@ def get_prediction_field(metric_type):
     "llama_3_3_70b_instruct_watsonx": "engines.classification.llama_3_3_70b_instruct_watsonx",
     "llama_3_3_70b_instruct_rits": "engines.classification.llama_3_3_70b_instruct_rits",
     "gpt_4o_azure": "engines.classification.gpt_4o_2024_08_06_azure_openai",
+    "mistral_large_instruct_watsonx": "engines.classification.mistral_large_watsonx",
+    "mistral_large_instruct_rits": "engines.classification.mistral_large_instruct_2407_rits",
     generic_engine_label: GenericInferenceEngine(),
 }
 
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/covidqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/covidqa.json
new file mode 100644
index 0000000000..ede04fe9b7
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/covidqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "covidqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/cuad.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/cuad.json
new file mode 100644
index 0000000000..cf123101a1
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/cuad.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "cuad",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/delucionqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/delucionqa.json
new file mode 100644
index 0000000000..707fa49e11
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/delucionqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "delucionqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/emanual.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/emanual.json
new file mode 100644
index 0000000000..2cdcf49a41
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/emanual.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "emanual",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/expertqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/expertqa.json
new file mode 100644
index 0000000000..ceeb85882f
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/expertqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "expertqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/finqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/finqa.json
new file mode 100644
index 0000000000..d61854f528
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/finqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "finqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hagrid.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hagrid.json
new file mode 100644
index 0000000000..5dc19716da
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hagrid.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "hagrid",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hotpotqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hotpotqa.json
new file mode 100644
index 0000000000..a484e6bb45
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/hotpotqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "hotpotqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/msmarco.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/msmarco.json
new file mode 100644
index 0000000000..d962ba0724
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/msmarco.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "msmarco",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/pubmedqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/pubmedqa.json
new file mode 100644
index 0000000000..f0012c22f7
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/pubmedqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "pubmedqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/tatqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/tatqa.json
new file mode 100644
index 0000000000..35b3623725
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/tatqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "tatqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/techqa.json b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/techqa.json
new file mode 100644
index 0000000000..769fedaff2
--- /dev/null
+++ b/src/unitxt/catalog/cards/rag_eval/faithfulness/ragbench/techqa.json
@@ -0,0 +1,37 @@
+{
+    "__type__": "task_card",
+    "loader": {
+        "__type__": "load_hf",
+        "path": "rungalileo/ragbench",
+        "name": "techqa",
+        "split": "test"
+    },
+    "preprocess_steps": [
+        {
+            "__type__": "copy",
+            "field": "response",
+            "to_field": "answer"
+        },
+        {
+            "__type__": "copy",
+            "field": "documents",
+            "to_field": "contexts"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "int(adherence_score)",
+            "to_field": "number_val"
+        },
+        {
+            "__type__": "execute_expression",
+            "expression": "['yes' if adherence_score else 'no']",
+            "to_field": "is_faithful"
+        }
+    ],
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "templates": {
+        "default": {
+            "__type__": "null_template"
+        }
+    }
+}
diff --git a/src/unitxt/catalog/engines/classification/mistral_large_instruct_2407_rits.json b/src/unitxt/catalog/engines/classification/mistral_large_instruct_2407_rits.json
new file mode 100644
index 0000000000..cc8530861d
--- /dev/null
+++ b/src/unitxt/catalog/engines/classification/mistral_large_instruct_2407_rits.json
@@ -0,0 +1,9 @@
+{
+    "__type__": "cross_provider_inference_engine",
+    "model": "mistral-large-instruct",
+    "logprobs": true,
+    "max_tokens": 5,
+    "temperature": 0.0,
+    "top_logprobs": 5,
+    "provider": "rits"
+}
diff --git a/src/unitxt/catalog/engines/classification/mistral_large_watsonx.json b/src/unitxt/catalog/engines/classification/mistral_large_watsonx.json
new file mode 100644
index 0000000000..6b1f7f1b0a
--- /dev/null
+++ b/src/unitxt/catalog/engines/classification/mistral_large_watsonx.json
@@ -0,0 +1,9 @@
+{
+    "__type__": "cross_provider_inference_engine",
+    "model": "mistral-large-instruct",
+    "logprobs": true,
+    "max_tokens": 5,
+    "temperature": 0.0,
+    "top_logprobs": 5,
+    "provider": "watsonx"
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..9830442635
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..dc161455b2
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/answer_correctness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..71e179025a
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..4d2bd7ab72
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/answer_relevance/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..837ae89c94
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
+    "task": "tasks.rag_eval.context_relevance.binary",
+    "format": null,
+    "main_score": "context_relevance_judge",
+    "prediction_field": "contexts",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..501588e549
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/context_relevance/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
+    "task": "tasks.rag_eval.context_relevance.binary",
+    "format": null,
+    "main_score": "context_relevance_judge",
+    "prediction_field": "contexts",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..5b3dd429ef
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..14ad584b97
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/end_to_end/faithfulness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..722315d42b
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..5e6aec3c8a
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/answer_correctness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..f7d2f00aba
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..d5e34f703f
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/answer_relevance/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..d7955a7def
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
+    "task": "tasks.rag_eval.context_relevance.binary",
+    "format": null,
+    "main_score": "context_relevance_judge",
+    "prediction_field": "contexts",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..8712d4da17
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/context_relevance/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.context_relevance.judge_context_relevance_ares_numeric",
+    "task": "tasks.rag_eval.context_relevance.binary",
+    "format": null,
+    "main_score": "context_relevance_judge",
+    "prediction_field": "contexts",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..6e8898e710
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..c32a744b0b
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/external_rag/faithfulness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,11 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {}
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..9830442635
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..dc161455b2
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/answer_correctness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_correctness.judge_loose_match_no_context_numeric",
+    "task": "tasks.rag_eval.answer_correctness.binary",
+    "format": null,
+    "main_score": "answer_correctness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..71e179025a
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..4d2bd7ab72
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/answer_relevance/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.answer_relevance.judge_answer_relevance_numeric",
+    "task": "tasks.rag_eval.answer_relevance.binary",
+    "format": null,
+    "main_score": "answer_relevance_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_rits_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_rits_judge.json
new file mode 100644
index 0000000000..5b3dd429ef
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_rits_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_instruct_2407_rits",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_watsonx_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_watsonx_judge.json
new file mode 100644
index 0000000000..14ad584b97
--- /dev/null
+++ b/src/unitxt/catalog/metrics/rag/response_generation/faithfulness/mistral_large_instruct_watsonx_judge.json
@@ -0,0 +1,13 @@
+{
+    "__type__": "task_based_ll_mas_judge",
+    "inference_model": "engines.classification.mistral_large_watsonx",
+    "template": "templates.rag_eval.faithfulness.judge_with_question_simplified_verbal",
+    "task": "tasks.rag_eval.faithfulness.binary",
+    "format": null,
+    "main_score": "faithfulness_judge",
+    "prediction_field": "answer",
+    "infer_log_probs": false,
+    "judge_to_generator_fields_mapping": {
+        "ground_truths": "reference_answers"
+    }
+}
diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index fd9871bb05..1205b6f731 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -2972,6 +2972,7 @@ class CrossProviderInferenceEngine(InferenceEngine, StandardAPIParamsMixin):
             "llama-3-2-1b-instruct": "watsonx/meta-llama/llama-3-2-1b-instruct",
             "llama-3-2-11b-vision-instruct": "watsonx/meta-llama/llama-3-2-11b-vision-instruct",
             "llama-3-2-90b-vision-instruct": "watsonx/meta-llama/llama-3-2-90b-vision-instruct",
+            "mistral-large-instruct": "watsonx/mistralai/mistral-large",
         },
         "watsonx-sdk": {
             "llama-3-2-11b-vision-instruct": "meta-llama/llama-3-2-11b-vision-instruct",
diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py
index dd1f98b554..7ce8253a50 100644
--- a/src/unitxt/metrics.py
+++ b/src/unitxt/metrics.py
@@ -4259,7 +4259,7 @@ class FaithfulnessHHEM(BulkInstanceMetric):
     batch_size: int = 2
     model_name: str = "vectara/hallucination_evaluation_model"
     prediction_type = str
-    single_reference_per_prediction = True
+    # single_reference_per_prediction = True  # NOTE(review): disabled, presumably to allow several context references per prediction - confirm
     max_context_words = 4096
     reduction_map = {"mean": [main_score]}
 
@@ -4291,7 +4291,8 @@ def compute(
 
         # treat the references as the contexts and the predictions as answers
         # concat references
-        contexts = ["\n".join(refs) for refs in references]
+
+        contexts = ["\n".join([str(r) for r in refs]) for refs in references]
         contexts = [" ".join(c.split(" ")[: self.max_context_words]) for c in contexts]
         answers = predictions
 
@@ -4360,7 +4361,7 @@ def compute(
         from collections import defaultdict
 
         query_to_predictions_and_references = defaultdict(lambda: [[], []])
-        references = [reference[0] for reference in references]
+        references = ["\n".join(reference) for reference in references]
         for reference, pred, inputs_dict in zip(references, predictions, task_data):
             query = inputs_dict.get("query")
             query_to_predictions_and_references[query][0].append(pred)