IBM · elronbandel · Feb 13, 2025 · Feb 11, 2025 · Feb 12, 2025 · Feb 12, 2025
diff --git a/.gitignore b/.gitignore
@@ -154,6 +154,7 @@ kaggle.json
 src/unitxt/catalog_back/*
 src/unitxt/catalog/metrics/example/accuracy.json
 src/unitxt/catalog/processors/example/to_string.json
+src/unitxt/catalog/temp_recipe_name.json
 prod_env/*
 benchmark_output/*
 .litellm_cache

diff --git a/prepare/cards/ai2d.py b/prepare/cards/ai2d.py
@@ -24,7 +24,6 @@
     ],
     task="tasks.qa.multiple_choice.with_context[metrics=[metrics.exact_match_mm]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={},
     __description__=(
         "AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."

diff --git a/prepare/cards/chart_qa.py b/prepare/cards/chart_qa.py
@@ -26,7 +26,6 @@
     ],
     task="tasks.qa.with_context",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "GPL-3.0",
         "multilinguality": "monolingual",
@@ -53,7 +52,6 @@
     ],
     task="tasks.qa.with_context.with_type[metrics=[metrics.relaxed_correctness]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "GPL-3.0",
         "multilinguality": "monolingual",

diff --git a/prepare/cards/doc_vqa.py b/prepare/cards/doc_vqa.py
@@ -28,7 +28,6 @@
         ],
         task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
         templates=[template, *templates.items],
-        default_template=template,
         __tags__={
             "license": "apache-2.0",
             "multilinguality": "monolingual",
@@ -57,7 +56,6 @@
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/prepare/cards/info_vqa.py b/prepare/cards/info_vqa.py
@@ -29,7 +29,6 @@
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "Unknown",
         "multilinguality": "monolingual",
@@ -59,12 +58,7 @@
         Set(fields={"context_type": "image"}),
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
-    templates="templates.qa.with_context.all",
-    default_template=MultiReferenceTemplate(
-        input_format="{context}\n{question}\nAnswer the question using a single word or phrase.",
-        references_field="answers",
-        __description__="lmms-evals default template for infovqa.",
-    ),
+    templates=[template, *templates.items],
     __tags__={
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/prepare/cards/seed_bench.py b/prepare/cards/seed_bench.py
@@ -1,10 +1,19 @@
 from unitxt.blocks import LoadHF, Set, TaskCard
-from unitxt.catalog import add_to_catalog
+from unitxt.catalog import add_to_catalog, get_from_catalog
 from unitxt.image_operators import ToImage, ToRGB
 from unitxt.operators import ListFieldValues, MapValues
 from unitxt.templates import MultipleChoiceTemplate
 from unitxt.test_utils.card import test_card
 
+templates = get_from_catalog("templates.qa.multiple_choice.with_context.no_intro.all")
+template = MultipleChoiceTemplate(  # dataset-specific prompt, listed first in the card's templates
+    input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
+    choices_separator="\n",
+    target_field="answer",
+    enumerator="capitals",
+    __description__="lmms-evals default template for seed bench.",
+)
+
 card = TaskCard(
     loader=LoadHF(path="lmms-lab/SEED-Bench"),
     preprocess_steps=[
@@ -17,14 +26,7 @@
         MapValues(mapping={"A": 0, "B": 1, "C": 2, "D": 3}, field="answer"),
     ],
     task="tasks.qa.multiple_choice.with_context",
-    templates="templates.qa.multiple_choice.with_context.no_intro.all",
-    default_template=MultipleChoiceTemplate(
-        input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-        choices_separator="\n",
-        target_field="answer",
-        enumerator="capitals",
-        __description__="lmms-evals default template for seed bench.",
-    ),
+    templates=[template, *templates.items],
     __tags__={},
     __description__=(
         "SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."

diff --git a/prepare/cards/websrc.py b/prepare/cards/websrc.py
@@ -26,7 +26,6 @@
     ],
     task="tasks.qa.with_context.with_domain[metrics=[metrics.websrc_squad_f1]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "Unknown",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/card.py b/src/unitxt/card.py
@@ -21,8 +21,6 @@ class TaskCard(Artifact):
             specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
         templates:
             format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
-        default_template:
-            a default template for tasks with very specific task dataset specific template
     """
 
     loader: Loader
@@ -31,5 +29,4 @@ class TaskCard(Artifact):
     templates: Union[
         TemplatesDict, TemplatesList, Dict[str, Template], List[Template]
     ] = None
-    default_template: Template = None
     sampler: Sampler = OptionalField(default_factory=RandomSampler)
diff --git a/src/unitxt/catalog/cards/ai2d.json b/src/unitxt/catalog/cards/ai2d.json
@@ -40,13 +40,6 @@
         "templates.qa.multiple_choice.with_context.no_intro.mmlu",
         "templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
     ],
-    "default_template": {
-        "__type__": "multiple_choice_template",
-        "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-        "choices_separator": "\n",
-        "target_field": "answer",
-        "enumerator": "capitals"
-    },
     "__tags__": {},
     "__description__": "AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."
 }
diff --git a/src/unitxt/catalog/cards/chart_qa.json b/src/unitxt/catalog/cards/chart_qa.json
@@ -53,12 +53,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for chartqa."
-    },
     "__tags__": {
         "license": "GPL-3.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/chart_qa_lmms_eval.json b/src/unitxt/catalog/cards/chart_qa_lmms_eval.json
@@ -41,12 +41,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for chartqa."
-    },
     "__tags__": {
         "license": "GPL-3.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/doc_vqa/en.json b/src/unitxt/catalog/cards/doc_vqa/en.json
@@ -59,12 +59,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for docvqa."
-    },
     "__tags__": {
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/doc_vqa/fr.json b/src/unitxt/catalog/cards/doc_vqa/fr.json
@@ -59,12 +59,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for docvqa."
-    },
     "__tags__": {
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json b/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
@@ -45,12 +45,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for docvqa."
-    },
     "__tags__": {
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/info_vqa.json b/src/unitxt/catalog/cards/info_vqa.json
@@ -54,12 +54,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for chartqa."
-    },
     "__tags__": {
         "license": "Unknown",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/info_vqa_lmms_eval.json b/src/unitxt/catalog/cards/info_vqa_lmms_eval.json
@@ -28,13 +28,23 @@
         }
     ],
     "task": "tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
-    "templates": "templates.qa.with_context.all",
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for infovqa."
-    },
+    "templates": [
+        {
+            "__type__": "multi_reference_template",
+            "input_format": "{context}\n{question}\nAnswer the question using a single word.",
+            "references_field": "answers",
+            "__description__": "lmms-evals default template for chartqa."
+        },
+        "templates.qa.with_context",
+        "templates.qa.extractive",
+        "templates.qa.with_context.simple",
+        "templates.qa.with_context.simple2",
+        "templates.qa.with_context.with_type",
+        "templates.qa.with_context.question_first",
+        "templates.qa.with_context.ffqa",
+        "templates.qa.with_context.title",
+        "templates.qa.with_context.lmms_eval"
+    ],
     "__tags__": {
         "license": "apache-2.0",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/catalog/cards/seed_bench.json b/src/unitxt/catalog/cards/seed_bench.json
@@ -44,15 +44,19 @@
         }
     ],
     "task": "tasks.qa.multiple_choice.with_context",
-    "templates": "templates.qa.multiple_choice.with_context.no_intro.all",
-    "default_template": {
-        "__type__": "multiple_choice_template",
-        "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-        "choices_separator": "\n",
-        "target_field": "answer",
-        "enumerator": "capitals",
-        "__description__": "lmms-evals default template for seed bench."
-    },
+    "templates": [
+        {
+            "__type__": "multiple_choice_template",
+            "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
+            "choices_separator": "\n",
+            "target_field": "answer",
+            "enumerator": "capitals",
+            "__description__": "lmms-evals default template for seed bench."
+        },
+        "templates.qa.multiple_choice.with_context.no_intro.helm",
+        "templates.qa.multiple_choice.with_context.no_intro.mmlu",
+        "templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
+    ],
     "__tags__": {},
     "__description__": "SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."
 }
diff --git a/src/unitxt/catalog/cards/websrc.json b/src/unitxt/catalog/cards/websrc.json
@@ -54,12 +54,6 @@
         "templates.qa.with_context.title",
         "templates.qa.with_context.lmms_eval"
     ],
-    "default_template": {
-        "__type__": "multi_reference_template",
-        "input_format": "{context}\nAnswer the question using a single word or phrase.\n{question}",
-        "references_field": "answers",
-        "__description__": "lmms-evals default template for websrc."
-    },
     "__tags__": {
         "license": "Unknown",
         "multilinguality": "monolingual",

diff --git a/src/unitxt/collections.py b/src/unitxt/collections.py
@@ -22,6 +22,10 @@ def __getitem__(self, key: Hashable) -> Any:
     def keys(self) -> List[Hashable]:
         pass
 
+    @abstractmethod
+    def __len__(self):
+        """Return the number of items in the collection (provided by subclasses)."""
+
 
 class ListCollection(Collection):
     items: List[Artifact] = field(default_factory=list)
@@ -48,6 +52,14 @@ class DictCollection(Collection):
     def keys(self) -> List[Hashable]:
         return list(self.items.keys())
 
+    def len(self) -> int:
+        """Return the number of items; same result as ``len(collection)``."""
+        return len(self.items)
+
+    def __len__(self) -> int:
+        """Return the number of entries in ``self.items``."""
+        return len(self.items)
+
 
 class ItemPicker(Artifact):
     item: object = None