diff --git a/.gitignore b/.gitignore
index 2a0cc07d6c..b0b3da272e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,7 @@ kaggle.json
 src/unitxt/catalog_back/*
 src/unitxt/catalog/metrics/example/accuracy.json
 src/unitxt/catalog/processors/example/to_string.json
+src/unitxt/catalog/temp_recipe_name.json
 prod_env/*
 benchmark_output/*
 .litellm_cache
diff --git a/prepare/cards/ai2d.py b/prepare/cards/ai2d.py
index a2a5172893..0ad2be427d 100644
--- a/prepare/cards/ai2d.py
+++ b/prepare/cards/ai2d.py
@@ -24,7 +24,6 @@
     ],
     task="tasks.qa.multiple_choice.with_context[metrics=[metrics.exact_match_mm]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={},
     __description__=(
         "AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."
diff --git a/prepare/cards/chart_qa.py b/prepare/cards/chart_qa.py
index bf20fd6717..eaefd8e5c3 100644
--- a/prepare/cards/chart_qa.py
+++ b/prepare/cards/chart_qa.py
@@ -26,7 +26,6 @@
     ],
     task="tasks.qa.with_context",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "GPL-3.0",
         "multilinguality": "monolingual",
@@ -53,7 +52,6 @@
     ],
     task="tasks.qa.with_context.with_type[metrics=[metrics.relaxed_correctness]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "GPL-3.0",
         "multilinguality": "monolingual",
diff --git a/prepare/cards/doc_vqa.py b/prepare/cards/doc_vqa.py
index d1878258db..d999b726fa 100644
--- a/prepare/cards/doc_vqa.py
+++ b/prepare/cards/doc_vqa.py
@@ -28,7 +28,6 @@
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "apache-2.0",
         "multilinguality": "monolingual",
@@ -57,7 +56,6 @@
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "apache-2.0",
         "multilinguality": "monolingual",
diff --git a/prepare/cards/info_vqa.py b/prepare/cards/info_vqa.py
index b9b0d10803..1da33682cd 100644
--- a/prepare/cards/info_vqa.py
+++ b/prepare/cards/info_vqa.py
@@ -29,7 +29,6 @@
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "Unknown",
         "multilinguality": "monolingual",
@@ -59,12 +58,7 @@
         Set(fields={"context_type": "image"}),
     ],
     task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
-    templates="templates.qa.with_context.all",
-    default_template=MultiReferenceTemplate(
-        input_format="{context}\n{question}\nAnswer the question using a single word or phrase.",
-        references_field="answers",
-        __description__="lmms-evals default template for infovqa.",
-    ),
+    templates=[template, *templates.items],
     __tags__={
         "license": "apache-2.0",
         "multilinguality": "monolingual",
diff --git a/prepare/cards/seed_bench.py b/prepare/cards/seed_bench.py
index bd1b82a3a9..22d42244bc 100644
--- a/prepare/cards/seed_bench.py
+++ b/prepare/cards/seed_bench.py
@@ -1,10 +1,19 @@
 from unitxt.blocks import LoadHF, Set, TaskCard
-from unitxt.catalog import add_to_catalog
+from unitxt.catalog import add_to_catalog, get_from_catalog
 from unitxt.image_operators import ToImage, ToRGB
 from unitxt.operators import ListFieldValues, MapValues
 from unitxt.templates import MultipleChoiceTemplate
 from unitxt.test_utils.card import test_card

+templates = get_from_catalog("templates.qa.multiple_choice.with_context.no_intro.all")
+template = MultipleChoiceTemplate(
+    input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
+    choices_separator="\n",
+    target_field="answer",
+    enumerator="capitals",
+    __description__="lmms-evals default template for seed bench.",
+)
+
 card = TaskCard(
     loader=LoadHF(path="lmms-lab/SEED-Bench"),
     preprocess_steps=[
@@ -17,14 +26,7 @@
         MapValues(mapping={"A": 0, "B": 1, "C": 2, "D": 3}, field="answer"),
     ],
     task="tasks.qa.multiple_choice.with_context",
-    templates="templates.qa.multiple_choice.with_context.no_intro.all",
-    default_template=MultipleChoiceTemplate(
-        input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-        choices_separator="\n",
-        target_field="answer",
-        enumerator="capitals",
-        __description__="lmms-evals default template for seed bench.",
-    ),
+    templates=[template, *templates.items],
     __tags__={},
     __description__=(
         "SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."
diff --git a/prepare/cards/websrc.py b/prepare/cards/websrc.py
index afb6f29676..e7cc1f0786 100644
--- a/prepare/cards/websrc.py
+++ b/prepare/cards/websrc.py
@@ -26,7 +26,6 @@
     ],
     task="tasks.qa.with_context.with_domain[metrics=[metrics.websrc_squad_f1]]",
     templates=[template, *templates.items],
-    default_template=template,
     __tags__={
         "license": "Unknown",
         "multilinguality": "monolingual",
diff --git a/src/unitxt/card.py b/src/unitxt/card.py
index 1516f5a7a7..462392c2f2 100644
--- a/src/unitxt/card.py
+++ b/src/unitxt/card.py
@@ -21,8 +21,6 @@ class TaskCard(Artifact):
            specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
        templates:
            format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
-       default_template:
-           a default template for tasks with very specific task dataset specific template
    """

    loader: Loader
@@ -31,5 +29,4 @@ class TaskCard(Artifact):
    templates: Union[
        TemplatesDict, TemplatesList, Dict[str, Template], List[Template]
    ] = None
-   default_template: Template = None
    sampler: Sampler = OptionalField(default_factory=RandomSampler)
diff --git a/src/unitxt/catalog/cards/ai2d.json b/src/unitxt/catalog/cards/ai2d.json
index f3a0a10ae4..69c8276736 100644
--- a/src/unitxt/catalog/cards/ai2d.json
+++ b/src/unitxt/catalog/cards/ai2d.json
@@ -40,13 +40,6 @@
        "templates.qa.multiple_choice.with_context.no_intro.mmlu",
        "templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
    ],
-   "default_template": {
-       "__type__": "multiple_choice_template",
-       "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-       "choices_separator": "\n",
-       "target_field": "answer",
-       "enumerator": "capitals"
-   },
    "__tags__": {},
    "__description__": "AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."
 }
diff --git a/src/unitxt/catalog/cards/chart_qa.json b/src/unitxt/catalog/cards/chart_qa.json
index 50a80ef424..ca68fd1f31 100644
--- a/src/unitxt/catalog/cards/chart_qa.json
+++ b/src/unitxt/catalog/cards/chart_qa.json
@@ -53,12 +53,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for chartqa."
-   },
    "__tags__": {
        "license": "GPL-3.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/chart_qa_lmms_eval.json b/src/unitxt/catalog/cards/chart_qa_lmms_eval.json
index bf8100de56..397ab9dd13 100644
--- a/src/unitxt/catalog/cards/chart_qa_lmms_eval.json
+++ b/src/unitxt/catalog/cards/chart_qa_lmms_eval.json
@@ -41,12 +41,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for chartqa."
-   },
    "__tags__": {
        "license": "GPL-3.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/doc_vqa/en.json b/src/unitxt/catalog/cards/doc_vqa/en.json
index b900151cca..f33a50ba44 100644
--- a/src/unitxt/catalog/cards/doc_vqa/en.json
+++ b/src/unitxt/catalog/cards/doc_vqa/en.json
@@ -59,12 +59,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for docvqa."
-   },
    "__tags__": {
        "license": "apache-2.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/doc_vqa/fr.json b/src/unitxt/catalog/cards/doc_vqa/fr.json
index eb20e6c415..aa619f16d1 100644
--- a/src/unitxt/catalog/cards/doc_vqa/fr.json
+++ b/src/unitxt/catalog/cards/doc_vqa/fr.json
@@ -59,12 +59,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for docvqa."
-   },
    "__tags__": {
        "license": "apache-2.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json b/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
index 3e48341076..e63c0d70ca 100644
--- a/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
+++ b/src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
@@ -45,12 +45,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for docvqa."
-   },
    "__tags__": {
        "license": "apache-2.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/info_vqa.json b/src/unitxt/catalog/cards/info_vqa.json
index 29c905cae7..70dbb9a507 100644
--- a/src/unitxt/catalog/cards/info_vqa.json
+++ b/src/unitxt/catalog/cards/info_vqa.json
@@ -54,12 +54,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for chartqa."
-   },
    "__tags__": {
        "license": "Unknown",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/info_vqa_lmms_eval.json b/src/unitxt/catalog/cards/info_vqa_lmms_eval.json
index 1de29fb845..04b3c47f98 100644
--- a/src/unitxt/catalog/cards/info_vqa_lmms_eval.json
+++ b/src/unitxt/catalog/cards/info_vqa_lmms_eval.json
@@ -28,13 +28,23 @@
        }
    ],
    "task": "tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
-   "templates": "templates.qa.with_context.all",
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for infovqa."
-   },
+   "templates": [
+       {
+           "__type__": "multi_reference_template",
+           "input_format": "{context}\n{question}\nAnswer the question using a single word.",
+           "references_field": "answers",
+           "__description__": "lmms-evals default template for chartqa."
+       },
+       "templates.qa.with_context",
+       "templates.qa.extractive",
+       "templates.qa.with_context.simple",
+       "templates.qa.with_context.simple2",
+       "templates.qa.with_context.with_type",
+       "templates.qa.with_context.question_first",
+       "templates.qa.with_context.ffqa",
+       "templates.qa.with_context.title",
+       "templates.qa.with_context.lmms_eval"
+   ],
    "__tags__": {
        "license": "apache-2.0",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/catalog/cards/seed_bench.json b/src/unitxt/catalog/cards/seed_bench.json
index a2df0a93e3..a09f9ad08e 100644
--- a/src/unitxt/catalog/cards/seed_bench.json
+++ b/src/unitxt/catalog/cards/seed_bench.json
@@ -44,15 +44,19 @@
        }
    ],
    "task": "tasks.qa.multiple_choice.with_context",
-   "templates": "templates.qa.multiple_choice.with_context.no_intro.all",
-   "default_template": {
-       "__type__": "multiple_choice_template",
-       "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
-       "choices_separator": "\n",
-       "target_field": "answer",
-       "enumerator": "capitals",
-       "__description__": "lmms-evals default template for seed bench."
-   },
+   "templates": [
+       {
+           "__type__": "multiple_choice_template",
+           "input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
+           "choices_separator": "\n",
+           "target_field": "answer",
+           "enumerator": "capitals",
+           "__description__": "lmms-evals default template for seed bench."
+       },
+       "templates.qa.multiple_choice.with_context.no_intro.helm",
+       "templates.qa.multiple_choice.with_context.no_intro.mmlu",
+       "templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
+   ],
    "__tags__": {},
    "__description__": "SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."
 }
diff --git a/src/unitxt/catalog/cards/websrc.json b/src/unitxt/catalog/cards/websrc.json
index 5ef982f32c..02bc059c98 100644
--- a/src/unitxt/catalog/cards/websrc.json
+++ b/src/unitxt/catalog/cards/websrc.json
@@ -54,12 +54,6 @@
        "templates.qa.with_context.title",
        "templates.qa.with_context.lmms_eval"
    ],
-   "default_template": {
-       "__type__": "multi_reference_template",
-       "input_format": "{context}\nAnswer the question using a single word or phrase.\n{question}",
-       "references_field": "answers",
-       "__description__": "lmms-evals default template for websrc."
-   },
    "__tags__": {
        "license": "Unknown",
        "multilinguality": "monolingual",
diff --git a/src/unitxt/collections.py b/src/unitxt/collections.py
index bf95436a8c..03824aa94b 100644
--- a/src/unitxt/collections.py
+++ b/src/unitxt/collections.py
@@ -22,6 +22,10 @@ def __getitem__(self, key: Hashable) -> Any:
    def keys(self) -> List[Hashable]:
        pass

+   @abstractmethod
+   def __len__(self):
+       pass
+

 class ListCollection(Collection):
    items: List[Artifact] = field(default_factory=list)
@@ -48,6 +52,11 @@ class DictCollection(Collection):
    def keys(self) -> List[Hashable]:
        return list(self.items.keys())

+   def len(self):
+       return len(self.items)
+
+   def __len__(self):
+       return len(self.items)

 class ItemPicker(Artifact):
    item: object = None
diff --git a/src/unitxt/standard.py b/src/unitxt/standard.py
index 4d6c179e4a..c4b74d3c6a 100644
--- a/src/unitxt/standard.py
+++ b/src/unitxt/standard.py
@@ -27,7 +27,12 @@
 from .stream import MultiStream
 from .system_prompts import EmptySystemPrompt, SystemPrompt
 from .task import Task
-from .templates import ApplyRandomTemplate, ApplySingleTemplate, Template, TemplatesList
+from .templates import (
+    ApplyRandomTemplate,
+    ApplySingleTemplate,
+    Template,
+    TemplatesList,
+)
 from .type_utils import isoftype
 from .utils import LRUCache, recursive_copy

@@ -658,35 +663,31 @@ def reset_pipeline(self):

        self.finalize.steps.append(FinalizeDataset(group_by=self.group_by))

+   @property
+   def has_card_templates(self):
+       return self.card is not None and self.card.templates is not None and len(self.card.templates) > 0
+
+   @property
+   def has_no_templates(self):
+       return self.template_card_index is None and self.template is None
+
    def prepare(self):
        assert (
            self.template_card_index is None or self.template is None
        ), f"Specify either template ({self.template}) or template_card_index ({self.template_card_index}) but not both"
-       if self.template_card_index is None and self.template is None:
-           # First try to use the defined defaults
-           if self.card.default_template is not None:
-               self.template = self.card.default_template
+       if self.has_no_templates:
+           if self.has_card_templates:
+               if isinstance(self.card.templates, list):
+                   self.template_card_index = 0
+               else:
+                   self.template_card_index = next(iter(self.card.templates.keys()))
+               logger.warning(
+                   "Template was not specified in recipe, using the first template from the card by default."
+               )
            else:
                self.template = self.card.task.default_template
-
-       # Than try to infer the default
-       if self.template is None:
-           if (
-               self.card is not None
-               and self.card.templates is not None
-               and len(self.card.templates) > 0
-           ):
-               self.template_card_index = (
-                   0
-                   if isinstance(self.card.templates, list)
-                   else next(iter(self.card.templates.keys()))
-               )
-               logger.warning(
-                   "Template was not specified in recipe, using the first template from the card by default."
-               )
-           else:
-               self.template = self.card.task.default_template

        if self.template is None and self.template_card_index is not None:
            try:
@@ -704,6 +705,7 @@ def prepare(self):
            raise ValueError(
                "No template was specified in the the 'template' or 'template_card_index' recipe arguments, and no default templates are defined the card or task"
            )
+
        if self.use_demos:
            assert (
                self.demos_pool is not None
@@ -726,6 +728,7 @@

        if isinstance(self.template, TemplatesList):
            self.template = self.template.items
+
        self.reset_pipeline()
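
Note on the resulting behavior: with TaskCard.default_template removed, each prepare script places its dataset-specific template first in the card's templates list, and StandardRecipe.prepare() falls back to the first list entry (or the first dict key) when a recipe specifies neither template nor template_card_index. Below is a minimal sketch of that fallback, assuming the standard unitxt.load_dataset entry point; the split keyword is illustrative, though cards.seed_bench is a card touched by this patch.

from unitxt import load_dataset

# Neither `template` nor `template_card_index` is given, so prepare() logs
# "Template was not specified in recipe, using the first template from the
# card by default." and sets template_card_index = 0 -- here the lmms-eval
# multiple-choice template that prepare/cards/seed_bench.py puts at index 0.
dataset = load_dataset(card="cards.seed_bench", split="test")

# Equivalent explicit form under the new scheme:
dataset = load_dataset(card="cards.seed_bench", template_card_index=0, split="test")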