Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prioritize using default templates from card over task #1596

Merged
merged 9 commits into from
Feb 13, 2025
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ kaggle.json
src/unitxt/catalog_back/*
src/unitxt/catalog/metrics/example/accuracy.json
src/unitxt/catalog/processors/example/to_string.json
src/unitxt/catalog/temp_recipe_name.json
prod_env/*
benchmark_output/*
.litellm_cache
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/ai2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
],
task="tasks.qa.multiple_choice.with_context[metrics=[metrics.exact_match_mm]]",
templates=[template, *templates.items],
default_template=template,
__tags__={},
__description__=(
"AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."
Expand Down
2 changes: 0 additions & 2 deletions prepare/cards/chart_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
],
task="tasks.qa.with_context",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "GPL-3.0",
"multilinguality": "monolingual",
Expand All @@ -53,7 +52,6 @@
],
task="tasks.qa.with_context.with_type[metrics=[metrics.relaxed_correctness]]",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "GPL-3.0",
"multilinguality": "monolingual",
Expand Down
2 changes: 0 additions & 2 deletions prepare/cards/doc_vqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
],
task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down Expand Up @@ -57,7 +56,6 @@
],
task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
8 changes: 1 addition & 7 deletions prepare/cards/info_vqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
],
task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "Unknown",
"multilinguality": "monolingual",
Expand Down Expand Up @@ -59,12 +58,7 @@
Set(fields={"context_type": "image"}),
],
task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
templates="templates.qa.with_context.all",
default_template=MultiReferenceTemplate(
input_format="{context}\n{question}\nAnswer the question using a single word or phrase.",
references_field="answers",
__description__="lmms-evals default template for infovqa.",
),
templates=[template, *templates.items],
__tags__={
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
20 changes: 11 additions & 9 deletions prepare/cards/seed_bench.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
from unitxt.blocks import LoadHF, Set, TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.catalog import add_to_catalog, get_from_catalog
from unitxt.image_operators import ToImage, ToRGB
from unitxt.operators import ListFieldValues, MapValues
from unitxt.templates import MultipleChoiceTemplate
from unitxt.test_utils.card import test_card

templates = get_from_catalog("templates.qa.multiple_choice.with_context.no_intro.all")
template = MultipleChoiceTemplate(
input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
choices_separator="\n",
target_field="answer",
enumerator="capitals",
__description__="lmms-evals default template for seed bench.",
)

card = TaskCard(
loader=LoadHF(path="lmms-lab/SEED-Bench"),
preprocess_steps=[
Expand All @@ -17,14 +26,7 @@
MapValues(mapping={"A": 0, "B": 1, "C": 2, "D": 3}, field="answer"),
],
task="tasks.qa.multiple_choice.with_context",
templates="templates.qa.multiple_choice.with_context.no_intro.all",
default_template=MultipleChoiceTemplate(
input_format="{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
choices_separator="\n",
target_field="answer",
enumerator="capitals",
__description__="lmms-evals default template for seed bench.",
),
templates=[template, *templates.items],
__tags__={},
__description__=(
"SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/websrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
],
task="tasks.qa.with_context.with_domain[metrics=[metrics.websrc_squad_f1]]",
templates=[template, *templates.items],
default_template=template,
__tags__={
"license": "Unknown",
"multilinguality": "monolingual",
Expand Down
3 changes: 0 additions & 3 deletions src/unitxt/card.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ class TaskCard(Artifact):
specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
templates:
format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
default_template:
a default template for tasks with very specific task dataset specific template
"""

loader: Loader
Expand All @@ -31,5 +29,4 @@ class TaskCard(Artifact):
templates: Union[
TemplatesDict, TemplatesList, Dict[str, Template], List[Template]
] = None
default_template: Template = None
sampler: Sampler = OptionalField(default_factory=RandomSampler)
7 changes: 0 additions & 7 deletions src/unitxt/catalog/cards/ai2d.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,6 @@
"templates.qa.multiple_choice.with_context.no_intro.mmlu",
"templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
],
"default_template": {
"__type__": "multiple_choice_template",
"input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
"choices_separator": "\n",
"target_field": "answer",
"enumerator": "capitals"
},
"__tags__": {},
"__description__": "AI2 Diagrams (AI2D) is a dataset of over 5000 grade school science diagrams with over 150000 rich annotations, their ground truth syntactic parses, and more than 15000 corresponding multiple choice questions."
}
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/chart_qa.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word.",
"references_field": "answers",
"__description__": "lmms-evals default template for chartqa."
},
"__tags__": {
"license": "GPL-3.0",
"multilinguality": "monolingual",
Expand Down
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/chart_qa_lmms_eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word.",
"references_field": "answers",
"__description__": "lmms-evals default template for chartqa."
},
"__tags__": {
"license": "GPL-3.0",
"multilinguality": "monolingual",
Expand Down
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/doc_vqa/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
"references_field": "answers",
"__description__": "lmms-evals default template for docvqa."
},
"__tags__": {
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/doc_vqa/fr.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
"references_field": "answers",
"__description__": "lmms-evals default template for docvqa."
},
"__tags__": {
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/doc_vqa/lmms_eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
"references_field": "answers",
"__description__": "lmms-evals default template for docvqa."
},
"__tags__": {
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/info_vqa.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word.",
"references_field": "answers",
"__description__": "lmms-evals default template for chartqa."
},
"__tags__": {
"license": "Unknown",
"multilinguality": "monolingual",
Expand Down
24 changes: 17 additions & 7 deletions src/unitxt/catalog/cards/info_vqa_lmms_eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,23 @@
}
],
"task": "tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
"templates": "templates.qa.with_context.all",
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word or phrase.",
"references_field": "answers",
"__description__": "lmms-evals default template for infovqa."
},
"templates": [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add this as a "template.qa.single_word_answer"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The whole point is that it is curated for this card specifically.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a pretty generic instruction, but not critical.

{
"__type__": "multi_reference_template",
"input_format": "{context}\n{question}\nAnswer the question using a single word.",
"references_field": "answers",
"__description__": "lmms-evals default template for chartqa."
},
"templates.qa.with_context",
"templates.qa.extractive",
"templates.qa.with_context.simple",
"templates.qa.with_context.simple2",
"templates.qa.with_context.with_type",
"templates.qa.with_context.question_first",
"templates.qa.with_context.ffqa",
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"__tags__": {
"license": "apache-2.0",
"multilinguality": "monolingual",
Expand Down
22 changes: 13 additions & 9 deletions src/unitxt/catalog/cards/seed_bench.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,19 @@
}
],
"task": "tasks.qa.multiple_choice.with_context",
"templates": "templates.qa.multiple_choice.with_context.no_intro.all",
"default_template": {
"__type__": "multiple_choice_template",
"input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
"choices_separator": "\n",
"target_field": "answer",
"enumerator": "capitals",
"__description__": "lmms-evals default template for seed bench."
},
"templates": [
{
"__type__": "multiple_choice_template",
"input_format": "{context}\n{question}\n{choices}\nAnswer with the option's letter from the given choices directly.",
"choices_separator": "\n",
"target_field": "answer",
"enumerator": "capitals",
"__description__": "lmms-evals default template for seed bench."
},
"templates.qa.multiple_choice.with_context.no_intro.helm",
"templates.qa.multiple_choice.with_context.no_intro.mmlu",
"templates.qa.multiple_choice.with_context.no_intro.lm_eval_harness"
],
"__tags__": {},
"__description__": "SEED-Bench-1 consists of 19K multiple-choice questions with accurate human annotations, covering 12 evaluation dimensions including both the spatial and temporal understanding."
}
6 changes: 0 additions & 6 deletions src/unitxt/catalog/cards/websrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@
"templates.qa.with_context.title",
"templates.qa.with_context.lmms_eval"
],
"default_template": {
"__type__": "multi_reference_template",
"input_format": "{context}\nAnswer the question using a single word or phrase.\n{question}",
"references_field": "answers",
"__description__": "lmms-evals default template for websrc."
},
"__tags__": {
"license": "Unknown",
"multilinguality": "monolingual",
Expand Down
9 changes: 9 additions & 0 deletions src/unitxt/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ def __getitem__(self, key: Hashable) -> Any:
def keys(self) -> List[Hashable]:
pass

@abstractmethod
def __len__(self):
pass


class ListCollection(Collection):
items: List[Artifact] = field(default_factory=list)
Expand All @@ -48,6 +52,11 @@ class DictCollection(Collection):
def keys(self) -> List[Hashable]:
return list(self.items.keys())

def len(self):
return len(self.items)

def __len__(self):
return len(self.items)

class ItemPicker(Artifact):
item: object = None
Expand Down
Loading
Loading