Skip to content

Commit

Permalink
Fix bird dataset (#1593)
Browse files: browse the repository at this point in the history
Add streaming to the birdbench loader and enable the card test
  • Loading branch information
perlitz authored Feb 10, 2025
1 parent 98243fb commit 8157230
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 6 deletions.
2 changes: 1 addition & 1 deletion examples/evaluate_text2sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
evaluated_dataset[0]["score"]["global"]["score"] >= 0.44
), "results have been degraded, something is wrong with the metric"

# with llama-3-70b-instruct
# with llama-3-3-70b-instruct
# num_of_instances (int):
# 100
# execution_accuracy (float):
Expand Down
15 changes: 11 additions & 4 deletions prepare/cards/text2sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from unitxt.blocks import Copy, Rename, Set, TaskCard
from unitxt.loaders import LoadHF
from unitxt.operators import ExecuteExpression, Shuffle
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="premai-io/birdbench", split="validation"),
loader=LoadHF(path="premai-io/birdbench", split="validation", streaming=True),
preprocess_steps=[
Shuffle(page_size=sys.maxsize),
Rename(
Expand Down Expand Up @@ -45,9 +46,15 @@
)


# test_card(
# card, num_demos=0, demos_pool_size=0, demos_taken_from="test"
# )
test_card(
card,
num_demos=0,
demos_pool_size=0,
demos_taken_from="validation",
loader_limit=10,
test_exact_match_score_when_predictions_equal_references=False,
test_full_mismatch_score_with_full_mismatch_prediction_values=False,
)

add_to_catalog(
card,
Expand Down
3 changes: 2 additions & 1 deletion src/unitxt/catalog/cards/text2sql/bird.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"loader": {
"__type__": "load_hf",
"path": "premai-io/birdbench",
"split": "validation"
"split": "validation",
"streaming": true
},
"preprocess_steps": [
{
Expand Down

0 comments on commit 8157230

Please sign in to comment.