move gradually to lazy loader #1585

Closed
wants to merge 5 commits
41 changes: 35 additions & 6 deletions performance/bluebench_profiler.py
@@ -8,14 +8,18 @@
 from typing import Any, Dict, List, Union
 
 from unitxt.api import _source_to_dataset, evaluate, load_recipe
+from unitxt.artifact import fetch_artifact
 from unitxt.benchmark import Benchmark
+from unitxt.card import TaskCard
 from unitxt.inference import (
     CrossProviderInferenceEngine,
     InferenceEngine,
     TextGenerationInferenceOutput,
 )
 from unitxt.logging_utils import get_logger
 from unitxt.settings_utils import get_settings
+from unitxt.standard import DatasetRecipe
+from unitxt.templates import TemplatesDict, TemplatesList
 
 logger = get_logger()
 settings = get_settings()
@@ -61,8 +65,29 @@ class BlueBenchProfiler:
 
     def profiler_instantiate_benchmark_recipe(
         self, dataset_query: str, **kwargs
-    ) -> Benchmark:
-        return load_recipe(dataset_query, **kwargs)
+    ) -> Union[Benchmark, DatasetRecipe]:
+        benchmark_or_card, _ = fetch_artifact(dataset_query)
+        if isinstance(benchmark_or_card, (Benchmark, DatasetRecipe)):
+            return load_recipe(dataset_query, **kwargs)
+        assert isinstance(benchmark_or_card, TaskCard)
+        # benchmark_or_card is a TaskCard. Determine a template for it:
+        if isinstance(benchmark_or_card.templates, list):
+            template = benchmark_or_card.templates[0]
+        elif isinstance(benchmark_or_card.templates, TemplatesList):
+            template = benchmark_or_card.templates.items[0]
+        elif isinstance(benchmark_or_card.templates, dict):
+            for templ in benchmark_or_card.templates.values():
+                template = templ
+                break
+        elif isinstance(benchmark_or_card.templates, TemplatesDict):
+            for templ in benchmark_or_card.templates.items.values():
+                template = templ
+                break
+        else:
+            raise ValueError(
+                f"Unidentified type of templates {benchmark_or_card.templates} in card {dataset_query}"
+            )
+        return DatasetRecipe(card=benchmark_or_card, template=template)
 
     def profiler_generate_benchmark_dataset(
         self, benchmark_recipe: Benchmark, split: str, **kwargs
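The four isinstance branches added above all do the same thing: pick an arbitrary first template out of whatever container the card stores its templates in (plain list, TemplatesList, plain dict, or TemplatesDict). Purely as a reading aid, here is a compact, equivalent sketch of that selection. It is not code from this PR, and it assumes, as the PR code itself does, that TemplatesList.items is a list and TemplatesDict.items is a dict:

from typing import Any

from unitxt.templates import TemplatesDict, TemplatesList


def pick_first_template(templates: Any):
    """Return an arbitrary first template from a card's `templates` container."""
    if isinstance(templates, (TemplatesList, TemplatesDict)):
        templates = templates.items  # unwrap to the underlying list or dict
    if isinstance(templates, list):
        return templates[0]
    if isinstance(templates, dict):
        return next(iter(templates.values()))
    raise ValueError(f"Unidentified type of templates: {templates}")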
@@ -104,6 +129,8 @@ def profiler_do_the_profiling(self, dataset_query: str, split: str, **kwargs):
 
 
 dataset_query = "benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]"
+# dataset_query = "cards.cola"
+# dataset_query = "recipes.bluebench.knowledge.mmlu_pro_math"
 
 
 def profile_benchmark_blue_bench():
@@ -154,17 +181,20 @@ def main():
     pst.strip_dirs()
     pst.sort_stats("name") # sort by function name
     pst.print_stats(
-        "profile_benchmark_blue_bench|profiler_instantiate_benchmark_recipe|profiler_generate_benchmark_dataset|profiler_instantiate_model|profiler_infer_predictions|profiler_evaluate_predictions|load_data|load_iterables"
+        "profile_benchmark_blue_bench|profiler_instantiate_benchmark_recipe|profiler_generate_benchmark_dataset|profiler_instantiate_model|profiler_infer_predictions|profiler_evaluate_predictions|load_data|load_iterables|split_generator"
     )
     s = f.getvalue()
     assert s.split("\n")[7].split()[3] == "cumtime"
     overall_tot_time = find_cummtime_of(
         "profile_benchmark_blue_bench", "bluebench_profiler.py", s
     )
-    load_time = find_cummtime_of("load_data", "loaders.py", s)
-    just_load_no_initial_ms_time = find_cummtime_of(
+    # load_time = find_cummtime_of("load_data", "loaders.py", s)
+    load_time = find_cummtime_of(
         "load_iterables", "loaders.py", s
     )
+    load_time += find_cummtime_of(
+        "split_generator", "loaders.py", s
+    )
     instantiate_benchmark_time = find_cummtime_of(
         "profiler_instantiate_benchmark_recipe", "bluebench_profiler.py", s
     )
@@ -186,7 +216,6 @@
         "dataset_query": dataset_query,
         "total_time": overall_tot_time,
         "load_time": load_time,
-        "load_time_no_initial_ms": just_load_no_initial_ms_time,
         "instantiate_benchmark_time": instantiate_benchmark_time,
         "generate_benchmark_dataset_time": generate_benchmark_dataset_time,
         "instantiate_model_time": instantiate_model_time,
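For readers unfamiliar with how the profiler derives its numbers: main() captures the cProfile/pstats report into the string s, and find_cummtime_of extracts the cumulative time ("cumtime") of a named function from that printout; the PR now sums the cumtimes of load_iterables and split_generator to obtain load_time. The following is a minimal, self-contained sketch of that general pattern with a toy workload and a hypothetical helper; it reads cumulative times directly from the Stats object rather than parsing the printed text, and it is not the repository's find_cummtime_of implementation.

import cProfile
import pstats


def load_iterables():
    # Toy stand-in for the real unitxt loader internals.
    return [i * i for i in range(200_000)]


def split_generator():
    # Toy stand-in for the real unitxt loader internals.
    return sum(range(200_000))


def cumtime_of(stats: pstats.Stats, func_name: str) -> float:
    # stats.stats maps (file, line, name) -> (call count, ncalls, tottime, cumtime, callers).
    return sum(
        entry[3]
        for (_file, _line, name), entry in stats.stats.items()
        if name == func_name
    )


profiler = cProfile.Profile()
profiler.enable()
load_iterables()
split_generator()
profiler.disable()

stats = pstats.Stats(profiler).strip_dirs()
# Mirrors the PR: total "load time" is the cumtime of load_iterables plus split_generator.
load_time = cumtime_of(stats, "load_iterables") + cumtime_of(stats, "split_generator")
print(round(load_time, 3))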
26 changes: 14 additions & 12 deletions performance/compare_benchmark_performance_results.py
@@ -25,15 +25,17 @@
 print(f"used_eager_mode in PR = {pr_perf['used_eager_mode']}")
 print(f"use Mocked inference = {os.environ['UNITXT_MOCK_INFERENCE_MODE']}")
 
+ratio0 = pr_perf["total_time"] / main_perf["total_time"]
+
 ratio1 = (
-    (pr_perf["generate_benchmark_dataset_time"] - pr_perf["load_time_no_initial_ms"])
+    (pr_perf["generate_benchmark_dataset_time"] - pr_perf["load_time"])
     / (
         main_perf["generate_benchmark_dataset_time"]
-        - main_perf["load_time_no_initial_ms"]
+        - main_perf["load_time"]
     )
     if (
         main_perf["generate_benchmark_dataset_time"]
-        - main_perf["load_time_no_initial_ms"]
+        - main_perf["load_time"]
     )
     > 0
     else 1
@@ -47,31 +49,31 @@
 
 line1 = " What is Measured | Main Branch | PR Branch | PR/Main ratio \n"
 line2 = "--------------------|-------------|-------------|---------------\n"
-line3 = f" Total time | {main_perf['total_time']:>11} | {pr_perf['total_time']:>11} | {pr_perf['total_time'] / main_perf['total_time']:.2f}\n"
+line3 = f" Total time | {main_perf['total_time']:>11} | {pr_perf['total_time']:>11} | {ratio0:.2f}\n"
 ratio_line4 = (
-    pr_perf["load_time_no_initial_ms"] / main_perf["load_time_no_initial_ms"]
-    if main_perf["load_time_no_initial_ms"] > 0
+    pr_perf["load_time"] / main_perf["load_time"]
+    if main_perf["load_time"] > 0
     else 1
 )
-line4 = f" Load time | {main_perf['load_time_no_initial_ms']:>11} | {pr_perf['load_time_no_initial_ms']:>11} | {ratio_line4:.2f}\n"
+line4 = f" Load time | {main_perf['load_time']:>11} | {pr_perf['load_time']:>11} | {ratio_line4:.2f}\n"
 line5 = f" DS Gen. inc. Load | {main_perf['generate_benchmark_dataset_time']:>11} | {pr_perf['generate_benchmark_dataset_time']:>11} | {pr_perf['generate_benchmark_dataset_time'] / main_perf['generate_benchmark_dataset_time']:.2f}\n"
-line6 = f" DS Gen. exc. Load | {round(main_perf['generate_benchmark_dataset_time'] - main_perf['load_time_no_initial_ms'], 3):>11} | {round(pr_perf['generate_benchmark_dataset_time'] - pr_perf['load_time_no_initial_ms'], 3):>11} | {ratio1:.2f}\n"
+line6 = f" DS Gen. exc. Load | {round(main_perf['generate_benchmark_dataset_time'] - main_perf['load_time'], 3):>11} | {round(pr_perf['generate_benchmark_dataset_time'] - pr_perf['load_time'], 3):>11} | {ratio1:.2f}\n"
 line7 = f" Inference time | {main_perf['inference_time']:>11} | {pr_perf['inference_time']:>11} | {pr_perf['inference_time'] / main_perf['inference_time']:.2f}\n"
 line8 = f" Evaluate time | {main_perf['evaluation_time']:>11} | {pr_perf['evaluation_time']:>11} | {ratio2:.2f}\n"
-line9 = f" Benchmark Instant. | {main_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time'] / main_perf['instantiate_benchmark_time']:.2f}\n"
+line9 = f" BM/Recipe Instant. | {main_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time'] / main_perf['instantiate_benchmark_time']:.2f}\n"
 line10 = f" Model Instantiation| {main_perf['instantiate_model_time']:>11} | {pr_perf['instantiate_model_time']:>11} | {pr_perf['instantiate_model_time'] / main_perf['instantiate_model_time']:.2f}\n"
 
 print("### Performance Comparison Results, time expressed in seconds:\n")
 print(line1 + line2 + line3 + line4 + line5 + line6 + line7 + line8 + line9 + line10)
 print("\n\n")
 # Performance degradation check (5% threshold)
-if ratio1 > 1.05 or ratio2 > 1.05:
+if (ratio0 > 1.05) and (ratio1 > 1.05 or ratio2 > 1.05):
     print(
         "\n**Warning**: Performance degradation in Dataset Generation and/or Evaluation exceeds 5%!"
     )
     print(
-        "Explore branch performance via 'python performance/bluebench_profiler.py --output_file=<path to json file>',"
-        "followed by 'snakeviz <the performance.prof file specified in the output json file>'."
+        "Explore branch performance via 'python performance/bluebench_profiler.py --output_file=``path to json file``',"
+        "followed by 'snakeviz ``the performance.prof file specified in the output json file``'."
     )
     sys.exit(1)
 
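To make the revised gate concrete, here is a tiny self-contained sketch with made-up numbers. ratio0 and ratio1 follow the definitions in the hunks above; ratio2 is assumed to be the evaluation-time ratio, which matches how it is used in line8 (its actual definition sits in an unexpanded part of the diff).

# Toy numbers only; the gate now trips when overall time regresses AND either
# dataset generation (excluding load) or evaluation regresses, each by more than 5%.
main_perf = {"total_time": 100.0, "generate_benchmark_dataset_time": 40.0,
             "load_time": 10.0, "evaluation_time": 20.0}
pr_perf = {"total_time": 108.0, "generate_benchmark_dataset_time": 46.0,
           "load_time": 10.0, "evaluation_time": 20.5}

ratio0 = pr_perf["total_time"] / main_perf["total_time"]  # 1.08
ratio1 = (pr_perf["generate_benchmark_dataset_time"] - pr_perf["load_time"]) / (
    main_perf["generate_benchmark_dataset_time"] - main_perf["load_time"]
)  # 36 / 30 = 1.20
ratio2 = pr_perf["evaluation_time"] / main_perf["evaluation_time"]  # assumed definition, ~1.03

print((ratio0 > 1.05) and (ratio1 > 1.05 or ratio2 > 1.05))  # True: total and dataset-generation time both regressed by >5%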