forked from Kei18/lacam
feat(python script): calculate overhead
Commit 999dda7 (1 parent: 52ec840)
Showing 6 changed files with 222 additions and 115 deletions.
@@ -1,114 +1,104 @@
-import argparse
-import itertools
-import logging
-import subprocess
-import pandas as pd
-import yaml
-
-from pathlib import Path
-from typing import TypedDict, List
-
-BASE_PATH = Path(__file__).absolute().parent
-
-LOG = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO, format="%(message)s")
-
-
-class ExperimentParameters(TypedDict):
-    map: List[str]
-    cache: List[str]
-    look_ahead: List[str]
-    delay_deadline: List[str]
-    ngoals: List[int]
-    gg: List[str]
-    goals_k: List[int]
-    goals_m: List[int]
-    nagents: List[int]
-    seed: int  # Assuming a single value for simplicity; modify as needed.
-    time_limit_sec: int  # Assuming a single value for simplicity; modify as needed.
-    output_step_result: str
-    output_csv_result: str
-    output_throughput_result: str
-    log_short: bool
-    debug: bool
-
-
-def load_experiment(exp_name: str):
-    exp_path = BASE_PATH / "experiment" / f"{exp_name}.yaml"
-    if not exp_path.exists():
-        LOG.error(f"Experiment file {exp_path} not found.")
-        return None
-
-    with open(exp_path) as f:
-        return yaml.safe_load(f)
-
-
-def generate_combinations(params: ExperimentParameters):
-    keys = params.keys()
-    values = (params[key] if isinstance(params[key], list) else [params[key]] for key in keys)
-    for combination in itertools.product(*values):
-        yield dict(zip(keys, combination))
-
-
-def check_and_create_csv(output_csv_path: str):
-    # Convert string path to Path object for easier handling
-    csv_path = Path(output_csv_path)
-    if not csv_path.exists():
-        # Ensure the directory exists
-        csv_path.parent.mkdir(parents=True, exist_ok=True)
-        # Create the file and write the header
-        with open(csv_path, 'w') as csv_file:
-            csv_file.write("map_name,cache,look_ahead,delay_deadline,goal_generation_type,ngoals,nagents,seed,verbose,time_limit_sec,goals_m,goals_k,cache_hit_rate,makespan,throughput,p0_steps,p50_steps,p99steps\n")
-
-
-def check_and_create_throughput(output_throughput_path: str):
-    # Convert string path to Path object for easier handling
-    throughput_path = Path(output_throughput_path)
-    if not throughput_path.exists():
-        # Ensure the directory exists
-        throughput_path.parent.mkdir(parents=True, exist_ok=True)
-        # Create the file and write the header
-        with open(throughput_path, 'w') as csv_file:
-            csv_file.write("map_name,cache,look_ahead,delay_deadline,goal_generation_type,ngoals,nagents,seed,verbose,time_limit_sec,goals_m,goals_k\n")
-
-
-def run_experiment(params: ExperimentParameters):
-    check_and_create_csv(params.get("output_csv_result", "./result/result.csv"))
-    check_and_create_throughput(params.get("output_throughput_result", "./result/throughput.csv"))
-
-    cmd_base = [
-        "./build/CAL-MAPF",
-        "-mf", params["map"],
-        "-ct", params["cache"],
-        "-lan", str(params["look_ahead"]),
-        "-ddl", str(params["delay_deadline"]),
-        "-ng", str(params["ngoals"]),
-        "-ggs", params["gg"],
-        "-gmk", str(params["goals_k"]),
-        "-gmm", str(params["goals_m"]),
-        "-na", str(params["nagents"]),
-        "-rs", str(params.get("seed", 0)),
-        "-tls", str(params.get("time_limit_sec", 10)),
-        "-osrf", params.get("output_step_result", "./result/step_result.txt"),
-        "-ocf", params.get("output_csv_result", "./result/result.csv"),
-        "-otf", params.get("output_throughput_result", "./result/throughput.csv")
-    ]
-    if params.get("log_short", False):
-        cmd_base.append("--log_short")
-    if params.get("debug", False):
-        cmd_base.append("--debug")
-
-    LOG.info(f"Executing: {' '.join(cmd_base)}")
-    subprocess.run(cmd_base, check=True)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Run lacam experiments with different parameters.")
-    parser.add_argument("experiment", help="Experiment name to run.")
-    args = parser.parse_args()
-
-    exp_params = load_experiment(args.experiment)
-    if exp_params is None:
-        return
-
-    for combination in generate_combinations(exp_params):
-        try:
-            run_experiment(combination)
-        except subprocess.CalledProcessError as e:
-            LOG.error(f"Experiment failed with error: {e}")
-
-
-if __name__ == "__main__":
-    main()
+import pandas as pd
+import numpy as np
+
+
+def analyze_cache_overhead(csv_path):
+    """
+    Analyze cache overhead from experimental data.
+
+    Args:
+        csv_path (str): Path to the CSV file containing experiment results
+
+    Returns:
+        tuple: (overall P0/P50/P99 overhead percentages, the same
+        percentiles per number of agents), or None if no overhead
+        could be computed
+    """
+    # Read the CSV file
+    df = pd.read_csv(csv_path)
+
+    # Define the columns that constitute an experiment setting
+    setting_columns = [
+        'map_name', 'goal_generation_type', 'ngoals', 'nagents'
+    ]
+
+    # Function to calculate the overhead percentage for one setting
+    def calculate_overhead(group):
+        # Get baseline (no cache) elapsed time
+        baseline = group[group['cache'] == 'NONE']
+        if baseline.empty:
+            return None
+        baseline_time = baseline['elapsed_time'].iloc[0]
+
+        # Get cached runs (excluding NONE)
+        cached_runs = group[group['cache'] != 'NONE']
+        if cached_runs.empty:
+            return None
+
+        # Calculate average cached time across all cache types
+        avg_cached_time = cached_runs['elapsed_time'].mean()
+
+        # Calculate overhead percentage relative to the baseline
+        return ((avg_cached_time - baseline_time) / baseline_time) * 100
+
+    # Group by experiment settings ('nagents' is already part of
+    # setting_columns, so it must not be appended a second time)
+    grouped = df.groupby(setting_columns)
+    overheads = []
+
+    for _, group in grouped:
+        # print(group)  # uncomment to inspect each group while debugging
+        overhead = calculate_overhead(group)
+        if overhead is not None:
+            overheads.append(overhead)
+
+    if not overheads:
+        return None
+
+    # Calculate percentiles over all settings
+    results = {
+        'P0': np.percentile(overheads, 0),
+        'P50': np.percentile(overheads, 50),
+        'P99': np.percentile(overheads, 99)
+    }
+
+    # Also calculate the same percentiles per number of agents
+    per_agent_results = {}
+
+    for nagents, group in df.groupby('nagents'):
+        agent_overheads = []
+        for _, setting_group in group.groupby(setting_columns):
+            overhead = calculate_overhead(setting_group)
+            if overhead is not None:
+                agent_overheads.append(overhead)
+
+        if agent_overheads:
+            per_agent_results[nagents] = {
+                'P0': np.percentile(agent_overheads, 0),
+                'P50': np.percentile(agent_overheads, 50),
+                'P99': np.percentile(agent_overheads, 99)
+            }
+
+    return results, per_agent_results
+
+
+def main():
+    csv_path = './result/result_Real.csv'  # Update with your CSV file path
+    results = analyze_cache_overhead(csv_path)
+
+    if results:
+        overall_results, per_agent_results = results
+
+        print("\nOverall Cache Overhead Analysis:")
+        print(f"P0 (minimum) overhead: {overall_results['P0']:.2f}%")
+        print(f"P50 (median) overhead: {overall_results['P50']:.2f}%")
+        print(f"P99 overhead: {overall_results['P99']:.2f}%")
+
+        print("\nOverhead by Number of Agents:")
+        for nagents, stats in sorted(per_agent_results.items()):
+            print(f"\nAgents: {nagents}")
+            print(f"  P0 (minimum): {stats['P0']:.2f}%")
+            print(f"  P50 (median): {stats['P50']:.2f}%")
+            print(f"  P99: {stats['P99']:.2f}%")
+    else:
+        print("Could not calculate overhead. Make sure the data contains both cached and non-cached (NONE) experiments.")
+
+
+if __name__ == "__main__":
+    main()
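For context, here is a minimal sketch (not part of the commit) of the input CSV that analyze_cache_overhead() expects, together with the arithmetic for one setting. The column names are taken from the script above; the map name, goal-generation type, cache types, and timings are invented for illustration, and the final call assumes the snippet runs in the same module as the function.

import pandas as pd

# One experiment setting, with a baseline run (cache == 'NONE') and two
# cached runs. All values below are made up.
toy = pd.DataFrame(
    [
        ("random-32-32.map", "MK", 100, 4, "NONE", 10.0),
        ("random-32-32.map", "MK", 100, 4, "LRU", 11.0),
        ("random-32-32.map", "MK", 100, 4, "FIFO", 12.0),
    ],
    columns=["map_name", "goal_generation_type", "ngoals",
             "nagents", "cache", "elapsed_time"],
)
toy.to_csv("toy_result.csv", index=False)

# Average cached time = (11.0 + 12.0) / 2 = 11.5; baseline = 10.0;
# overhead = (11.5 - 10.0) / 10.0 * 100 = 15.0%. With a single setting,
# P0, P50, and P99 all equal 15.0.
print(analyze_cache_overhead("toy_result.csv"))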
@@ -0,0 +1,112 @@
+import pandas as pd
+import numpy as np
+import argparse
+import os
+
+
+def analyze_cache_overhead(input_csv, output_csv):
+    """
+    Analyze cache overhead from experimental data and output detailed results to CSV.
+
+    Args:
+        input_csv (str): Path to the input CSV file containing experiment results
+        output_csv (str): Path to save the output CSV with overhead analysis
+    """
+    # Read the CSV file
+    df = pd.read_csv(input_csv)
+
+    # Define the columns that constitute an experiment setting
+    setting_columns = [
+        'map_name', 'goal_generation_type', 'ngoals', 'nagents'
+    ]
+
+    # Prepare results storage
+    results = []
+
+    # Group by experiment settings and calculate overheads
+    grouped = df.groupby(setting_columns)
+
+    for setting_values, group in grouped:
+        # Skip settings without a non-cached baseline
+        if 'NONE' not in group['cache'].values:
+            continue
+
+        # Get baseline (no cache) elapsed time
+        baseline_time = group[group['cache'] == 'NONE']['elapsed_time'].iloc[0]
+
+        # Get cached times (excluding NONE)
+        cached_times = group[group['cache'] != 'NONE']['elapsed_time']
+
+        if cached_times.empty:
+            continue
+
+        # Calculate average cached time
+        avg_cached_time = cached_times.mean()
+
+        # Calculate overhead percentage
+        overhead_pct = ((avg_cached_time - baseline_time) / baseline_time) * 100
+
+        # Create result dictionary with all setting values
+        result = dict(zip(setting_columns, setting_values))
+        result.update({
+            'baseline_time': baseline_time,
+            'avg_cached_time': avg_cached_time,
+            'overhead_percentage': overhead_pct
+        })
+
+        results.append(result)
+
+    if not results:
+        print("Could not calculate overhead. Make sure the data contains both cached and non-cached (NONE) experiments.")
+        return None
+
+    # Convert results to a DataFrame
+    results_df = pd.DataFrame(results)
+
+    # Calculate overall percentiles
+    p0 = np.percentile(results_df['overhead_percentage'], 0)
+    p50 = np.percentile(results_df['overhead_percentage'], 50)
+    p99 = np.percentile(results_df['overhead_percentage'], 99)
+
+    # Print summary statistics
+    print("\nCache Overhead Analysis Results:")
+    print(f"P0 (minimum) overhead: {p0:.2f}%")
+    print(f"P50 (median) overhead: {p50:.2f}%")
+    print(f"P99 overhead: {p99:.2f}%")
+
+    # Sort results by map_name and nagents for better readability
+    results_df = results_df.sort_values(['map_name', 'nagents'])
+
+    # Save to CSV
+    results_df.to_csv(output_csv, index=False, float_format='%.2f')
+    print(f"\nDetailed results saved to: {output_csv}")
+
+    return {
+        'P0': p0,
+        'P50': p50,
+        'P99': p99
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Analyze cache overhead from experimental results.')
+    parser.add_argument('input_csv', help='Path to the input CSV file')
+    parser.add_argument('--output', '-o',
+                        help='Path to save the output CSV (default: overhead_analysis.csv)',
+                        default='overhead_analysis.csv')
+
+    args = parser.parse_args()
+
+    # Check if the input file exists
+    if not os.path.exists(args.input_csv):
+        print(f"Error: Input file '{args.input_csv}' does not exist.")
+        return
+
+    # Create the output directory if it doesn't exist
+    output_dir = os.path.dirname(args.output)
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    # Run analysis
+    analyze_cache_overhead(args.input_csv, args.output)
+
+
+if __name__ == "__main__":
+    main()
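A hedged usage sketch: the filename is not visible in this diff, but assuming the script is saved as calculate_overhead.py, it would be run as: python calculate_overhead.py ./result/result.csv -o ./result/overhead_analysis.csv. The printed P0/P50/P99 values summarize the distribution of per-setting overheads; the snippet below shows how np.percentile behaves on such a list (overhead values invented).

import numpy as np

# One overhead percentage per experiment setting (made-up values).
overheads = [2.0, 5.0, 7.5, 40.0]

print(np.percentile(overheads, 0))   # P0 = 2.0, the best-case setting
print(np.percentile(overheads, 50))  # P50 = 6.25, the median setting
print(np.percentile(overheads, 99))  # P99 ~= 39.03, near the worst case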