Add AOT benchmarks. (#407)

* add ffi to translate to llvm ir * better * progress * module to object * ld * compile * add compile * clean * fix ci * fix macos * progress * progress * progress * try * investigate * needed 1 more argument * changes * weird * realloc * clean a bit * cleanup * AOT preview. * Fix ARM trampoline. * Fix stuff. * Fix support for any number of arguments (stack alignment). * Prepare for x86_64 support. * Make it work on x86_64 linux. * Document x86_64 trampoline assembly. * Add call arguments flattening. * Updates. * Fix return values in `aarch64`. * Add AOT versions of NativeExecutor and ProgramCache. * Rename `JITValue` to `JitValue` to comply with Rust's naming convention. * Add enum ABI testing. * Fix docs. * Handle enum arguments. * Refactor enums into the stack. * Minor fix. * Make structures optionally memory-allocated. * Fix tail recursion with memory-allocated arguments. * Fix JIT value deserialization when memory allocated. * Fix typo. * Fix bool libfuncs. * Add `print_i8` debug utility. * Fix invocation arguments. * Register debug utils on integration tests. * Fix memory-allocated enum payloads. * Fix JitValue deserialization. * Add `print_i128` debug utility. * Fix JIT invoke return pointer offsets. * Fix stuff. * Merge `src/jit_runtime.rs` into `src/executor/jit.rs`. Unify APIs. Various fixes. * Fix stuff. * It seems to be working now. * Support stack-allocated C-style enums. * Remove unused file. * Remove builtins from function signatures. Fix `enum_init` for non-memory-allocated enums. * Fix boolean libfuncs (bools are not memory-allocated). * Implement multi-variant non-memory-allocated enum support. * Support non-memory-allocated enum matches. * Fix gas handling. * Fix `enum_match` libfunc for C-style and single-variant non-memory-allocated enums. * Add support for `sint8`, `sint16` and `sint32`. * Lots of fixes. * Reorganize code for DRY. Support Starknet contracts. * Support `EcPoint`, `EcState` and `NonZero<T>` return types. 
* Support the syscall handler when calling `invoke_dynamic` directly. * Make contracts accept only felts and convert to `Span<felt252>` internally. Support snapshot arguments. Various fixes. * Fix gas on integration tests. * Minor fix. * Fix non-syscall-handler Starknet types. * Support dictionaries. Minor fixes. * Fix non-syscall-handler Starknet types support. * Minor fix. * Fix linker. * Merge branch 'main' into aot-with-stack-enums * Remove irrelevant autogenerated files. * Remove calling convention discovery testing code. * Revert "Remove irrelevant autogenerated files.". Reason: those files weren't autogenerated by my testing. This reverts commit 53f65ed. * Undo conditional benchmark execution. * Fix formatting and clippy issues. * Remove unsafe transmute that is no longer necessary. * Sort dependencies alphabetically. * Implement `Debug` for the contract caches. * Refactor out the necessary refcell. * Actually remove the refcell. * Fix stuff from previous changes. * Fix stuff. * Fix warnings. * Fix errors. * Fix argument stack align. * Fix warnings. * use env var to add additional dir to search for runtime lib (#400) * use env var to add additional dir to search for runtime lib * oops * Ignore failing tests. * Rename env var to something that makes more sense. Set the variable on the CI runners. * Fix macOS and coverage CI runs. * Fix CI again. * Add new bench to bench script. * Remove debug utils commented code. * Add optimization level flag to CLI. * Add shortcuts. * Add benchmarks. * Add direct invoke benchmarks. * Remove merge duplicates. * Fix after merge (tests). * Fix tests (workaround). * Fix fmt and issue (workaround). * Fix issue (workaround). * Add optimization level warning in the CLIs. * Limit the scope of an `unsafe` block. * Disable optimizations for criterion benchmarks. * Fix formatting. * Disable optimizations on compile time benchmarks. * Fix benches. * Disable direct invoke benches on macOS. 
--------- Co-authored-by: Edgar Luque <[email protected]>
lambdaclass · Jan 4, 2024 · 9ec1790 · 9ec1790
1 parent 82884eb
commit 9ec1790
Show file tree

Hide file tree

Showing 23 changed files with 371 additions and 69 deletions.
diff --git a/.github/workflows/bench-hyperfine.yml b/.github/workflows/bench-hyperfine.yml
@@ -18,6 +18,7 @@ jobs:
     name: Hyperfine
     runs-on: ubuntu-latest
     env:
+      CAIRO_NATIVE_RUNTIME_LIBDIR: ${{ github.workspace }}/target/release
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
       MLIR_SYS_170_PREFIX: /usr/lib/llvm-17/
       TABLEGEN_170_PREFIX: /usr/lib/llvm-17/
@@ -62,6 +63,8 @@ jobs:
         run: make deps
       - name: Build project
         run: make build
+      - name: Build runtime subproject
+        run: cargo build --release --package=cairo-native-runtime
       - name: Run benchmarks
         run: ./scripts/bench-hyperfine.sh programs/benches/*.cairo
       - name: Create markdown file

diff --git a/Cargo.toml b/Cargo.toml
@@ -92,6 +92,10 @@ opt-level = 3
 [profile.dev.package."*"]
 opt-level = 1
 
+[[bench]]
+name = "benches"
+harness = false
+
 [[bench]]
 name = "compile_time"
 harness = false

diff --git a/benches/benches.rs b/benches/benches.rs
@@ -0,0 +1,154 @@
+use cairo_lang_compiler::{
+    compile_prepared_db, db::RootDatabase, project::setup_project, CompilerConfig,
+};
+use cairo_lang_sierra::program::Program;
+use cairo_native::{
+    cache::{AotProgramCache, JitProgramCache},
+    context::NativeContext,
+    utils::find_function_id,
+    OptLevel,
+};
+use criterion::{criterion_group, criterion_main, Criterion};
+use starknet_types_core::felt::Felt;
+use std::path::Path;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let context = NativeContext::new();
+    let mut aot_cache = AotProgramCache::new(&context);
+    let mut jit_cache = JitProgramCache::new(&context);
+
+    let factorial = load_contract("programs/benches/factorial_2M.cairo");
+    let fibonacci = load_contract("programs/benches/fib_2M.cairo");
+    let logistic_map = load_contract("programs/benches/logistic_map.cairo");
+
+    let aot_factorial = aot_cache.compile_and_insert(Felt::from(0), &factorial, OptLevel::None);
+    let aot_fibonacci = aot_cache.compile_and_insert(Felt::from(1), &fibonacci, OptLevel::None);
+    let aot_logistic_map =
+        aot_cache.compile_and_insert(Felt::from(2), &logistic_map, OptLevel::None);
+
+    let jit_factorial = jit_cache.compile_and_insert(Felt::from(0), &factorial, OptLevel::None);
+    let jit_fibonacci = jit_cache.compile_and_insert(Felt::from(1), &fibonacci, OptLevel::None);
+    let jit_logistic_map =
+        jit_cache.compile_and_insert(Felt::from(2), &logistic_map, OptLevel::None);
+
+    let factorial_function_id = find_function_id(&factorial, "factorial_2M::factorial_2M::main");
+    let fibonacci_function_id = find_function_id(&fibonacci, "fib_2M::fib_2M::main");
+    let logistic_map_function_id =
+        find_function_id(&logistic_map, "logistic_map::logistic_map::main");
+
+    c.bench_function("Cached JIT factorial_2M", |b| {
+        b.iter(|| jit_factorial.invoke_dynamic(factorial_function_id, &[], Some(u128::MAX), None));
+    });
+    c.bench_function("Cached JIT fib_2M", |b| {
+        b.iter(|| jit_fibonacci.invoke_dynamic(fibonacci_function_id, &[], Some(u128::MAX), None));
+    });
+    c.bench_function("Cached JIT logistic_map", |b| {
+        b.iter(|| {
+            jit_logistic_map.invoke_dynamic(logistic_map_function_id, &[], Some(u128::MAX), None)
+        });
+    });
+
+    c.bench_function("Cached AOT factorial_2M", |b| {
+        b.iter(|| aot_factorial.invoke_dynamic(factorial_function_id, &[], Some(u128::MAX), None));
+    });
+    c.bench_function("Cached AOT fib_2M", |b| {
+        b.iter(|| aot_fibonacci.invoke_dynamic(fibonacci_function_id, &[], Some(u128::MAX), None));
+    });
+    c.bench_function("Cached AOT logistic_map", |b| {
+        b.iter(|| {
+            aot_logistic_map.invoke_dynamic(logistic_map_function_id, &[], Some(u128::MAX), None)
+        });
+    });
+
+    #[cfg(target_arch = "x86_64")]
+    {
+        use std::mem::MaybeUninit;
+
+        #[allow(dead_code)]
+        struct PanicResult {
+            tag: u8,
+            payload: MaybeUninit<(i32, i32, *mut [u64; 4])>,
+        }
+
+        let aot_factorial_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                aot_factorial
+                    .find_function_ptr(factorial_function_id)
+                    .cast(),
+            )
+        };
+        let aot_fibonacci_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                aot_fibonacci
+                    .find_function_ptr(fibonacci_function_id)
+                    .cast(),
+            )
+        };
+        let aot_logistic_map_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                aot_logistic_map
+                    .find_function_ptr(logistic_map_function_id)
+                    .cast(),
+            )
+        };
+        let jit_factorial_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                jit_factorial
+                    .find_function_ptr(factorial_function_id)
+                    .cast(),
+            )
+        };
+        let jit_fibonacci_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                jit_fibonacci
+                    .find_function_ptr(fibonacci_function_id)
+                    .cast(),
+            )
+        };
+        let jit_logistic_map_fn = unsafe {
+            std::mem::transmute::<*const (), extern "C" fn(u128) -> (u128, PanicResult)>(
+                jit_logistic_map
+                    .find_function_ptr(logistic_map_function_id)
+                    .cast(),
+            )
+        };
+
+        c.bench_function("Cached JIT factorial_2M (direct invoke)", |b| {
+            b.iter(|| jit_factorial_fn(u128::MAX));
+        });
+        c.bench_function("Cached JIT fib_2M (direct invoke)", |b| {
+            b.iter(|| jit_fibonacci_fn(u128::MAX));
+        });
+        c.bench_function("Cached JIT logistic_map (direct invoke)", |b| {
+            b.iter(|| jit_logistic_map_fn(u128::MAX));
+        });
+
+        c.bench_function("Cached AOT factorial_2M (direct invoke)", |b| {
+            b.iter(|| aot_factorial_fn(u128::MAX));
+        });
+        c.bench_function("Cached AOT fib_2M (direct invoke)", |b| {
+            b.iter(|| aot_fibonacci_fn(u128::MAX));
+        });
+        c.bench_function("Cached AOT logistic_map (direct invoke)", |b| {
+            b.iter(|| aot_logistic_map_fn(u128::MAX));
+        });
+    }
+}
+
+fn load_contract(path: impl AsRef<Path>) -> Program {
+    let mut db = RootDatabase::builder().detect_corelib().build().unwrap();
+    let main_crate_ids = setup_project(&mut db, path.as_ref()).unwrap();
+    (*compile_prepared_db(
+        &mut db,
+        main_crate_ids,
+        CompilerConfig {
+            replace_ids: true,
+            ..Default::default()
+        },
+    )
+    .unwrap())
+    .clone()
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/compile_time.rs b/benches/compile_time.rs
@@ -1,4 +1,4 @@
-use cairo_native::{context::NativeContext, module_to_object};
+use cairo_native::{context::NativeContext, module_to_object, OptLevel};
 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
 use util::prepare_programs;
 
@@ -44,7 +44,7 @@ pub fn bench_compile_time(c: &mut Criterion) {
                 b.iter(|| {
                     let native_context = NativeContext::new();
                     let module = native_context.compile(black_box(program)).unwrap();
-                    let object = module_to_object(module.module())
+                    let object = module_to_object(module.module(), OptLevel::None)
                         .expect("to compile correctly to a object file");
                     black_box(object)
                 })
@@ -61,7 +61,7 @@ pub fn bench_compile_time(c: &mut Criterion) {
             c.bench_with_input(BenchmarkId::new(filename, 1), &program, |b, program| {
                 b.iter(|| {
                     let module = native_context.compile(black_box(program)).unwrap();
-                    let object = module_to_object(module.module())
+                    let object = module_to_object(module.module(), OptLevel::None)
                         .expect("to compile correctly to a object file");
                     black_box(object)
                 })

diff --git a/benches/libfuncs.rs b/benches/libfuncs.rs
@@ -52,7 +52,8 @@ pub fn bench_libfuncs(c: &mut Criterion) {
                     b.iter(|| {
                         let module = native_context.compile(program).unwrap();
                         // pass manager internally verifies the MLIR output is correct.
-                        let native_executor = JitNativeExecutor::new(module);
+                        let native_executor =
+                            JitNativeExecutor::from_native_module(module, Default::default());
 
                         // Execute the program.
                         let result = native_executor
@@ -70,7 +71,8 @@ pub fn bench_libfuncs(c: &mut Criterion) {
                     let native_context = NativeContext::new();
                     let module = native_context.compile(program).unwrap();
                     // pass manager internally verifies the MLIR output is correct.
-                    let native_executor = JitNativeExecutor::new(module);
+                    let native_executor =
+                        JitNativeExecutor::from_native_module(module, Default::default());
 
                     // warmup
                     for _ in 0..5 {

diff --git a/examples/easy_api.rs b/examples/easy_api.rs
@@ -24,7 +24,7 @@ fn main() {
     let entry_point_id = cairo_native::utils::find_function_id(&sierra_program, entry_point);
 
     // Instantiate the executor.
-    let native_executor = JitNativeExecutor::new(native_program);
+    let native_executor = JitNativeExecutor::from_native_module(native_program, Default::default());
 
     // Execute the program.
     let result = native_executor

diff --git a/examples/erc20.rs b/examples/erc20.rs
@@ -313,7 +313,7 @@ fn main() {
         find_entry_point_by_idx(&sierra_program, entry_point.function_idx).unwrap();
     let fn_id = &entry_point_fn.id;
 
-    let native_executor = JitNativeExecutor::new(native_program);
+    let native_executor = JitNativeExecutor::from_native_module(native_program, Default::default());
 
     let result = native_executor
         .invoke_contract_dynamic(

diff --git a/examples/invoke.rs b/examples/invoke.rs
@@ -28,7 +28,7 @@ fn main() {
 
     let fn_id = &entry_point_fn.id;
 
-    let native_executor = JitNativeExecutor::new(native_program);
+    let native_executor = JitNativeExecutor::from_native_module(native_program, Default::default());
 
     let output = native_executor.invoke_dynamic(fn_id, &[JitValue::Felt252(1.into())], None, None);
 

diff --git a/examples/starknet.rs b/examples/starknet.rs
@@ -313,7 +313,7 @@ fn main() {
 
     let fn_id = &entry_point_fn.id;
 
-    let native_executor = JitNativeExecutor::new(native_program);
+    let native_executor = JitNativeExecutor::from_native_module(native_program, Default::default());
 
     let result = native_executor
         .invoke_contract_dynamic(

diff --git a/scripts/bench-hyperfine.sh b/scripts/bench-hyperfine.sh
@@ -90,22 +90,23 @@ run_bench() {
         -o "$OUTPUT_DIR/$base_name-march-native" \
         >> /dev/stderr
 
-    hyperfine \
+    CAIRO_NATIVE_RUNTIME_LIBDIR="$ROOT_DIR/target/release" hyperfine \
         --warmup 3 \
         --export-markdown "$OUTPUT_DIR/$base_name.md" \
         --export-json "$OUTPUT_DIR/$base_name.json" \
         -n "Cairo-vm (Rust, Cairo 1)" "$CAIRO_RUN --available-gas 18446744073709551615 -s $base_path.cairo" \
-        -n "cairo-native (JIT MLIR ORC Engine)" "$JIT_CLI $base_path.cairo $base_name::$base_name::main" \
-        -n "cairo-native (AOT Native binary)" "$OUTPUT_DIR/$base_name" \
-        -n "cairo-native (AOT Native binary with host CPU features, march=native)" "$OUTPUT_DIR/$base_name-march-native" \
+        -n "cairo-native (embedded AOT)" "$JIT_CLI --mode=aot $base_path.cairo $base_name::$base_name::main" \
+        -n "cairo-native (embedded JIT using LLVM's ORC Engine)" "$JIT_CLI --mode=jit $base_path.cairo $base_name::$base_name::main" \
+        -n "cairo-native (standalone AOT)" "$OUTPUT_DIR/$base_name" \
+        -n "cairo-native (standalone AOT with -march=native)" "$OUTPUT_DIR/$base_name-march-native" \
         >> /dev/stderr
 }
 
 echo "Rust cairo-run version: $($CAIRO_RUN --version)"
 
 if [ $# -eq 0 ]
 then
-    echo "${bold}Benchmarking ${#CAIRO_SRCS[@]} programs.${normal}${normal}"
+    echo "${bold}Benchmarking ${#CAIRO_SRCS[@]} programs.${normal}"
 
     count=1
     for program in "${CAIRO_SRCS[@]}"
@@ -116,7 +117,7 @@ then
         count=$((count + 1))
     done
 else
-    echo "${bold}Benchmarking $# programs."
+    echo "${bold}Benchmarking $# programs.${normal}"
 
     count=1
     for program in "$@"

diff --git a/src/bin/cairo-native-compile.rs b/src/bin/cairo-native-compile.rs
@@ -16,6 +16,7 @@ use cairo_native::{
     debug_info::{DebugInfo, DebugLocations},
     metadata::{runtime_bindings::RuntimeBindingsMeta, MetadataStorage},
     utils::run_pass_manager,
+    OptLevel,
 };
 use clap::Parser;
 use melior::{
@@ -32,23 +33,9 @@ use std::{
 };
 use tracing_subscriber::{EnvFilter, FmtSubscriber};
 
-#[derive(Parser, Debug)]
-#[command(author, version, about, long_about = None)]
-struct Args {
-    /// Input .sierra or .cairo program.
-    input: PathBuf,
-
-    /// Output file, .so on linux, .dylib on macOS
-    output: PathBuf,
-
-    /// Whether the program is a contract.
-    #[arg(short, long)]
-    starknet: bool,
-}
-
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Parse command-line arguments.
-    let args = Args::parse();
+    let args = CmdLine::parse();
 
     // Configure logging and error handling.
     tracing::subscriber::set_global_default(
@@ -90,8 +77,21 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     run_pass_manager(&context, &mut module)?;
 
-    let object = cairo_native::module_to_object(&module)?;
-    cairo_native::object_to_shared_lib(&object, &args.output)?;
+    let opt_level = match args.opt_level {
+        0 => OptLevel::None,
+        1 => OptLevel::Less,
+        2 => OptLevel::Default,
+        _ => OptLevel::Aggressive,
+    };
+
+    let object = cairo_native::module_to_object(&module, opt_level)?;
+    cairo_native::object_to_shared_lib(
+        &object,
+        match &args.output {
+            CompilerOutput::Stdout => Path::new("/dev/stdout"),
+            CompilerOutput::Path(x) => x,
+        },
+    )?;
 
     Ok(())
 }
@@ -184,6 +184,10 @@ struct CmdLine {
     #[clap(value_parser = parse_input)]
     input: PathBuf,
 
+    /// Note: This is bugged for any non-zero values. See https://github.com/lambdaclass/cairo_native/issues/404.
+    #[clap(short = 'O', long, default_value = "0")]
+    opt_level: usize,
+
     #[clap(short = 'o', long = "output", value_parser = parse_output, default_value = "-")]
     output: CompilerOutput,