Skip to content

Commit

Permalink
Also better label naming for hand-written compiler-optimised kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
anyzelman committed Jan 11, 2025
1 parent bbdaab9 commit 081a1c9
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 6 deletions.
4 changes: 4 additions & 0 deletions tests/performance/bench_kernels.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#ifdef BENCH_KERNELS_OPENMP

/**
 * Variant compiled when BENCH_KERNELS_OPENMP is defined.
 *
 * The parameter list is spelled <tt>(void)</tt> so that this definition is a
 * proper C prototype; an empty <tt>()</tt> list would leave the parameters
 * unspecified in C (prior to C23).
 *
 * @returns Always <tt>true</tt>.
 */
bool bench_kernels_parallel(void) { return true; }

void bench_kernels_axpy(
double * restrict a,
const double alpha, const double * restrict x,
Expand Down Expand Up @@ -120,6 +122,8 @@ void bench_kernels_reduce(

#else

/**
 * Variant compiled when BENCH_KERNELS_OPENMP is not defined.
 *
 * The parameter list is spelled <tt>(void)</tt> so that this definition is a
 * proper C prototype; an empty <tt>()</tt> list would leave the parameters
 * unspecified in C (prior to C23).
 *
 * @returns Always <tt>false</tt>.
 */
bool bench_kernels_parallel(void) { return false; }

void bench_kernels_axpy(
double * restrict a,
const double alpha, const double * restrict x,
Expand Down
9 changes: 8 additions & 1 deletion tests/performance/bench_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <omp.h>
#include <assert.h>
#include <stddef.h> //for size_t
#include <stddef.h> // for size_t


#ifdef __cplusplus
Expand All @@ -41,10 +41,14 @@ extern "C" {
double * __restrict__ const, const double * __restrict__, const size_t
);

/** @returns Whether the kernels declared here are (shared-memory) parallel. */
bool bench_kernels_parallel();

}

#else

#include <stdbool.h> // for bool

/**
* Executes \f$ a = \alpha x + y \f$ for \a a, \a x, and \a y vectors of
* length \a n.
Expand Down Expand Up @@ -89,5 +93,8 @@ void bench_kernels_reduce(
double * restrict const alpha, const double * restrict x, const size_t n
);

/**
 * Queries which variant of the benchmark kernels was compiled.
 *
 * Declared with an explicit <tt>(void)</tt> parameter list: this branch of the
 * header is only seen by C compilers, where an empty <tt>()</tt> list would
 * not form a prototype and calls would not be checked for stray arguments.
 *
 * @returns Whether the kernels defined here are (shared-memory) parallel.
 */
bool bench_kernels_parallel(void);

#endif

10 changes: 8 additions & 2 deletions tests/performance/dot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,14 @@ int main( int argc, char ** argv ) {
}

// start benchmark test 1
std::cout << "\nBenchmark label: compiler-optimised dot product on raw "
<< "arrays of size " << in.n << std::endl;
std::cout << "\nBenchmark label: ";
if( bench_kernels_parallel() ) {
std::cout << "parallel (OpenMP) ";
} else {
std::cout << "sequential (C) ";
}
std::cout << "compiler-optimised dot product on raw arrays of size " << in.n
<< std::endl;
if( bench.exec( &bench_raw, in, out, 1, outer, true ) != SUCCESS ) {
std::cerr << "Error launching raw benchmark test.\nTest FAILED." << std::endl;
return 60;
Expand Down
9 changes: 7 additions & 2 deletions tests/performance/fma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,13 @@ int main( int argc, char ** argv ) {
return 50;
}

std::cout << "\nBenchmark label: compiler-optimised axpy of size " << in.n
<< std::endl;
std::cout << "\nBenchmark label: ";
if( bench_kernels_parallel() ) {
std::cout << "parallel (OpenMP) ";
} else {
std::cout << "sequential (C) ";
}
std::cout << "compiler-optimised axpy of size " << in.n << std::endl;
rc = bench.exec( &(test< RAW >), in, out, 1, outer, true );
if( rc != SUCCESS || out.error != SUCCESS ) {
std::cerr << "Functional test exits with nonzero exit code. "
Expand Down
8 changes: 7 additions & 1 deletion tests/performance/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,13 @@ int main( int argc, char ** argv ) {
rc = bench.exec( &(test< LAMBDA >), in, out, 1, outer, true );
}
if( rc == SUCCESS ) {
std::cout << "\nBenchmark label: compiler-optimised reduce-to-scalar of size "
std::cout << "\nBenchmark label: ";
if( bench_kernels_parallel() ) {
std::cout << "parallel (OpenMP) ";
} else {
std::cout << "sequential (C) ";
}
std::cout << "compiler-optimised reduce-to-scalar of size "
<< in.n << std::endl;
rc = bench.exec( &(test< RAW >), in, out, 1, outer, true );
}
Expand Down

0 comments on commit 081a1c9

Please sign in to comment.