From 55d74ec6de8a26dda072a7ffae2cb3ef6e5648b1 Mon Sep 17 00:00:00 2001 From: anyzelman <101567333+anyzelman@users.noreply.github.com> Date: Thu, 9 Jan 2025 00:12:09 +0100 Subject: [PATCH] 338 request for fuselets (#339) This MR provides so-called _fuselets_. Fuselets are sequences of operations that may occur in existing code bases, and could be accelerated using the nonblocking backend. A fuselet represents such a sequence of operations, which are exposed as a single plain-C function call. ALP during its standard build will generate a library of those fuselets using its nonblocking backend, thus resulting in high-performance, automatically fused kernels. The fuselets furthermore, at run-time and again automatically, tunes performance parameters such as tile sizes and the number of threads to be used. Finally, the example fuselets provided in `include/transition/fuselets.h` and `src/transition/fuselets.cpp` demonstrate how easy it is to use ALP to provide arbitrary fuselets that existing code bases may require, and are easily modified or extended. This MR also includes: - smoke and performance tests for the new fuselets; - standard semiring and monoid definitions in the new `grb::monoids` and `grb::semirings` namespaces; - tests for those standard semirings and monoids (insofar possible); - a bugfix in the stdout of the spmspm unit test; - a bugfix for missing cstdint includes in various headers; and - as always, minor code style fixes. --- include/graphblas.hpp | 6 + include/graphblas/base/io.hpp | 2 + include/graphblas/hyperdags/hyperdags.hpp | 1 + include/graphblas/identities.hpp | 224 ++-- include/graphblas/monoid.hpp | 314 ++++++ include/graphblas/nonblocking/coordinates.hpp | 4 +- include/graphblas/reference/coordinates.hpp | 1 + include/graphblas/reference/init.hpp | 1 + include/graphblas/reference/io.hpp | 2 +- include/graphblas/semiring.hpp | 421 ++++++++ include/graphblas/utils/suppressions.h | 5 + include/transition/fuselets.h | 455 ++++++++ src/transition/CMakeLists.txt | 3 + src/transition/fuselets.cpp | 669 ++++++++++++ tests/performance/CMakeLists.txt | 5 + tests/performance/fuselets.cpp | 970 ++++++++++++++++++ tests/performance/performancetests.sh | 9 + tests/performance/spmspm.cpp | 2 +- tests/smoke/CMakeLists.txt | 5 + tests/smoke/smoketests.sh | 9 + tests/unit/CMakeLists.txt | 8 + tests/unit/monoids.cpp | 299 ++++++ tests/unit/parser.cpp | 5 +- tests/unit/semirings.cpp | 423 ++++++++ tests/unit/unittests.sh | 12 + 25 files changed, 3767 insertions(+), 88 deletions(-) create mode 100644 include/transition/fuselets.h create mode 100644 src/transition/fuselets.cpp create mode 100644 tests/performance/fuselets.cpp create mode 100644 tests/unit/monoids.cpp create mode 100644 tests/unit/semirings.cpp diff --git a/include/graphblas.hpp b/include/graphblas.hpp index 029f77bf7..dd2f63102 100644 --- a/include/graphblas.hpp +++ b/include/graphblas.hpp @@ -170,6 +170,12 @@ * -# #grb::Monoid, and * -# #grb::Semiring. * + * A list of standard operators, monoids, and semirings are provided in their + * respective name spaces: + * -# #grb::operators, + * -# #grb::monoids, and + * -# #grb::semirings. + * * Binary operators are parametrised in two input domains and one output domain, * \f$ D_1 \times D_2 \to D_3 \f$. The \f$ D_i \f$ are given as template * arguments to the operator. A #grb::Monoid is composed from a binary operator diff --git a/include/graphblas/base/io.hpp b/include/graphblas/base/io.hpp index 0a33a556a..0c0bab768 100644 --- a/include/graphblas/base/io.hpp +++ b/include/graphblas/base/io.hpp @@ -41,6 +41,8 @@ #include +#include // for uintptr_t + namespace grb { diff --git a/include/graphblas/hyperdags/hyperdags.hpp b/include/graphblas/hyperdags/hyperdags.hpp index 5ecc4dbef..2ae951565 100644 --- a/include/graphblas/hyperdags/hyperdags.hpp +++ b/include/graphblas/hyperdags/hyperdags.hpp @@ -30,6 +30,7 @@ #include #include #include +#include // uintptr_t #include #include #include diff --git a/include/graphblas/identities.hpp b/include/graphblas/identities.hpp index fdbb7c7f7..837b1a5ec 100644 --- a/include/graphblas/identities.hpp +++ b/include/graphblas/identities.hpp @@ -55,91 +55,130 @@ namespace grb { /** Standard identity for numerical addition. */ template< typename D > class zero { - static_assert( std::is_convertible< int, D >::value, "Cannot form identity under the requested domain" ); - - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under standard addition (i.e., `zero'). - */ - static constexpr D value() { - return static_cast< D >( 0 ); - } + + static_assert( std::is_convertible< int, D >::value, + "Cannot form identity under the requested domain" ); + + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under standard addition (i.e., `zero'). + */ + static constexpr D value() { + return static_cast< D >( 0 ); + } + }; + template< typename K, typename V > class zero< std::pair< K, V > > { - public: - static constexpr std::pair< K, V > value() { - return std::make_pair( zero< K >::value(), zero< V >::value() ); - } + + public: + + static constexpr std::pair< K, V > value() { + return std::make_pair( zero< K >::value(), zero< V >::value() ); + } + }; /** Standard identity for numerical multiplication. */ template< typename D > class one { - static_assert( std::is_convertible< int, D >::value, "Cannot form identity under the requested domain" ); - - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under standard multiplication (i.e., `one'). - */ - static constexpr D value() { - return static_cast< D >( 1 ); - } + + static_assert( std::is_convertible< int, D >::value, + "Cannot form identity under the requested domain" ); + + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under standard multiplication (i.e., `one'). + */ + static constexpr D value() { + return static_cast< D >( 1 ); + } + }; + template< typename K, typename V > class one< std::pair< K, V > > { - public: - static constexpr std::pair< K, V > value() { - return std::make_pair( one< K >::value(), one< V >::value() ); - } + + public: + + static constexpr std::pair< K, V > value() { + return std::make_pair( one< K >::value(), one< V >::value() ); + } + }; /** Standard identity for the minimum operator. */ template< typename D > class infinity { - static_assert( std::is_arithmetic< D >::value, "Cannot form identity under the requested domain" ); - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under the standard min operator (i.e., `infinity'), - * of type \a D. - */ - static constexpr D value() { - return std::numeric_limits< D >::has_infinity ? std::numeric_limits< D >::infinity() : std::numeric_limits< D >::max(); - } + static_assert( std::is_arithmetic< D >::value, + "Cannot form identity under the requested domain" ); + + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under the standard min operator (i.e., `infinity'), + * of type \a D. + */ + static constexpr D value() { + return std::numeric_limits< D >::has_infinity + ? std::numeric_limits< D >::infinity() + : std::numeric_limits< D >::max(); + } + }; + template< typename K, typename V > class infinity< std::pair< K, V > > { - public: - static constexpr std::pair< K, V > value() { - return std::make_pair( infinity< K >::value(), infinity< V >::value() ); - } + + public: + + static constexpr std::pair< K, V > value() { + return std::make_pair( infinity< K >::value(), infinity< V >::value() ); + } + }; /** Standard identity for the maximum operator. */ template< typename D > class negative_infinity { + static_assert( std::is_arithmetic< D >::value, "Cannot form identity under the requested domain" ); - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under the standard max operator, i.e., - * `minus infinity'. - */ - static constexpr D value() { - return std::numeric_limits< D >::min() == 0 ? 0 : ( std::numeric_limits< D >::has_infinity ? -std::numeric_limits< D >::infinity() : std::numeric_limits< D >::min() ); - } + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under the standard max operator, i.e., + * `minus infinity'. + */ + static constexpr D value() { + return std::numeric_limits< D >::min() == 0 + ? 0 + : ( std::numeric_limits< D >::has_infinity + ? -std::numeric_limits< D >::infinity() + : std::numeric_limits< D >::min() + ); + } + }; + template< typename K, typename V > class negative_infinity< std::pair< K, V > > { - public: + + public: + static constexpr std::pair< K, V > value() { - return std::make_pair( negative_infinity< K >::value(), negative_infinity< V >::value() ); + return std::make_pair( negative_infinity< K >::value(), + negative_infinity< V >::value() ); } + }; /** @@ -149,24 +188,33 @@ namespace grb { */ template< typename D > class logical_false { - static_assert( std::is_convertible< bool, D >::value, "Cannot form identity under the requested domain" ); - - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under the standard logical OR operator, i.e., - * \a false. - */ - static const constexpr D value() { - return static_cast< D >( false ); - } + + static_assert( std::is_convertible< bool, D >::value, + "Cannot form identity under the requested domain" ); + + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under the standard logical OR operator, i.e., + * \a false. + */ + static const constexpr D value() { + return static_cast< D >( false ); + } + }; + template< typename K, typename V > class logical_false< std::pair< K, V > > { - public: - static constexpr std::pair< K, V > value() { - return std::make_pair( logical_false< K >::value(), logical_false< V >::value() ); - } + + public: + + static constexpr std::pair< K, V > value() { + return std::make_pair( logical_false< K >::value(), + logical_false< V >::value() ); + } + }; /** @@ -176,27 +224,37 @@ namespace grb { */ template< typename D > class logical_true { - static_assert( std::is_convertible< bool, D >::value, "Cannot form identity under the requested domain" ); - - public: - /** - * @tparam D The domain of the value to return. - * @return The identity under the standard logical AND operator, i.e., - * \a true. - */ - static constexpr D value() { - return static_cast< D >( true ); - } + + static_assert( std::is_convertible< bool, D >::value, + "Cannot form identity under the requested domain" ); + + public: + + /** + * @tparam D The domain of the value to return. + * @return The identity under the standard logical AND operator, i.e., + * \a true. + */ + static constexpr D value() { + return static_cast< D >( true ); + } + }; + template< typename K, typename V > class logical_true< std::pair< K, V > > { - public: - static constexpr std::pair< K, V > value() { - return std::make_pair( logical_true< K >::value(), logical_true< V >::value() ); - } + + public: + + static constexpr std::pair< K, V > value() { + return std::make_pair( logical_true< K >::value(), + logical_true< V >::value() ); + } + }; } // namespace identities + } // namespace grb #endif diff --git a/include/graphblas/monoid.hpp b/include/graphblas/monoid.hpp index 0130a8bb6..c055b80cd 100644 --- a/include/graphblas/monoid.hpp +++ b/include/graphblas/monoid.hpp @@ -144,6 +144,320 @@ namespace grb { > >::value; }; + // after all of the standard definitions, declare some standard monoids + + /** + * A name space that contains a set of standard monoids. + * + * Standard monoids include: + * - #plus, for numerical addition + * - #times, for numerical multiplication + * - #min, for the minimum relation + * - #max, for the maximum relation + * - #lor, for the logical-or relation + * - #land, for the logical-and relation + * - #lxor, for the exclusive-or relation + * - #lxnor, for the negated exclusive-or relation. + * + * \note In the above, the prefix letter l stands for \em logical, + * e.g., lor stands for logical-or. + * + * There are also a couple of aliases to match different preferences: + * - #add (same as #plus), + * - #mul (same as #times), + * - #lneq (same as #lxor), and + * - #leq (same as #lxnor). + * + * \note The #min and #max monoids have different identities depending on the + * domain. The standard monoids defined here auto-adapt to the correct + * identity. + */ + namespace monoids { + + /** + * The plus monoid. + * + * Uses \em addition (plus) as the operator, and zero as its identity. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + * + * @see #add. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using plus = grb::Monoid< + grb::operators::add< D1, D2, D3 >, + grb::identities::zero + >; + + /** + * The times monoid. + * + * Uses \em multiplication (times) as the operator, and one as its identity. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * @see #mul. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using times = grb::Monoid< + grb::operators::mul< D1, D2, D3 >, + grb::identities::one + >; + + /** + * This is an alias of #plus. + * + * @see #add. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using add = plus< D1, D2, D3 >; + + /** + * This is an alias of #times. + * + * @see #times. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using mul = times< D1, D2, D3 >; + + /** + * The min monoid. + * + * Uses \em min as the operator. If the domain is floating-point, uses + * infinity as its identity; if the domain is integer, uses its maximum + * representable value as the identity of this monoid. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using min = grb::Monoid< + grb::operators::min< D1, D2, D3 >, + grb::identities::infinity + >; + + /** + * The max monoid. + * + * Uses \em max as the operator. If the domain is floating-point, uses + * negative infinity (\f$ -\infty \f$) as its identity; if the domain is + * integer, uses its minimum representable value as the identity of this + * monoid. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using max = grb::Monoid< + grb::operators::max< D1, D2, D3 >, + grb::identities::negative_infinity + >; + + /** + * The logical-or monoid. + * + * Uses \em logical-or as the operator and false as its identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using lor = grb::Monoid< + grb::operators::logical_or< D1, D2, D3 >, + grb::identities::logical_false + >; + + /** + * The logical-and monoid. + * + * Uses \em logical-and as the operator and true as its identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using land = grb::Monoid< + grb::operators::logical_and< D1, D2, D3 >, + grb::identities::logical_true + >; + + /** + * The logical-exclusive-or monoid. + * + * Uses \em logical-exclusive-or as the operator and false as its + * identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + * + * @see #lneq. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using lxor = grb::Monoid< + grb::operators::not_equal< D1, D2, D3 >, + grb::identities::logical_false + >; + + /** + * The logical-not-equals monoid. + * + * Uses \em logical-not-equals as the operator and false as its + * identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + * + * @see #lxor. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using lneq = lxor< D1, D2, D3 >; + + /** + * The logical-negated-exclusive-or monoid. + * + * Uses \em logical-negated-exclusive-or as the operator and true as + * its identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + * + * @see #leq. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using lxnor = grb::Monoid< + grb::operators::equal< D1, D2, D3 >, + grb::identities::logical_true + >; + + /** + * The logical-equals monoid. + * + * Uses \em logical-equals as the operator and true as its identity. + * + * If the domain is non-boolean, inputs will be cast to a Boolean before the + * operator is invoked, while the result will be cast to the target domain on + * output. + * + * The three domains of the monoid are: + * + * @tparam D1 The left-hand input domain of the operator + * @tparam D2 The right-hand input domain of the operator + * @tparam D3 The output domain of the operator + * + * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given, + * it will be set to \a D2. If \a D2 is not explicitly given, it will be set + * to \a D1. + * + * This is a commutative monoid (assuming \a D1 equals \a D2). + * + * @see #lxnor. + */ + template< typename D1, typename D2 = D1, typename D3 = D2 > + using leq = lxnor< D1, D2, D3 >; + + } + } // namespace grb #endif diff --git a/include/graphblas/nonblocking/coordinates.hpp b/include/graphblas/nonblocking/coordinates.hpp index 1aca8e322..ff6789287 100644 --- a/include/graphblas/nonblocking/coordinates.hpp +++ b/include/graphblas/nonblocking/coordinates.hpp @@ -27,12 +27,14 @@ #ifndef _H_GRB_NONBLOCKING_COORDINATES #define _H_GRB_NONBLOCKING_COORDINATES -#include //std::runtime_error #include +#include // uintptr_t +#include //std::runtime_error #if defined _DEBUG && ! defined NDEBUG #include #endif + #include //size_t #include diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index dd3d44cd4..2a1f90137 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -26,6 +26,7 @@ #include //size_t +#include // uintptr_t #include // std::max #include // std::runtime_error diff --git a/include/graphblas/reference/init.hpp b/include/graphblas/reference/init.hpp index 9332c8b88..a7bdf0b02 100644 --- a/include/graphblas/reference/init.hpp +++ b/include/graphblas/reference/init.hpp @@ -24,6 +24,7 @@ #define _H_GRB_REFERENCE_INIT #include +#include // for uintptr_t #include #include diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp index b788527a1..ad34a276c 100644 --- a/include/graphblas/reference/io.hpp +++ b/include/graphblas/reference/io.hpp @@ -1239,7 +1239,7 @@ namespace grb { utils::template prefixSum_seq< true >( out_ccs_offsets, n ); } #else - size_t crs_ws, ccs_ws; + NIT crs_ws, ccs_ws; utils::template prefixSum_ompPar_phase1< true >( out_crs_offsets, m, crs_ws ); if( !(descr & descriptors::force_row_major) ) { diff --git a/include/graphblas/semiring.hpp b/include/graphblas/semiring.hpp index 23a7210e3..28354ac74 100644 --- a/include/graphblas/semiring.hpp +++ b/include/graphblas/semiring.hpp @@ -383,6 +383,427 @@ namespace grb { }; + // after all of the standard definitions, declare some standard semirings + + /** + * A name space that contains a set of standard semirings. + * + * Standard semirings include: + * - #plusTimes, for numerical linear algebra + * - #minPlus, for, e.g., shortest-path graph queries + * - #maxPlus, for, e.g., longest-path graph queries + * - #minTimes, for, e.g., least-reliable-path graph queries + * - #maxTimes, for, e.g., most-reliable-path graph queries, + * - #boolean, for, e.g., reachability graph queries. + * - etc. + * + * A list of all pre-defined semirings, in addition to the above, follows: + * #minMax, #maxMin, #plusMin, #lorLand, #landLor, #lxorLand, #lxnorLor, + * #lneqLand, and #leqLor. + * + * \note Here, lor stands for logical-or and land stands for logical-and, while + * ne stands for not-equal and eq for equal. + * + * \note The #lorLand semiring over the Boolean domains is the same as the + * #boolean semiring. + * + * \note The #lxorLand semiring is the same as the #lneqLand semiring. + * + * \note The #lxnorLor semiring is the same as the #leqLor semiring. + * + * \warning Some of these pre-defined semirings are not proper semirings over + * all domains. For example, the #maxPlus semiring over unsigned + * integers would have both max and + identities be zero, and thus + * cannot act as an annihilator over +. + * + * \warning While ALP does a best-effort in catching erroneous semirings, by + * virtue of templates it cannot catch all erroneous semirings. E.g., + * continuing the above #maxPlus semiring example: even if ALP + * prevents the definition of #maxPlus semirings over unsigned types + * by relying on the std::is_unsigned type trait, a user + * could still define their own unsigned integer type that erroneously + * overloads this type trait to false. We cannot catch such + * errors and consider those programming errors. + * + * \note We do not pre-define any improper semiring, such as plusMin, that do + * appear in the GraphBLAS C specification. Instead, ALP has, for every + * primitive that takes a semiring, a variant of that primitive that + * instead of a semiring, takes 1) a cummutative monoid as an additive + * operator, and 2) any binary operator as the multiplicative operator. + * These variants do not (and may not) rely on the additive identity + * being an annihilator over the multiplicative operation, and do not + * (may not) rely on any distributive property over the two operations. + * + * Each semiring except #boolean takes up to four domains as template + * arguments, while semirings as a pure mathematical concept take only a single + * domain. The first three domains indicate the left-hand input domain, the + * right-hand input domain, and the output domain of the multiplicative monoid, + * respectively. The third and fourth domains indicate the left-hand and right- + * hand input domain of the additive monoid. The fourth domain also indicates + * the output domain of the additive monoid. + * + * \note This particular extension of semirings to four domains is rooted in + * C-Monoid categories. All useful mixed-domain semirings ALP has + * presently been applied with are C-Monoid categories, while since + * assuming this underlying algebra, the ALP code base that relates to + * algebraic structures, algebraic type traits, and their application, + * has simplified significantly. + */ + namespace semirings { + + /** + * The plusTimes semiring. + * + * Uses \em addition (plus) as the additive commutative monoid and + * \em multiplication (times) as the multiplicative monoid. The identities + * for each monoid are zero and one, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using plusTimes = grb::Semiring< + grb::operators::add< D3, D4, D4 >, + grb::operators::mul< D1, D2, D3 >, + grb::identities::zero, grb::identities::one + >; + + /** + * The minPlus semiring. + * + * Uses \em min as the additive commutative monoid and \em addition as the + * multiplicative monoid. The identities for each monoid are \f$ \infty \f$ + * and zero, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using minPlus = grb::Semiring< + grb::operators::min< D3, D4, D4 >, + grb::operators::add< D1, D2, D3 >, + grb::identities::infinity, grb::identities::zero + >; + + /** + * The maxPlus semiring. + * + * Uses \em max as the additive commutative monoid and \em addition as the + * multiplicative monoid. The identities for each monoid are \f$ -\infty \f$ + * and zero, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using maxPlus = grb::Semiring< + grb::operators::max< D3, D4, D4 >, + grb::operators::add< D1, D2, D3 >, + grb::identities::negative_infinity, grb::identities::zero + >; + + /** + * The minTimes semiring. + * + * Uses \em min as the additive commutative monoid and \em multiplication as + * the multiplicative monoid. The identities for each monoid are + * \f$ \infty \f$ and one, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using minTimes = grb::Semiring< + grb::operators::min< D3, D4, D4 >, + grb::operators::mul< D1, D2, D3 >, + grb::identities::infinity, grb::identities::one + >; + + /** + * The maxTimes semiring. + * + * Uses \em max as the additive commutative monoid and \em multiplication as + * the multiplicative monoid. The identities for each monoid are + * \f$ -infty \f$ and one, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using maxTimes = grb::Semiring< + grb::operators::max< D3, D4, D4 >, + grb::operators::mul< D1, D2, D3 >, + grb::identities::negative_infinity, grb::identities::one + >; + + /** + * The minMax semiring. + * + * Uses \em min as the additive commutative monoid and \em max as the + * multiplicative monoid. The identities for each monoid are \f$ \infty \f$ + * and \f$ -\infty \f$, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using minMax = grb::Semiring< + grb::operators::min< D3, D4, D4 >, + grb::operators::max< D1, D2, D3 >, + grb::identities::infinity, grb::identities::negative_infinity + >; + + /** + * The maxMin semiring. + * + * Uses \em max as the additive commutative monoid and \em min as the + * multiplicative monoid. The identities for each monoid are \f$ -\infty \f$ + * and \f$ \infty \f$, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using maxMin = grb::Semiring< + grb::operators::max< D3, D4, D4 >, + grb::operators::min< D1, D2, D3 >, + grb::identities::negative_infinity, grb::identities::infinity + >; + + /** + * The plusMin semiring. + * + * Uses \em plus as the additive commutative monoid and \em min as the + * multiplicative monoid. The identities for each monoid are \f$ 0 \f$ and + * \f$ \infty \f$, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using plusMin = grb::Semiring< + grb::operators::add< D3, D4, D4 >, + grb::operators::min< D1, D2, D3 >, + grb::identities::zero, grb::identities::infinity + >; + + /** + * The logical-or logical-and semiring. + * + * Uses \em or as the additive commutative monoid and \em and as the + * multiplicative monoid. The identities for each monoid are \em false and + * \em true, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using lorLand = grb::Semiring< + grb::operators::logical_or< D3, D4, D4 >, + grb::operators::logical_and< D1, D2, D3 >, + grb::identities::logical_false, grb::identities::logical_true + >; + + /** + * The Boolean semiring. + * + * Uses \em or as the additive commutative monoid and \em and as the + * multiplicative monoid. The identities for each monoid are \em false and + * \em true, respectively. All domains are fixed to bool. + */ + using boolean = lorLand< bool >; + + /** + * The logical-and logical-or semiring. + * + * Uses \em and as the additive commutative monoid and \em or as the + * multiplicative monoid. The identities for each monoid are \em true and + * em false, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using landLor = grb::Semiring< + grb::operators::logical_and< D3, D3, D4 >, + grb::operators::logical_or< D1, D2, D3 >, + grb::identities::logical_true, grb::identities::logical_false + >; + + /** + * The exclusive-logical-or logical-and semiring. + * + * Uses not-equals as the additive commutative monoid and logical-and + * as the multiplicative monoid. The identities for each monoid are \em false + * and \em true, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using lxorLand = grb::Semiring< + grb::operators::not_equal< D3, D3, D4 >, + grb::operators::logical_and< D1, D2, D3 >, + grb::identities::logical_false, grb::identities::logical_true + >; + + /** + * The not-equals logical-and semiring. + * + * Uses not-equal as the additive commutative monoid and \em and as + * the multiplicative monoid. The identities for each monoid are \em false and + * \em true, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using lneqLand = lxorLand< D1, D2, D3, D4 >; + + /** + * The negated-exclusive-or logical-or semring. + * + * Uses negated exclusive or as the additive commutative monoid and + * \em or as the multiplicative monoid. The identities for each monoid are + * \em true and \em false, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using lxnorLor = grb::Semiring< + grb::operators::equal< D3, D4, D4 >, + grb::operators::logical_or< D1, D2, D3 >, + grb::identities::logical_true, grb::identities::logical_false + >; + + /** + * The equals logical-or semiring. + * + * Uses \em equals as the additive commutative monoid and \em or as the + * multiplicative monoid. The identities for each monoid are \em true and + * \em false, respectively. + * + * The three domains of the multiplicative monoid are: + * + * @tparam D1 The left-hand input domain of the multiplicative monoid + * @tparam D2 The right-hand input domain of the multiplicative monoid + * @tparam D3 The output domain of the multiplicative monoid + * + * The domains of the additive monoid are \a D3 and: + * + * @tparam D4 The right-hand input domain of the additive monoid, as well as + * the output domain of the additive monoid. + */ + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + using leqLor = lxnorLor< D1, D2, D3, D4 >; + + } + } // namespace grb #endif diff --git a/include/graphblas/utils/suppressions.h b/include/graphblas/utils/suppressions.h index 2b7b656af..db2217666 100644 --- a/include/graphblas/utils/suppressions.h +++ b/include/graphblas/utils/suppressions.h @@ -45,6 +45,10 @@ _Pragma( "GCC diagnostic push" ) ;\ _Pragma( "GCC diagnostic ignored \"-Wclass-memaccess\"" );\ + #define GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT \ + _Pragma( "GCC diagnostic push" ) ;\ + _Pragma( "GCC diagnostic ignored \"-Wint-in-bool-context\"" );\ + #define GRB_UTIL_RESTORE_WARNINGS \ _Pragma( "GCC diagnostic pop" );\ @@ -54,6 +58,7 @@ #define GRB_UTIL_IGNORE_STRING_TRUNCATION #define GRB_UTIL_IGNORE_CLASS_MEMACCESS #define GRB_UTIL_RESTORE_WARNINGS + #define GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT #endif #endif // end ``_H_GRB_UTILS_SUPRESSIONS'' diff --git a/include/transition/fuselets.h b/include/transition/fuselets.h new file mode 100644 index 000000000..dbe208c47 --- /dev/null +++ b/include/transition/fuselets.h @@ -0,0 +1,455 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * \ingroup TRANS + * + * Provides a set of fused level-1 and level-2 ALP kernels. + * + * The fused kernels are designed to be easily callable from existing code + * bases, using standard data structures such as raw pointers to vectors and the + * Compressed Row Storage (CRS) for sparse matrices. + * + * @author A. N. Yzelman + * @date 27/09/2024 + */ + +/** + * \defgroup TRANS_FUSELETS Fuselets + * \ingroup TRANS + * @{ + * + * While Algebraic Programming (ALP) is a programming model at its core, one + * may also use ALP to generate libraries callable from any application code, + * even if said application is not written using ALP. + * + * Fuselets use the ALP nonblocking backend by Mastoras et al. [1,2] in + * particular to generate a set of fused kernels that mix dense level-1 BLAS + * operations with `level-2' Sparse BLAS operations. The fused kernels exhibit + * higher performance due to enhanced data reuse. An example of a fuselet is a + * dense vector update, followed by a sparse matrix--vector multiplication, + * finally followed by a dot-product of two dense vectors: #update_spmv_dot_dsu. + * + * The exposed API for the fuselets is standard C. The header may also be safely + * included from standard C++. In standard configuration, ALP builds and + * installs both static and dynamic fuselet libraries. + * + * \warning Any application that relies on fuselets is \em strongly encouraged + * to enable standard OpenMP thread pinning (such as, for example, + * achieved by setting the OMP_PROC_BIND environment variable + * to true). + * + * While the presently-implemented fuselets were requested for accelerating a + * pre-existing distributed-memory solver, the implementation of the fuselets + * demonstrates how effectively ALP can be used for code generation. The tiny + * size of the implementation is particularly compelling, with typical gains + * between 50 to 200 percent (1.5-3x), depending on the vector sizes as well as + * the nonzero-to-size ratio [1,2], as well as on the depth of the fuselets. + * + * \note The maximum pipeline depth in the pre-defined fuselets here provided, + * is three. This pipeline depth forms a simple upper bound on achievable + * speedup. + * + * If using fuselets for scientific work, please cite the related papers [1,2]; + * and please refer to the same papers for more details about the nonblocking + * backend: + * + * [1] Design and implementation for nonblocking execution in GraphBLAS: + * tradeoffs and performance by Aristeidis Mastoras, Sotiris Anagnostidis, + * and A. N. Yzelman, ACM Transactions on Architecture and Code Optimization + * (TACO) 20, 1, Article 6 (2023). + * [2] Nonblocking execution in GraphBLAS by Aristeidis Mastoras, Sotiris + * Anagnostidis, and A. N. Yzelman in IPDPSW, pp. 230-233, IEEE (2022). + * + * Users interested in defining their own fuselets may directly edit the + * following files to add any new functions they require: + * - include/transition/fuselets.h, and + * - src/transition/fuselets.cpp. + * After any such edits, simply issue make install again, which will + * update your installed fuselets to include your newly-defined routines, thus + * providing an easily-customisable extensible framework. + * + * For fuselets that take matrix inputs, we here assume standard Compressed Row + * Storage (CRS), also known as Compressed Sparse Rows (CSR). The fuselets + * currently defined use a different postfix to distinguish between the possible + * types one may use for each of the three standard CRS arrays. For example, the + * postfix _dsu indicates a double-precision nonzero value array (d), + * an offset array of type size_t (s), and a column index array of type + * unsigned int (u). The postfix _dii instead indicates arrays + * of type double, int, and int, respectively. + * + * More details about the presently provided variants may be found in the + * following note. + * + * \note The de-facto industry-standard Compressed Row Storage (CRS) comprises + * three arrays: a row offset array, a column index array, and, for + * non-pattern matrices, a value array. The element types of the former + * two arrays can have multiple sensible values: + * - 64-bit unsigned integers for the offset array (s), default-sized + * (usually 32-bit) integers (i), or default-sized unsigned integers + * (u). + * - 64-bit unsigned integers for the column indices (s), default-sized + * (usually 32-bit) integers instead (i), or default-sized unsigned + * integers (u). + * For values, we presently only support double-precision (d). All postfix + * combinations presently supported are _dsu and _dii. + * + * \note Other postfix combinations are easily provided; if unclear how or if a + * presently missing combination should be of wide interest, please create + * a feature requests and/or contact the maintainers. + * + * Typical example work estimation for adding a new fuselets, assuming + * familiarity with the use of ALP and allowing copying code snippets from + * pre-existing fuselets: + * - writing the spec for a new fuselet: 12 minutes + * - implementing the new fuselet: 8 minutes + + * \note This was measured for spmv_dot_norm2. While the code includes proper + * error handling, it does not include time required for writing and + * adding automated tests. + */ + +#ifndef _H_ALP_FUSELETS +#define _H_ALP_FUSELETS + +#include // for size_t + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Initialisation routine that should be called before calling any fuselets. + * + * A user application shall + * 1. call this function after application start and before calling any + * fuselets, as well as + * 2. call this function after a call to #finalize_fuselets and before any + * subsequent calls to fuselets. + * This function shall not be called in any other case. + * + * \note For example, it is not legal to call this function twice without an + * call to #finalize_fuselets in between. + * + * To ensure proper clean-up before application termination, all calls to this + * function should be matched with a call to #finalize_fuselets. + * + * @returns Zero, if the initialisation has proceeded successfully. + * @returns Any other value, if initialisation has failed. In this case, + * it shall be as though this call had never occurred. In particular, + * any subsequent calls to fuselets shall (thus) induce undefined + * behaviour. + * + * The recommendation is to call this function once and as soon as possible + * after the application main function has started. + */ + int initialize_fuselets(); + + /** + * Cleans up fuselet resources. + * + * It may only be called once after every call to #initialize_fuselets. Cannot + * follow another call to #finalize_fuselets without a call to + * #initialize_fuselets in between. + * + * The recommendation is to call this function once and just before the + * application main function terminates. + */ + int finalize_fuselets(); + + /** + * Computes \f$ v, \beta \f$ from: + * + * - \f$ v = Ay + \alpha v \f$, + * - \f$ \beta = (r,v) \f$. + * + * @param[in,out] v The input and output vector \f$ v \f$ + * @param[out] beta The output scalar \f$ \beta \f$ + * + * The pointer \a v should point to an array, while the pointer \a beta should + * point to a scalar. In the case the initial values of \a v should be ignored, + * set the argument \a alpha to zero. + * + * @param[in] alpha The input scalar \f$ \alpha \f$ + * @param[in] ia The CRS row offset array of \f$ A \f$ + * @param[in] ij The CRS column index array of \f$ A \f$ + * @param[in] iv The CRS value array of \f$ A \f$ + * @param[in] y The input vector \f$ y \f$ + * + * Here, \a alpha is a scalar value. The pointers \a ia, \a ij, and \a iv + * correspond to a CRS of \f$ A \f$. The pointer \a y should point to an array. + * + * @param[in] r The input vector \f$ r \f$ + * + * The pointer \a r should point to an array. + * + * @param[in] n The row-wise \em and column-wise dimension of \f$ A \f$ + * + * The sizes of the arrays pointed to by \a v, \a y, and \a r should have size + * \f$ n \f$. + * + * @returns Zero if and only if the call executed successfully. + * @returns A nonzero error code otherwise. + */ + int spmv_dot_dsu( + double * const v, double * const beta, // outputs + const size_t * const ia, const unsigned int * const ij, + const double * const iv, const double * const y, + const double alpha, // input 1 + const double * const r, // input 2 + const size_t n // size + ); + + /** + * @see spmv_dot_dii + */ + int spmv_dot_dii( + double * const v, double * const beta, + const int * const ia, const int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n + ); + + /** + * Computes \f$ v, \beta, \gamma \f$ from: + * - \f$ v = Ay + \alpha v \f$, + * - \f$ \beta = (v,r) \f$, + * - \f$ \gamma = ||v||_2^2 \f$. + * + * @param[in,out] v The input and output vector \f$ v \f$ + * @param[out] beta The output scalar \f$ \beta \f$ + * @param[out] gamma The output scalar \f$ \gamma \f$ + * + * The pointer \a v should point to an array, while the pointers \a beta and + * \a gamma should point to scalars. In the case the initial values of \a v + * should be ignored, set the argument \a alpha to zero. + * + * @param[in] alpha The input scalar \f$ \alpha \f$ + * @param[in] ia The CRS row offset array of \f$ A \f$ + * @param[in] ij The CRS column index array of \f$ A \f$ + * @param[in] iv The CRS value array of \f$ A \f$ + * @param[in] y The input vector \f$ y \f$ + * + * Here, \a alpha is a scalar value. The pointers \a ia, \a ij, and \a iv + * correspond to a CRS of \f$ A \f$. The pointer \a y should point to an array. + * + * @param[in] r The input vector \f$ r \f$ + * + * The pointer \a r should point to an array. + * + * @param[in] n The row-wise \em and column-wise dimension of \f$ A \f$ + * + * The sizes of the arrays pointed to by \a v, \a y, and \a r should have size + * \f$ n \f$. + * + * @returns Zero if and only if the call executed successfully. + * @returns A nonzero error code otherwise. + */ + int spmv_dot_norm2_dsu( + double * const v, + double * const beta, double * const gamma, // outputs + const size_t * const ia, const unsigned int * const ij, + const double * const iv, const double * const y, + const double alpha, // input 1 + const double * const r, // input 2 + const size_t n // size + ); + + /** @see spmv_dot_norm2_dsu */ + int spmv_dot_norm2_dii( + double * const v, + double * const beta, double * const gamma, + const int * const ia, const int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n + ); + + /** + * Computes \f$ p, u, \alpha \f$ from: + * + * - \f$ p = z + \beta p \f$, + * - \f$ u = Ap \f$, + * - \f$ \alpha = (u,p) \f$. + * + * @param[in,out] p The input and output vector \f$ p \f$ + * @param[out] u The output vector \f$ u \f$ + * @param[out] alpha The output scalar \f$ \alpha \f$ + * + * The pointers \a p and \a u should be pointers to arrays, while \a alpha + * should be a pointer to a scalar. The contents of \a u need \em not be zeroed + * out(!)-- this fuselet will reset the vector. Similarly, the initial value of + * \a alpha will be ignored. + * + * @param[in] z The input vector \f$ z \f$ + * @param[in] beta The input scalar \f$ \beta \f$ + * + * The pointer \a z should point to an array while \a beta is a scalar. + * + * @param[in] ia The CRS row offset array of \f$ A \f$ + * @param[in] ij The CRS column index array of \f$ A \f$ + * @param[in] iv The CRS value array of \f$ A \f$ + * + * The pointers \a ia, \a ij, and \a iv correspond to a CRS of \f$ A \f$. + * + * @param[in] n The row-wise \em and column-wise dimension of \a A + * + * The size of the arrays \a p, \a u, and \a z is \f$ n \f$. The size of the + * array \f$ ia \f$ is \f$ n + 1 \f$. The size of the arrays \a ij and * \a iv + * is ia[n]. + * + * @returns Zero if and only if the call executed successfully. + * @returns A nonzero error code otherwise. + */ + int update_spmv_dot_dsu( + double * const p, double * const u, double * const alpha, // outputs + const double * const z, const double beta, // input 1 + const size_t * const ia, const unsigned int * const ij, + const double * const iv, // input 2 + const size_t n // size + ); + + /** @see update_spmv_dot_dsu */ + int update_spmv_dot_dii( + double * const p, double * const u, double * const alpha, + const double * const z, const double beta, + const int * const ia, const int * const ij, const double * const iv, + const size_t n + ); + + /** + * Computes \f$ x, r, \mathit{norm} \f$ from: + * + * - \f$ x = \alpha p + x \f$, + * - \f$ r = \beta u + r \f$, + * - \f$ \mathit{norm} = ||r||_2^2 \f$. + * + * @param[in,out] x The input and output vector \f$ x \f$ + * @param[in,out] r The input and output vector \f$ r \f$ + * @param[out] norm2 The 2-norm-squared of \f$ r \f$ + * + * The pointers \a x and \a r should be pointers to arrays, while \a norm2 + * should be a pointer to a scalar. The initial value of \a norm2 will be + * ignored. + * + * @param[in] alpha The input scalar \f$ \alpha \f$ + * @param[in] p The input vector \f$ p \f$ + * + * Here, \a alpha is a scalar while \a p is a pointer to an array. + * + * @param[in] beta The input scalar \f$ \beta \f$ + * @param[in] u The input vector \f$ u \f$ + * + * Here, \a beta is a scalar while \a u is a pointer to an array. + * + * @param[in] n The size of the vectors \a x, \a r, \a p, and \a u. + * + * @returns Zero if and only if the call executed successfully. + * @returns A nonzero error code otherwise. + */ + int update_update_norm2( + double * const x, double * const r, double * const norm2, // outputs + const double alpha, const double * const p, // input 1 + const double beta, const double * const u, // input 2 + const size_t n // size + ); + + /** + * Computes \f$ p \f$ from: + * + * - \f$ p = \alpha r + \beta v + \gamma p \f$ + * + * @param[in,out] p The input and output vector \f$ p \f$ + * + * The pointer \a p should be a pointer to an array. + * + * @param[in] alpha The input scalar \f$ \alpha \f$ + * @param[in] r The input vector \f$ r \f$ + * @param[in] beta The input scalar \f$ \beta \f$ + * @param[in] v The input vector \f$ v \f$ + * @param[in] gamma The input scalar \f$ \gamma \f$ + * + * Here, \a alpha, \a beta, \a gamma are scalars while \a p, \a r, and \a v are + * pointers to arrays. + * + * @param[in] n The size of the vectors \a p, \a r, and \a v. + * + * @returns Zero if and only if the call executed successfully. + * @returns A nonzero error code otherwise. + */ + int double_update( + double * const p, // output + const double alpha, const double * const r, // input 1 + const double beta, const double * const v, // input 2 + const double gamma, // input 3 + const size_t n // size + ); + + /** + * Computes \f$ x, r, \theta \f$ from: + * + * - \f$ x = \beta y + \omega z + \alpha x \f$, + * - \f$ r = \eta t + \zeta r \f$, + * - \f$ \theta = ||r||_2^2 \f$. + * + * @param[in,out] x The input and output vector \f$ x \f$ + * @param[in,out] r The input and output vector \f$ r \f$ + * @param[out] theta The output scalar \f$ \theta \f$ + * + * Here, \a x and \a r are pointers to arrays while \a theta is a pointer to + * a scalar. Any initial contents of what \a theta points to, will be ignored. + * If any initial contents of \a x should be ignored, set \a alpha to zero. If + * any initial contents of \a r should be ignored, set \a zeta to zero. + * + * @param[in] beta The input scalar \f$ \beta \f$ + * @param[in] y The input vector \f$ y \f$ + * @param[in] omega The input scalar \f$ \omega \f$ + * @param[in] z The input vector \f$ z \f$ + * @param[in] alpha The input scalar \f$ \alpha \f$ + * + * @param[in] eta The input scalar \f$ \eta \f$ + * @param[in] t The input vector \f$ t \f$ + * @param[in] zeta The input scalar \f$ \zeta \f$ + * + * @param[in] n The vector size (in number of elements). + * + * The sizes of the vectors \a x, \a r, \a y, \a z, and \a t point to, should + * equal \a n. + */ + int doubleUpdate_update_norm2( + double * const x, double * const r, double * const theta, // output + const double beta, const double * const y, + const double omega, const double * const z, + const double alpha, // input 1 + const double eta, const double * const t, + const double zeta, // input 2 + const size_t n // size + ); + +#ifdef __cplusplus +} // end extern "C" +#endif + +#endif // end ifdef _H_ALP_FUSELETS + +/** @} */ // ends doxygen page for fuselets + diff --git a/src/transition/CMakeLists.txt b/src/transition/CMakeLists.txt index cca57f9e1..1eec9a9a8 100644 --- a/src/transition/CMakeLists.txt +++ b/src/transition/CMakeLists.txt @@ -92,5 +92,8 @@ if( WITH_NONBLOCKING_BACKEND ) PRIVATE_LINK_LIBRARIES spsolver_shmem_parallel backend_nonblocking ) endif() + add_transition_library( fuselets STATIC "fuselets" ${CMAKE_CURRENT_SOURCE_DIR}/fuselets.cpp + PRIVATE_LINK_LIBRARIES backend_nonblocking + ) endif() diff --git a/src/transition/fuselets.cpp b/src/transition/fuselets.cpp new file mode 100644 index 000000000..a366fa6d0 --- /dev/null +++ b/src/transition/fuselets.cpp @@ -0,0 +1,669 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * \ingroup TRANS_FUSELETS + * + * Implements a set of fused level-1 and level-2 ALP kernels. + * + * The fused kernels are designed to be easily callable from existing code + * bases, using standard data structures such as raw pointers to vectors and the + * Compressed Row Storage for sparse matrices. + * + * As a secondary goal, this standard set of fuselets demos the so-called ALP + * native interface, show-casing the ease by which additional fuselets + * that satisfy arbitrary needs can be added. + * + * @author A. N. Yzelman + * @date 27/09/2024 + */ + +#include + +#include +#include + +#include + +#include "fuselets.h" + + +template< typename T > +using MySemiring = grb::semirings::plusTimes< T >; + +template< typename T > +using MyAddMonoid = grb::monoids::plus< T >; + +template< typename T > +using MyTimesMonoid = grb::monoids::times< T >; + +static MySemiring< double > dblSemiring; +static MyAddMonoid< double > dblPlusMonoid; +static MyTimesMonoid< double > dblTimesMonoid; + +int initialize_fuselets() { + const grb::RC rc = grb::init(); + if( rc != grb::SUCCESS ) { + return 255; + } else { + return 0; + } +} + +int finalize_fuselets() { + const grb::RC rc = grb::finalize(); + if( rc != grb::SUCCESS ) { + return 255; + } else { + return 0; + } +} + +template< typename OffsetT, typename IndexT, typename ValueT > +static int spmv_dot( + double * const v, double * const beta, + const OffsetT * const ia, const IndexT * const ij, + const ValueT * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + // typedef our matrix type, which depends on the above argument types + typedef grb::Matrix< + ValueT, // the matrix value type + grb::config::default_backend, // use the compile-time selected backend + // (set by CMakeLists.txt: nonblocking) + IndexT, IndexT, // the types of the row- and column-indices + OffsetT // the type of the ia array + > MyMatrixType; + + // catch trivial op + if( n == 0 ) { + return 0; + } + + // dynamic checks + assert( v != nullptr ); + assert( beta != nullptr ); + assert( ia != nullptr ); + assert( ij != nullptr ); + assert( iv != nullptr ); + assert( y != nullptr ); + assert( r != nullptr ); + + grb::Vector< double > alp_v = + grb::internal::template wrapRawVector< double >( n, v ); + const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n ); + const grb::Vector< double > alp_y = + grb::internal::template wrapRawVector< double >( n, y ); + + // perform operation 1 + grb::RC rc = grb::SUCCESS; + if( alpha == 0.0 || alpha == -0.0 ) { + rc = grb::set< grb::descriptors::dense >( alp_v, 0.0 ); + } else { + rc = grb::foldr< grb::descriptors::dense >( alpha, alp_v, dblTimesMonoid ); + } + + rc = rc ? rc : grb::mxv< + grb::descriptors::dense | grb::descriptors::force_row_major + >( + alp_v, alp_A, alp_y, dblSemiring + ); + + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot encountered error at operation 1: " + << grb::toString( rc ) << "\n"; + return 10; + } + + // perform operation 2 + const grb::Vector< double > alp_r = + grb::internal::template wrapRawVector< double >( n, r ); + double &alp_beta = *beta; + alp_beta = 0.0; + rc = rc ? rc : grb::dot< grb::descriptors::dense >( + alp_beta, alp_r, alp_v, dblSemiring ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot encountered error at operation 2: " + << grb::toString( rc ) << "\n"; + return 20; + } + + // done + rc = rc ? rc : grb::wait( alp_v ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot encountered error: " + << grb::toString( rc ) << "\n"; + return 255; + } else { + return 0; + } +} + +int spmv_dot_dsu( + double * const v, double * const beta, + const size_t * const ia, const unsigned int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + return spmv_dot< size_t, unsigned int, double >( + v, beta, ia, ij, iv, y, alpha, r, n ); +} + +int spmv_dot_dii( + double * const v, double * const beta, + const int * const ia, const int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + return spmv_dot< int, int, double >( + v, beta, ia, ij, iv, y, alpha, r, n ); +} + +template< typename OffsetT, typename IndexT, typename ValueT > +static int spmv_dot_norm2( + double * const v, + double * const beta, double * const gamma, + const OffsetT * const ia, const IndexT * const ij, + const ValueT * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + // typedef our matrix type, which depends on the above argument types + typedef grb::Matrix< + ValueT, // the matrix value type + grb::config::default_backend, // use the compile-time selected backend + // (set by CMakeLists.txt: nonblocking) + IndexT, IndexT, // the types of the row- and column-indices + OffsetT // the type of the ia array + > MyMatrixType; + + // catch trivial op + if( n == 0 ) { + return 0; + } + + // dynamic checks + assert( v != nullptr ); + assert( beta != nullptr ); + assert( gamma != nullptr ); + assert( ia != nullptr ); + assert( ij != nullptr ); + assert( iv != nullptr ); + assert( y != nullptr ); + assert( r != nullptr ); + + grb::Vector< double > alp_v = + grb::internal::template wrapRawVector< double >( n, v ); + const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n ); + const grb::Vector< double > alp_y = + grb::internal::template wrapRawVector< double >( n, y ); + + // perform operation 1 + grb::RC rc = grb::SUCCESS; + if( alpha == 0.0 || alpha == -0.0 ) { + rc = grb::set< grb::descriptors::dense >( alp_v, 0.0 ); + } else { + rc = grb::foldr< grb::descriptors::dense >( alpha, alp_v, dblTimesMonoid ); + } + + rc = rc ? rc : grb::mxv< + grb::descriptors::dense | grb::descriptors::force_row_major + >( + alp_v, alp_A, alp_y, dblSemiring + ); + + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 1: " + << grb::toString( rc ) << "\n"; + return 10; + } + + // perform operation 2 + const grb::Vector< double > alp_r = + grb::internal::template wrapRawVector< double >( n, r ); + double &alp_beta = *beta; + alp_beta = 0.0; + rc = rc ? rc : grb::dot< grb::descriptors::dense >( + alp_beta, alp_r, alp_v, dblSemiring ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 2: " + << grb::toString( rc ) << "\n"; + return 20; + } + + // perform operation 3 + double &alp_gamma = *gamma; + alp_gamma = 0.0; + rc = rc ? rc : grb::dot< grb::descriptors::dense >( + alp_gamma, alp_v, alp_v, dblSemiring ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 3: " + << grb::toString( rc ) << "\n"; + return 30; + } + + // done + rc = rc ? rc : grb::wait( alp_v ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error: " + << grb::toString( rc ) << "\n"; + return 255; + } else { + return 0; + } +} + +int spmv_dot_norm2_dsu( + double * const v, + double * const beta, double * const gamma, + const size_t * const ia, const unsigned int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + return spmv_dot_norm2< size_t, unsigned int, double >( + v, beta, gamma, ia, ij, iv, y, alpha, r, n ); +} + +int spmv_dot_norm2_dii( + double * const v, + double * const beta, double * const gamma, + const int * const ia, const int * const ij, + const double * const iv, const double * const y, + const double alpha, + const double * const r, + const size_t n +) { + return spmv_dot_norm2< int, int, double >( + v, beta, gamma, ia, ij, iv, y, alpha, r, n ); +} + +template< typename OffsetT, typename IndexT, typename ValueT > +static int update_spmv_dot( + double * const p, double * const u, double * const alpha, + const double * const z, const double beta, + const OffsetT * const ia, const IndexT * const ij, + const ValueT * const iv, + const size_t n +) { + // typedef our matrix type, which depends on the above argument types + // typedef our matrix type, which depends on the above argument types + typedef grb::Matrix< + ValueT, // the matrix value type + grb::config::default_backend, // use the compile-time selected backend + // (set by CMakeLists.txt: nonblocking) + IndexT, IndexT, // the types of the row- and column-indices + OffsetT // the type of the ia array + > MyMatrixType; + + // catch trivial op + if( n == 0 ) { + return 0; + } + + // simple dynamic sanity checks + assert( p != nullptr ); + assert( u != nullptr ); + assert( alpha != nullptr ); + assert( ia != nullptr ); + assert( ij != nullptr ); + assert( iv != nullptr ); + assert( ij[ n ] / n <= n ); +#ifndef NDEBUG + // we employ defensive programming and perform expensive input checks when + // compiled in debug mode: + for( size_t i = 0; i < n; ++i ) { + assert( ia[ i + 1 ] >= ia[ i ] ); + for( size_t k = ia[ i ]; k < ia[ i + 1 ]; ++k ) { + assert( ij[ k ] < n ); + } + } +#endif + + // get ALP versions of (input/)output containers + grb::Vector< double > alp_p = + grb::internal::template wrapRawVector< double >( n, p ); + grb::Vector< double > alp_u = + grb::internal::template wrapRawVector< double >( n, u ); + double &alp_alpha = *alpha; + + // do first op + const grb::Vector< double > alp_z = + grb::internal::template wrapRawVector< double >( n, z ); + grb::RC ret = grb::foldr< grb::descriptors::dense >( + beta, alp_p, + dblTimesMonoid + ); + ret = ret ? ret : grb::foldr< grb::descriptors::dense >( + alp_z, alp_p, + dblPlusMonoid + ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 1: " + << grb::toString( ret ) << "\n"; + return 1; + } + + // do second op + const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n ); + ret = grb::set< grb::descriptors::dense >( alp_u, 0.0 ); + ret = ret ? ret : grb::mxv< + grb::descriptors::dense | grb::descriptors::force_row_major + >( alp_u, alp_A, alp_p, dblSemiring ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 2: " + << grb::toString( ret ) << "\n"; + return 2; + } + + // do third op + alp_alpha = 0.0; + ret = grb::dot< grb::descriptors::dense >( + alp_alpha, alp_u, alp_p, dblSemiring ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 3: " + << grb::toString( ret ) << "\n"; + return 3; + } + + // done + ret = grb::wait( alp_p, alp_u ); + if( ret == grb::SUCCESS ) { + return 0; + } else { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error: " + << grb::toString( ret ) << "\n"; + return 255; + } +} + +int update_spmv_dot_dsu( + double * const p, double * const u, double * const alpha, + const double * const z, const double beta, + const size_t * const ia, const unsigned int * const ij, + const double * const iv, + const size_t n +) { + return update_spmv_dot< size_t, unsigned int, double >( + p, u, alpha, z, beta, ia, ij, iv, n ); +} + +int update_spmv_dot_dii( + double * const p, double * const u, double * const alpha, + const double * const z, const double beta, + const int * const ia, const int * const ij, const double * const iv, + const size_t n +) { + return update_spmv_dot< int, int, double >( + p, u, alpha, z, beta, ia, ij, iv, n ); +} + +int update_update_norm2( + double * const x, double * const r, double * const norm2, // outputs + const double alpha, const double * const p, // input 1 + const double beta, const double * const u, // input 2 + const size_t n // size +) { + // catch trivial op + if( n == 0 ) { + return 0; + } + + // simple dynamic sanity checks + assert( x != nullptr ); + assert( r != nullptr ); + assert( norm2 != nullptr ); + assert( p != nullptr ); + assert( u != nullptr ); + + // get (input/)output containers + grb::Vector< double > alp_x = + grb::internal::template wrapRawVector< double >( n, x ); + grb::Vector< double > alp_r = + grb::internal::template wrapRawVector< double >( n, r ); + double &alp_norm2 = *norm2; + + // perform operation 1 + const grb::Vector< double > alp_p = + grb::internal::template wrapRawVector< double >( n, p ); + grb::RC ret = grb::eWiseMul< grb::descriptors::dense >( + alp_x, alpha, alp_p, dblSemiring ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 1: " + << grb::toString( ret ) << "\n"; + return 1; + } + + // perform operation 2 + const grb::Vector< double > alp_u = + grb::internal::template wrapRawVector< double >( n, u ); + ret = grb::eWiseMul< grb::descriptors::dense >( + alp_r, beta, alp_u, dblSemiring ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 2: " + << grb::toString( ret ) << "\n"; + return 2; + } + + // perform operation 3 + alp_norm2 = 0.0; + ret = grb::dot< grb::descriptors::dense >( + alp_norm2, alp_r, alp_r, dblSemiring ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 3: " + << grb::toString( ret ) << "\n"; + return 3; + } + + // done + ret = grb::wait( alp_x, alp_r ); + if( ret != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets update_spmv_dot encountered error: " + << grb::toString( ret ) << "\n"; + return 255; + } else { + return 0; + } +} + +int double_update( + double * const p, + const double alpha, const double * const r, + const double beta, const double * const v, + const double gamma, + const size_t n +) { + // check trivial op + if( n == 0 ) { + return 0; + } + + // dynamic checks + assert( p != nullptr ); + assert( r != nullptr ); + assert( v != nullptr ); + + // get container views + grb::Vector< double > alp_p = + grb::internal::template wrapRawVector< double >( n, p ); + const grb::Vector< double > alp_r = + grb::internal::template wrapRawVector< double >( n, r ); + const grb::Vector< double > alp_v = + grb::internal::template wrapRawVector< double >( n, v ); + + grb::RC rc = grb::SUCCESS; + // p = gamma * p + if( gamma != 1.0 ) { + rc = grb::foldr< grb::descriptors::dense >( gamma, alp_p, dblTimesMonoid ); + } else if( gamma == 0.0 || gamma == -0.0 ) { + rc = grb::set( alp_p, 0 ); + } + + if( beta != 0.0 && beta != -0.0 ) { + if( beta != 1.0 ) { + // p = beta .* v + gamma .* p + rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >( + alp_p, beta, alp_v, dblSemiring ); + } else { + // p = v + (gamma .* p) + rc = rc ? rc : grb::foldr< grb::descriptors::dense >( + alp_v, alp_p, dblPlusMonoid ); + } + } + + if( alpha != 0.0 && beta != -0.0 ) { + if( alpha != 1.0 ) { + // p = alpha .* r + beta .* v + gamma .* p + rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >( + alp_p, alpha, alp_r, dblSemiring ); + } else { + // p = r + (beta .* v + gamma .* p) + rc = rc ? rc : grb::foldr< grb::descriptors::dense >( + alp_r, alp_p, dblPlusMonoid ); + } + } + + // done + rc = rc ? rc : grb::wait( alp_p ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets double_update encountered error: " + << grb::toString( rc ) << "\n"; + return 255; + } else { + return 0; + } +} + +int doubleUpdate_update_norm2( + double * const x, double * const r, double * const theta, + const double beta, const double * const y, + const double omega, const double * const z, + const double alpha, + const double eta, const double * const t, + const double zeta, + const size_t n +) { + // check trivial dispatch + if( n == 0 ) { + return 0; + } + + // dynamic checks + assert( x != nullptr ); + assert( r != nullptr ); + assert( theta != nullptr ); + assert( y != nullptr ); + assert( z != nullptr ); + assert( t != nullptr ); + + // get outputs first + grb::Vector< double > alp_x = + grb::internal::template wrapRawVector< double >( n, x ); + grb::Vector< double > alp_r = + grb::internal::template wrapRawVector< double >( n, r ); + double &alp_theta = *theta; + + // do first operation + grb::RC rc = grb::SUCCESS; + const grb::Vector< double > alp_z = + grb::internal::template wrapRawVector< double >( n, z ); + const grb::Vector< double > alp_y = + grb::internal::template wrapRawVector< double >( n, y ); + if( alpha == 0.0 || alpha == -0.0 ) { + rc = grb::set< grb::descriptors::dense >( alp_x, 0.0 ); + } else if( alpha != 1.0 ) { + rc = grb::foldr< grb::descriptors::dense >( alpha, alp_x, dblTimesMonoid ); + } + if( omega != 0.0 && omega != -0.0 ) { + if( omega != 1.0 ) { + rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >( + alp_x, omega, alp_z, dblSemiring ); + } else { + rc = rc ? rc : grb::foldr< grb::descriptors::dense >( + alp_z, alp_x, dblPlusMonoid ); + } + } + if( beta != 0.0 && beta != -0.0 ) { + if( beta != 1.0 ) { + rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >( + alp_x, beta, alp_y, dblSemiring ); + } else { + rc = rc ? rc : grb::foldr< grb::descriptors::dense >( + alp_z, alp_x, dblPlusMonoid ); + } + } + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets doubleUpdate_update_dot " + << "encountered error at operation 1: " << grb::toString( rc ) << "\n"; + return 10; + } + + // perform operation 2: + const grb::Vector< double > alp_t = + grb::internal::template wrapRawVector< double >( n, t ); + if( zeta == 0.0 || zeta == -0.0 ) { + rc = grb::set< grb::descriptors::dense >( alp_r, 0.0 ); + } else if( zeta != 1.0 ) { + rc = grb::foldr< grb::descriptors::dense >( + zeta, alp_r, dblTimesMonoid ); + } + if( eta != 0.0 && eta != -0.0 ) { + if( eta != 1.0 ) { + rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >( + alp_r, eta, alp_t, dblSemiring ); + } else { + rc = rc ? rc : grb::foldr< grb::descriptors::dense >( + alp_t, alp_r, dblPlusMonoid ); + } + } + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets doubleUpdate_update_dot " + << "encountered error at operation 2: " << grb::toString( rc ) << "\n"; + return 20; + } + + // perform last op: + alp_theta = 0.0; + rc = grb::dot< grb::descriptors::dense >( + alp_theta, alp_r, alp_r, dblSemiring ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets doubleUpdate_update_dot " + << "encountered error at operation 3: " << grb::toString( rc ) << "\n"; + return 30; + } + + // done + rc = grb::wait( alp_x, alp_r ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/Fuselets doubleUpdate_update_dot encountered error: " + << grb::toString( rc ) << "\n"; + return 255; + } else { + return 0; + } +} + diff --git a/tests/performance/CMakeLists.txt b/tests/performance/CMakeLists.txt index 732431691..3487cf065 100644 --- a/tests/performance/CMakeLists.txt +++ b/tests/performance/CMakeLists.txt @@ -56,6 +56,11 @@ add_grb_executables( dot-openmp dot.cpp $ ADDITIONAL_LINK_LIBRARIES backend_headers_nodefs OpenMP::OpenMP_CXX ) +add_grb_executables( fuselets_performance fuselets.cpp + BACKENDS reference_omp NO_BACKEND_NAME + ADDITIONAL_LINK_LIBRARIES fuselets +) + add_grb_executables( scaling scaling.cpp ../unit/parser.cpp BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking diff --git a/tests/performance/fuselets.cpp b/tests/performance/fuselets.cpp new file mode 100644 index 000000000..6ed10e7d1 --- /dev/null +++ b/tests/performance/fuselets.cpp @@ -0,0 +1,970 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include //for strtoumax + +#include + +#include + +#include + +#include + + +struct Output { + grb::utils::TimerResults times; + grb::RC error; + size_t reps_used; +}; + +struct Input { + size_t n; + size_t rep; +}; + +template< typename T > +static grb::Matrix< T > setupUpperDiagonalMatrix( const size_t n, const T value ) { + return grb::algorithms::matrices< double >::eye( n, n, value, 1 ); +} + +template< typename T > +static grb::RC setupVectors( + grb::Vector< T > &x, + grb::Vector< T > &y, + grb::Vector< T > &z +) { + grb::RC rc = grb::set< grb::descriptors::use_index >( x, 0.0 ); + rc = rc ? rc : grb::set( y, 0.0 ); + rc = rc ? rc : grb::set( z, 2.0 ); + return rc; +} + +template< typename T > +static void getCRS( + const grb::Matrix< T > &Am, + const T * &av, const size_t * &ai, const unsigned int * &aj +) { + const auto &A = grb::internal::getCRS( Am ); + av = A.values; + ai = A.col_start; + aj = A.row_index; +} + +static grb::RC verifySpMV( const double * const y, const size_t n ) { + if( y[ n - 1 ] != 1.0 ) { + std::cerr << "\t\t error during SpMV output verification (I)\n" + << "\t\t\t expected: 1.0, got: " << y[ n - 1 ] + << " at position " << (n-1) << "\n"; + return grb::FAILED; + } + for( size_t i = 0; i < n - 1; ++i ) { + if( y[ i ] != static_cast< double >( 2 * (i + 1) + 1 ) ) { + std::cerr << "\t\t errpr during SpMV output verification (II)\n" + << "\t\t\t expected: " << ( 2 * ( i + 1 ) + 1 ) << ", got: " << y[ i ] + << " at position " << i << "\n"; + return grb::FAILED; + } + } + return grb::SUCCESS; +} + +void test_spmv_dot( const struct Input &in, struct Output &out ) { + grb::utils::Timer timer; + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > reals; + + // set io time to 0 + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n ); + grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 ); + grb::RC rc = setupVectors( xv, zv, yv ); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot: test initialisation FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + + double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw(); + const double * av = nullptr; + const size_t * ai = nullptr; const unsigned int * aj = nullptr; + getCRS( Am, av, ai, aj ); + double beta = 0.0; + + rc = rc ? rc : (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED); + + // verify + if( rc == grb::SUCCESS ) { + // A has values 2 above its diagonal + // x is a dense vector with values (0, 1, ..., n-1) + // y is a dense vector of twos + // z is a dense vector of zeroes + // therefore, calling the spmv_dot fuselet with alpha = 0.5 should result in: + // - beta = 0.0 + // - y dense with values(3, 5, 7, ..., 2(n-1), 1) + beta = 1.0; + const int fuselet_rc = spmv_dot_dsu( + y, &beta, + ai, aj, av, + x, + 0.5, z, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t test_spmv_dot: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + if( beta != 0.0 || beta != -0.0 ) { + std::cerr << "\t test_spmv_dot: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + out.error = verifySpMV( y, in.n ); + if( out.error != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot: verification FAILED (III)\n"; + return; + } + } + + out.times.preamble = timer.time(); + // end preamble + + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot: test initialisation FAILED (II)\n"; + out.error = rc; + return; + } + + // benchmark + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + beta = 0.0; + (void) spmv_dot_dsu( + y, &beta, + ai, aj, av, + x, + 0.5, z, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + (void) grb::foldr< grb::descriptors::dense >( 0.5, yv, + grb::operators::mul< double >() ); + (void) grb::mxv< grb::descriptors::dense >( yv, Am, xv, + grb::semirings::plusTimes< double >() ); + beta = 0.0; + (void) grb::dot< grb::descriptors::dense >( beta, zv, yv, + grb::semirings::plusTimes< double >() ); + (void) grb::wait(); + } + const double slow = timer.time(); + + // record speedup + std::cout << "\t reference_omp: " << slow << " ms.\n\t fuselets: " << fast + << " ms.\n\t (for " << in.rep << " spmv_dots)\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +void test_spmv_dot_norm2( + const struct Input &in, struct Output &out +) { + grb::utils::Timer timer; + + // I/O phase is empty + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n ); + grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 ); + grb::RC rc = setupVectors( xv, yv, zv ); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + + double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw(); + const double * av = nullptr; + const size_t * ai = nullptr; const unsigned int * aj = nullptr; + getCRS( Am, av, ai, aj ); + double beta, gamma; + beta = gamma = 0.0; + + rc = initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED; + + // verify + if( rc == grb::SUCCESS ) { + // A and x are as in the above test + // y is dense with entries 4.23 everywhere + // z is dense with entries twos everywhere + // therefore, as in the above, after spmv_dot_norm2, the output + // - z should be dense with values (3, 5, 7, ..., 2(n-1), 1) + // - beta should be zero + // new to the above, gamma should read norm2-squared of z + beta = 1.13; + gamma = 2.17; + const int fuselet_rc = spmv_dot_norm2_dsu( + z, &beta, &gamma, + ai, aj, av, + x, 0.5, y, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t test_spmv_dot_norm2: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + if( beta != 0.0 || beta != -0.0 ) { + std::cerr << "\t test_spmv_dot_norm2: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + double check_gamma = z[ in.n - 1]; + check_gamma *= check_gamma; + for( size_t i = 0; i < in.n - 1; ++i ) { + check_gamma += z[ i ] * z[ i ]; + } + if( !grb::utils::equals( gamma, check_gamma, 2 * in.n - 1) ) { + std::cerr << "\t test_spmv_dot_norm2: verification FAILED (III)\n" + << "\t\t expected: " << check_gamma << ", got: " << gamma << "\n"; + out.error = grb::FAILED; + return; + } + out.error = verifySpMV( z, in.n ); + if( out.error != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot_norm2: verification FAILED (IV)\n"; + return; + } + } + + out.times.preamble = timer.time(); + // end preamble + + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + + // benchmark + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + beta = gamma = 0.0; + (void) spmv_dot_norm2_dsu( + z, &beta, &gamma, + ai, aj, av, + x, 0.5, y, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + grb::semirings::plusTimes< double > plusTimes_FP64; + (void) grb::foldr< grb::descriptors::dense >( 0.5, zv, + grb::operators::mul< double >() ); + (void) grb::mxv< grb::descriptors::dense >( zv, Am, xv, + grb::semirings::plusTimes< double >() ); + beta = gamma = 0.0; + (void) grb::dot< grb::descriptors::dense >( beta, yv, zv, plusTimes_FP64 ); + (void) grb::dot< grb::descriptors::dense >( gamma, zv, zv, plusTimes_FP64 ); + (void) grb::wait(); + } + const double slow = timer.time(); + + // record speedup: + std::cout << "\t test_spmv_dot_norm2 (" << in.rep << " repetitions):\n" + << "\t\t reference_omp: " << slow << " ms.\n" + << "\t\t fuselets: " << fast << " ms.\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +void test_update_spmv_dot( + const struct Input &in, struct Output &out +) { + grb::utils::Timer timer; + + // I/O phase is empty + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n ); + grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 ); + grb::RC rc = setupVectors( xv, yv, zv ); + rc = rc ? rc : grb::set< grb::descriptors::dense >( yv, 4.23 ); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + + double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw(); + const double * av = nullptr; + const size_t * ai = nullptr; const unsigned int * aj = nullptr; + getCRS( Am, av, ai, aj ); + double beta, gamma; + beta = -1.615; + gamma = 1.17; + + rc = initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED; + + // verify + if( rc == grb::SUCCESS ) { + // A and x are as in the above test: + // - A has values 2 above its diagonal + // - x is a dense vector with values (0, 1, ..., n-1) + // y is dense with entries 4.23 everywhere + // z is dense with entries twos everywhere + // therefore, after update_spmv_dot_dsu, the output + // - z should be dense with value one (1.0) everywhere + // - x should be dense with value two (2.0) everywhere + // - except at its last position, which should read zero (0) + // - gamma should read 2.0 * (n-1) + const int fuselet_rc = update_spmv_dot_dsu( + z, x, &gamma, + y, beta, + ai, aj, av, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t update_spmv_dot: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + { + bool fail = false; + for( size_t i = 0; i < in.n; ++i ) { + if( !grb::utils::equals( z[ i ], 1.0, 3 ) ) { + fail = true; + std::cerr << "\t\t z[ " << i << " ], expected one, got " + << z[ i ] << "\n"; + } + } + if( fail ) { + std::cerr << "\t update_spmv_dot: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + } + double check_gamma = z[ in.n - 1 ] * x[ in.n - 1 ]; + for( size_t i = 0; i < in.n - 1; ++i ) { + check_gamma += z[ i ] * x[ i ]; + } + if( !grb::utils::equals( gamma, check_gamma, 2 * in.n - 1 ) ) { + std::cerr << "\t update_spmv_dot: verification FAILED (III)\n" + << "\t\t expected: " << check_gamma << ", got: " << gamma << "\n"; + out.error = grb::FAILED; + return; + } + { + bool fail = false; + constexpr double zero = 0.0; + constexpr double two = 2.0; + if( !grb::utils::equals( x[ in.n - 1 ], zero, 4 ) ) { + fail = true; + std::cerr << "\t\t x[ " << (in.n-1) << " ] (last entry) " + << "expected zero, got " << x[ in.n - 1 ] << "\n"; + } + for( size_t i = 0; i < in.n - 1; ++i ) { + if( !grb::utils::equals( x[ i ], two, 4 ) ) { + fail = true; + std::cerr << "\t\t x[ " << i << " ] expected 2.0, got " + << x[ i ] << "\n"; + } + } + if( fail ) { + std::cerr << "\t update_spmv_dot: verification FAILED (IV)\n"; + out.error = grb::FAILED; + return; + } + } + } + + out.times.preamble = timer.time(); + // end preamble + + if( rc != grb::SUCCESS ) { + std::cerr << "\t update_spmv_dot: test initialisation FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + + // benchmark + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + beta = -1.615; + gamma = 1.17; + (void) update_spmv_dot_dsu( + z, x, &gamma, + y, beta, + ai, aj, av, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + beta = -1.615; + gamma = 1.17; + grb::semirings::plusTimes< double > plusTimes_FP64; + (void) grb::foldr< grb::descriptors::dense >( beta, zv, + grb::operators::mul< double >() ); + (void) grb::foldr< grb::descriptors::dense >( yv, zv, + grb::operators::add< double >() ); + (void) grb::set< grb::descriptors::dense >( xv, 0.0 ); + (void) grb::mxv< grb::descriptors::dense >( xv, Am, zv, plusTimes_FP64 ); + gamma = 0.0; + (void) grb::dot< grb::descriptors::dense >( gamma, zv, xv, plusTimes_FP64 ); + (void) grb::wait(); + } + const double slow = timer.time(); + + // record speedup: + std::cout << "\t test_update_spmv_dot (" << in.rep << " repetitions):\n" + << "\t\t reference_omp: " << slow << " ms.\n" + << "\t\t fuselets: " << fast << " ms.\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +void test_update_update_norm2( + const struct Input &in, struct Output &out +) { + grb::utils::Timer timer; + + // I/O phase is empty + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > xv( in.n ), pv( in.n ), rv( in.n ), uv( in.n ); + grb::RC rc = setupVectors( xv, pv, rv ); + rc = rc ? rc : grb::set( uv, 1.0 ); + double alpha, beta; + alpha = 5.0; + beta = -2.0; + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_update_update_norm2: initalisation FAILED (I)\n"; + out.error = rc; + return; + } + + rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_update_update_norm2: initialisation FAILED (II)\n"; + out.error = rc; + return; + } + + // On successful initialisation, the vector data are as follows: + // - x is a dense vector (0, 1, 2, ..., in.n-1) + // - p is a dense vector with values zero (0) + // - r is a dense vector with values two (2) + // - u is a dense vector with values one (1) + // hence the expected output of applying the update_update_norm2 fuselet: + // - x is a dense vector (0, 1, 2, ..., in.n-1), + // - r is a dense vector (0, 0, ..., 0), and therefore + // - the norm should be zero also + + // get raw pointers to vector data + double * const x = xv.raw(); + double * const r = rv.raw(); + const double * const p = pv.raw(); + const double * const u = uv.raw(); + + // verify + { + double norm = 50.0; + const int fuselet_rc = update_update_norm2( + x, r, &norm, + alpha, p, + beta, u, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t update_update_norm2: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + if( norm != 0.0 || norm != -0.0 ) { + std::cerr << "\t update_update_norm2: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + bool fail = false; + for( size_t i = 0; i < in.n; ++i ) { + constexpr double zero = 0.0; + if( !grb::utils::equals( x[ i ], static_cast< double >(i), 3 ) ) { + fail = true; + std::cerr << "\t\t x[ " << i << " ]: expected " << i << ", got " << x[ i ] + << "\n"; + } + if( !grb::utils::equals( r[ i ], zero, 3 ) ) { + fail = true; + std::cerr << "\t\t r[ " << i << " ]: expected zero, got " << r[ i ] << "\n"; + } + } + if( fail ) { + std::cerr << "\t update_update_norm2: verification FAILED (III)\n"; + out.error = grb::FAILED; + return; + } + } + + // benchmark + { + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + double norm2 = 167; + (void) update_update_norm2( + x, r, &norm2, + alpha, p, + beta, u, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + double norm2 = 167; + grb::semirings::plusTimes< double > plusTimes_FP64; + (void) grb::eWiseMul< grb::descriptors::dense >( xv, alpha, pv, + plusTimes_FP64 ); + (void) grb::eWiseMul< grb::descriptors::dense >( rv, beta, uv, + plusTimes_FP64 ); + norm2 = 0.0; + (void) grb::dot< grb::descriptors::dense >( norm2, rv, rv, plusTimes_FP64 ); + (void) grb::wait(); + } + + const double slow = timer.time(); + + // record speedup: + std::cout << "\t test_update_update_norm2 (" << in.rep << " repetitions):\n" + << "\t\t reference_omp: " << slow << " ms.\n" + << "\t\t fuselets: " << fast << " ms.\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + } + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +void test_double_update( + const struct Input &in, struct Output &out +) { + grb::utils::Timer timer; + + // I/O phase is empty + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > rv( in.n ), vv( in.n ), pv( in.n ); + grb::RC rc = setupVectors( rv, vv, pv ); + double alpha, beta, gamma; + alpha = 2.0; + beta = 17.7; + gamma = 0.5; + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_double_update: initalisation FAILED (I)\n"; + out.error = rc; + return; + } + + rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_double_update: initialisation FAILED (II)\n"; + out.error = rc; + return; + } + + // On successful initialisation, the vector data are as follows: + // - r is a dense vector (0, 1, 2, ..., in.n-1) + // - v is a dense vector with values zero (0) + // - p is a dense vector with values two (2) + // hence the expected output of applying the update_update_norm2 fuselet: + // - p is a dense vector (1, 3, 5, ..., 2*in.n-1) + + // get raw pointers to vector data + double * const p = pv.raw(); + const double * const r = rv.raw(); + const double * const v = vv.raw(); + + // verify + { + const int fuselet_rc = double_update( + p, alpha, r, beta, v, gamma, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t double_update: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + bool fail = false; + for( size_t i = 0; i < in.n; ++i ) { + if( !grb::utils::equals( p[ i ], 2*static_cast< double >(i)+1, 5 ) ) { + fail = true; + std::cerr << "\t\t p[ " << i << " ]: expected " + << (i*2+1) << ", got " << p[ i ] << "\n"; + } + } + if( fail ) { + std::cerr << "\t double_update: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + } + + // benchmark + { + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + (void) double_update( + p, alpha, r, beta, v, gamma, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + grb::semirings::plusTimes< double > plusTimes_FP64; + (void) grb::foldr< grb::descriptors::dense >( gamma, pv, + grb::monoids::times< double >() ); + (void) grb::eWiseMul< grb::descriptors::dense >( pv, alpha, rv, + plusTimes_FP64 ); + (void) grb::eWiseMul< grb::descriptors::dense >( pv, beta, vv, + plusTimes_FP64 ); + (void) grb::wait(); + } + + const double slow = timer.time(); + + // record speedup: + std::cout << "\t test_double_update (" << in.rep << " repetitions):\n" + << "\t\t reference_omp: " << slow << " ms.\n" + << "\t\t fuselets: " << fast << " ms.\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + } + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +void test_doubleUpdate_update_norm2( + const struct Input &in, struct Output &out +) { + grb::utils::Timer timer; + + // I/O phase is empty + out.times.io = 0; + + // start preamble + timer.reset(); + grb::Vector< double > xv( in.n ), rv( in.n ), yv( in.n ), zv( in.n ), + tv( in.n ); + grb::RC rc = setupVectors( xv, rv, yv ); + rc = rc ? rc : grb::set( rv, 0.5 ); + rc = rc ? rc : grb::set( zv, 1.0 ); + rc = rc ? rc : grb::set( tv, -1.0 ); + double theta, beta, omega, alpha, eta, zeta; + theta = 17.7; + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_doubleUpdate_update_norm2: initalisation FAILED (I)\n"; + out.error = rc; + return; + } + + rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED); + if( rc != grb::SUCCESS ) { + std::cerr << "\t test_doubleUpdate_update_norm2: initialisation FAILED (II)\n"; + out.error = rc; + return; + } + + // On successful initialisation, the scalar and vector data are as follows: + beta = 0.5; + omega = -1.0; + alpha = 2.0; + eta = 1.0; + zeta = 2.0; + theta = 3.14; + // - x is a dense vector (0, 1, 2, ..., in.n-1) + // - r is a dense vector with values 0.5 everywhere + // - y is a dense vector with values two (2) + // - z is a dense vector with values one (1) + // - t is a dense vector with values minus one (-1) + // hence the expected output of applying the update_update_norm2 fuselet: + // - x is a dense vector (0, 2, ..., 2*in.n-2) + // - r is a dense vector with values zero (0) + // - theta will be zero + + // get raw pointers to vector data + double * const x = xv.raw(); + double * const r = rv.raw(); + const double * const y = yv.raw(); + const double * const z = zv.raw(); + const double * const t = tv.raw(); + + // verify + { + constexpr double zero = 0.0; + const int fuselet_rc = doubleUpdate_update_norm2( + x, r, &theta, + beta, y, + omega, z, + alpha, + eta, t, + zeta, + in.n + ); + if( fuselet_rc != 0 ) { + std::cerr << "\t double_update: verification FAILED (I)\n"; + out.error = grb::FAILED; + return; + } + bool fail = false; + for( size_t i = 0; i < in.n; ++i ) { + if( !grb::utils::equals( x[ i ], 2*static_cast< double >(i), 5 ) ) { + fail = true; + std::cerr << "\t\t x[ " << i << " ]: expected " + << (i*2) << ", got " << x[ i ] << "\n"; + } + if( !grb::utils::equals( r[ i ], zero, 3 ) ) { + fail = true; + std::cerr << "\t\t r[ " << i << " ]: expected zero, got " + << r[ i ] << "\n"; + } + } + if( fail ) { + std::cerr << "\t doubleUpdate_update_norm2: verification FAILED (II)\n"; + out.error = grb::FAILED; + return; + } + if( !grb::utils::equals( theta, zero, 2 * in.n - 1 ) ) { + std::cerr << "\t doubleUpdate_update_norm2: verification FAILED (III)\n" + << "\t\t expected zero, got " << theta << "\n"; + out.error = grb::FAILED; + return; + } + } + + // benchmark + { + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + theta = 3.14; + (void) doubleUpdate_update_norm2( + x, r, &theta, + beta, y, + omega, z, + alpha, + eta, t, + zeta, + in.n + ); + } + const double fast = timer.time(); + + timer.reset(); + for( size_t i = 0; i < in.rep; ++i ) { + grb::semirings::plusTimes< double > plusTimes_FP64; + (void) grb::foldr< grb::descriptors::dense >( alpha, xv, + grb::monoids::times< double >() ); + (void) grb::eWiseMul< grb::descriptors::dense >( xv, omega, zv, + plusTimes_FP64 ); + (void) grb::eWiseMul< grb::descriptors::dense >( xv, beta, yv, + plusTimes_FP64 ); + (void) grb::foldr< grb::descriptors::dense >( zeta, rv, + grb::monoids::times< double >() ); + (void) grb::eWiseMul< grb::descriptors::dense >( rv, eta, tv, + plusTimes_FP64 ); + theta = 3.17; + (void) grb::dot< grb::descriptors::dense >( theta, rv, rv, + plusTimes_FP64 ); + (void) grb::wait(); + } + + const double slow = timer.time(); + + // record speedup: + std::cout << "\t test_doubleUpdate_update_norm2 (" << in.rep << " repetitions):" + << "\n\t\t reference_omp: " << slow << " ms.\n" + << "\t\t fuselets: " << fast << " ms.\n"; + out.times.useful = + static_cast< double >(slow - fast) / static_cast< double >(in.rep); + } + + // postamble, and done + timer.reset(); + out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC; + out.times.postamble = timer.time(); +} + +int main( int argc, char ** argv ) { + // sanity check on program args + if( argc < 2 || argc > 4 ) { + std::cout << "Usage: " << argv[ 0 ] << " (inner iterations) " + << "(outer iterations)" << std::endl; + return 0; + } + std::cout << "Test executable: " << argv[ 0 ] << std::endl; + + // prepare input, output structs + struct Input in; + struct Output out; + + // get vector length + char * end = NULL; + in.n = strtoumax( argv[ 1 ], &end, 10 ); + if( argv[ 1 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 1 ] << " " + << "for vector length." << std::endl; + std::cout << "Test FAILED\n" << std::endl; + return 10; + } + + // get inner number of iterations + in.rep = grb::config::BENCHMARKING::inner(); + if( argc >= 3 ) { + in.rep = strtoumax( argv[ 2 ], &end, 10 ); + if( argv[ 2 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 2 ] << " " + << "for number of inner experiment repititions." << std::endl; + std::cout << "Test FAILED\n" << std::endl; + return 20; + } + } + + // get outer number of iterations + size_t outer = grb::config::BENCHMARKING::outer(); + if( argc >= 4 ) { + outer = strtoumax( argv[ 3 ], &end, 10 ); + if( argv[ 3 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 3 ] << " " + << "for number of outer experiment repititions." << std::endl; + std::cout << "Test FAILED\n" << std::endl; + return 30; + } + } + + // prepare benchmarker + grb::Benchmarker< grb::AUTOMATIC > bench; + + // start tests + std::cout << "\nBenchmark label: spmv_dot of size " << in.n + << std::endl; + grb::RC rc = bench.exec( &(test_spmv_dot), in, out, 1, outer, true ); + + if( rc == grb::SUCCESS ) { + std::cout << "\nBenchmark label: spmv_dot_norm2 of size " << in.n + << std::endl; + rc = bench.exec( &(test_spmv_dot_norm2), in, out, 1, outer, true ); + } + + if( rc == grb::SUCCESS ) { + std::cout << "\nBenchmark label: update_spmv_dot of size " << in.n + << std::endl; + rc = bench.exec( &(test_update_spmv_dot), in, out, 1, outer, true ); + } + + if( rc == grb::SUCCESS ) { + std::cout << "\nBenchmark label: update_update_norm2 of size " << in.n + << std::endl; + rc = bench.exec( &(test_update_update_norm2), in, out, 1, outer, true ); + } + + if( rc == grb::SUCCESS ) { + std::cout << "\nBenchmark label: double_update of size " << in.n + << std::endl; + rc = bench.exec( &(test_double_update), in, out, 1, outer, true ); + } + + if( rc == grb::SUCCESS ) { + std::cout << "\nBenchmark label: doubleUpdate_update_norm2 of size " << in.n + << std::endl; + rc = bench.exec( &(test_doubleUpdate_update_norm2), in, out, 1, outer, true ); + } + + if( rc != grb::SUCCESS ) { + std::cerr << "Test launch failed: " << grb::toString( rc ) << std::endl; + std::cout << "Test FAILED\n" << std::endl; + return EXIT_FAILURE; + } + + if( out.error != grb::SUCCESS ) { + std::cerr << "Functional test exits with nonzero exit code. " + << "Reason: " << grb::toString( out.error ) << "." << std::endl; + std::cout << "Test FAILED\n" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "\nNOTE: please check the above performance figures manually-- " + << "the useful timings should positive, indicating speedup for fuselets vs. " + << "regular blocking execution (for large enough vector lengths)\n"; + + // done + std::cout << "Test OK\n" << std::endl; + return EXIT_SUCCESS; +} + diff --git a/tests/performance/performancetests.sh b/tests/performance/performancetests.sh index f8f198b6a..576b20fc5 100755 --- a/tests/performance/performancetests.sh +++ b/tests/performance/performancetests.sh @@ -196,6 +196,15 @@ if [[ -z $DATASETTORUN && ( -z "$EXPTYPE" || "$EXPTYPE" == "KERNEL" ) ]]; then tail -2 ${TEST_OUT_DIR}/dot-openmp egrep 'label|Overall timings|0,' ${TEST_OUT_DIR}/dot-openmp | grep -v Outer >> ${TEST_OUT_DIR}/benchmarks + echo ">>> [x] [x] Testing fuselets versus standard reference_omp using a" + echo " problem size of 100 000 000." + echo " " + ${TEST_BIN_DIR}/fuselets_performance 100000000 1 30 &> ${TEST_OUT_DIR}/fuselets_performance + head -1 ${TEST_OUT_DIR}/fuselets_performance + grep "Test OK" ${TEST_OUT_DIR}/fuselets_performance || echo "Test FAILED" + echo " " + egrep 'label|Overall timings|0,' ${TEST_OUT_DIR}/fuselets_performance | grep -v Outer >> ${TEST_OUT_DIR}/benchmarks + fi # start definition of helper functions for remainder performance tests diff --git a/tests/performance/spmspm.cpp b/tests/performance/spmspm.cpp index 84e8bf79c..61dcf769a 100644 --- a/tests/performance/spmspm.cpp +++ b/tests/performance/spmspm.cpp @@ -305,7 +305,7 @@ void grbProgram( const struct input &data_in, struct output &out ) { if( s == 0 ) { std::cout << "Info: cold mxm completed" << ". Time taken was " << single_time << " ms. " - << "Deduced inner repetitions parameter of " << out.rep << " " + << "Deduced inner repetitions parameter of " << deduced_inner_reps << " " << "to take 1 second or more per inner benchmark.\n"; out.rep = deduced_inner_reps; } diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt index 1f99446ee..7e7f2af4a 100644 --- a/tests/smoke/CMakeLists.txt +++ b/tests/smoke/CMakeLists.txt @@ -55,6 +55,11 @@ add_grb_executables( manual_hook_grb_collectives_blas1_raw manual_launcher.cpp BACKENDS bsp1d NO_BACKEND_NAME ) +add_grb_executables( fuselets_smoke ../performance/fuselets.cpp + BACKENDS reference_omp NO_BACKEND_NAME + ADDITIONAL_LINK_LIBRARIES fuselets +) + add_grb_executables( small_knn ../unit/auto_launcher.cpp hook/knn.cpp BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh index 45793fb42..ce6c3ce6b 100755 --- a/tests/smoke/smoketests.sh +++ b/tests/smoke/smoketests.sh @@ -427,6 +427,15 @@ for BACKEND in ${BACKENDS[@]}; do echo " " fi + if [ "$BACKEND" = "reference_omp" ]; then + echo "Non-standard reference-omp specific smoke tests:" + echo " " + echo ">>> [x] [ ] Tests fuselets" + $runner ${TEST_BIN_DIR}/fuselets_smoke 100 1 1 &> ${TEST_OUT_DIR}/fuselets.log + head -1 ${TEST_OUT_DIR}/fuselets.log + grep "Test OK" ${TEST_OUT_DIR}/fuselets.log || echo "Test FAILED" + echo " " + fi done done diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index f7b71cbad..28aa6cb36 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -190,6 +190,14 @@ add_grb_executables( iteratorFilter iteratorFilter.cpp BACKENDS reference NO_BACKEND_NAME ) +add_grb_executables( monoids monoids.cpp + BACKENDS reference NO_BACKEND_NAME +) + +add_grb_executables( semirings semirings.cpp + BACKENDS reference NO_BACKEND_NAME +) + add_grb_executables( RBGaussSeidel RBGaussSeidel.cpp BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking ) diff --git a/tests/unit/monoids.cpp b/tests/unit/monoids.cpp new file mode 100644 index 000000000..d43473158 --- /dev/null +++ b/tests/unit/monoids.cpp @@ -0,0 +1,299 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * Tests the default monoid definitions. + * + * @author A. N. Yzelman + * @date 24th of October, 2024 + */ + +#include + +#include + + +template< typename Monoid > +bool runTests() { + + Monoid monoid; + + // get identities (the zeroes) in each input domain + const typename Monoid::D1 d1_zero = + monoid.template getIdentity< typename Monoid::D1 >(); + const typename Monoid::D2 d2_zero = + monoid.template getIdentity< typename Monoid::D2 >(); + + // get nonzeroes in each input domain + // without a semiring structure, we cannot just construct one. What we do here + // instead is use negation of the earlier-retrieved zeroes. + const typename Monoid::D1 d1_nonzero = !(d1_zero); + const typename Monoid::D2 d2_nonzero = !(d2_zero); + + // check zero is an identity under addition, zero left: + { + typename Monoid::D3 tmp; + if( + grb::apply( + tmp, d1_zero, d2_nonzero, monoid.getOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test I\n"; + return false; + } + if( tmp != static_cast< typename Monoid::D3 >( d2_nonzero ) ) { + std::cerr << "Zero in D1 does not act as an identity\n"; + return false; + } + } + + // check zero is an identity under addition, zero right: + { + typename Monoid::D3 tmp; + if( + grb::apply( + tmp, d1_nonzero, d2_zero, monoid.getOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test II\n"; + return false; + } + if( tmp != static_cast< typename Monoid::D3 >( d1_nonzero ) ) { + std::cerr << "Zero in D2 does not act as an identity\n"; + return false; + } + } + + // check commutativity (if applicable) + if( grb::is_commutative< Monoid >::value ) { + typename Monoid::D3 left, right; + if( + grb::apply( + left, d1_zero, d2_nonzero, monoid.getOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test III (1)\n"; + return false; + } + if( + grb::apply( + right, d1_nonzero, d2_zero, monoid.getOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test III (2)\n"; + return false; + } + if( left != right ) { + std::cerr << "Non-commutative behaviour detected " + << "while the commutative type trait was true\n"; + return false; + } + } + + // all OK + return true; +} + +template< + template< typename D1, typename D2 = D1, typename D3 = D2 > + class Monoid +> +bool runTestsAllDomains() { + std::cout << "\t\t testing over doubles:\n"; + if( runTests< Monoid< double > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over floats:\n"; + if( runTests< Monoid< float > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over short ints:\n"; + if( runTests< Monoid< short int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over integers:\n"; + if( runTests< Monoid< int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over 64-bit integers:\n"; + if( runTests< Monoid< int64_t > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over short unsigned integers:\n"; + if( runTests< Monoid< short unsigned int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over unsigned integers:\n"; + if( runTests< Monoid< unsigned int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over size_ts:\n"; + if( runTests< Monoid< size_t > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + return true; +} + +int main( int argc, char ** argv ) { + if( argc > 1 ) { + std::cerr << "This test does not expect any arguments\n" + << "\t Example usage: ./" << argv[ 0 ] << "\n"; + return 1; + } + + std::cout << "This is functional test " << argv[ 0 ] << "\n"; + bool ok = true; + + std::cout << "\t testing grb::monoids::plus...\n"; + if( runTestsAllDomains< grb::monoids::plus >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::add...\n"; + if( runTestsAllDomains< grb::monoids::add >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::times...\n"; + if( runTestsAllDomains< grb::monoids::times >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::mul...\n"; + if( runTestsAllDomains< grb::monoids::mul >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::min...\n"; + if( runTestsAllDomains< grb::monoids::min >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::max...\n"; + if( runTestsAllDomains< grb::monoids::max >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::lor over Booleans...\n"; + if( runTests< grb::monoids::lor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::land over Booleans...\n"; + if( runTests< grb::monoids::land< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::lxor over Booleans...\n"; + if( runTests< grb::monoids::lxor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::lneq over Booleans...\n"; + if( runTests< grb::monoids::lneq< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::lxnor over Booleans...\n"; + if( runTests< grb::monoids::lxnor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + std::cout << "\t testing grb::monoids::leq over Booleans...\n"; + if( runTests< grb::monoids::leq< bool > >() ) { + std::cout << "\t OK\n"; + } else { + std::cout << "\t ERR\n"; + ok = false; + } + + // done + if( ok ) { + std::cout << "Test OK\n" << std::endl; + } else { + std::cerr << std::flush; + std::cout << "Test FAILED\n" << std::endl; + } +} + diff --git a/tests/unit/parser.cpp b/tests/unit/parser.cpp index c484ae8cc..28e3f33bd 100644 --- a/tests/unit/parser.cpp +++ b/tests/unit/parser.cpp @@ -15,11 +15,12 @@ * limitations under the License. */ +#include #include +#include // SIZE_MAX #include -#include -#include #include +#include #include #include "graphblas/synchronizedNonzeroIterator.hpp" diff --git a/tests/unit/semirings.cpp b/tests/unit/semirings.cpp new file mode 100644 index 000000000..1dbec312f --- /dev/null +++ b/tests/unit/semirings.cpp @@ -0,0 +1,423 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * Tests the default semiring definitions. + * + * @author A. N. Yzelman + * @date 11th of October, 2024 + */ + +#include +GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT + +#include + +#include + + +template< typename Semiring > +bool runTests() { + + Semiring ring; + + // check zero annihilates one under multiplication, zero left: + { + typename Semiring::D3 tmp; + if( + grb::apply( + tmp, + ring.template getZero< typename Semiring::D1 >(), + ring.template getOne< typename Semiring::D2 >(), + ring.getMultiplicativeOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test I\n"; + return false; + } + if( tmp != ring.template getZero< typename Semiring::D3 >() ) { + std::cerr << "Zero in D1 does not annihilate one in D2\n"; + return false; + } + } + + // check zero annihilates one under multiplication, zero right: + { + typename Semiring::D3 tmp; + if( + grb::apply( + tmp, + ring.template getOne< typename Semiring::D1 >(), + ring.template getZero< typename Semiring::D2 >(), + ring.getMultiplicativeOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test II\n"; + return false; + } + if( tmp != ring.template getZero< typename Semiring::D3 >() ) { + std::cerr << "Zero in D2 does not annihilate one in D1\n"; + return false; + } + } + + // check zero is an identity under addition, zero left: + { + typename Semiring::D4 tmp; + if( + grb::apply( + tmp, + ring.template getZero< typename Semiring::D3 >(), + ring.template getOne< typename Semiring::D4 >(), + ring.getAdditiveOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test III\n"; + return false; + } + if( tmp != ring.template getOne< typename Semiring::D4 >() ) { + std::cerr << "Zero in D3 does not act as an identity under addition\n"; + return false; + } + } + + // check zero is an identity under addition, zero right: + { + typename Semiring::D4 tmp; + if( + grb::apply( + tmp, + ring.template getOne< typename Semiring::D3 >(), + ring.template getZero< typename Semiring::D4 >(), + ring.getAdditiveOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test IV\n"; + return false; + } + if( tmp != ring.template getOne< typename Semiring::D4 >() ) { + std::cerr << "Zero in D4 does not act as an identity under addition\n"; + return false; + } + } + + // check one is an identity under multiplication: + { + typename Semiring::D3 tmp; + if( + grb::apply( + tmp, + ring.template getOne< typename Semiring::D1 >(), + ring.template getOne< typename Semiring::D2 >(), + ring.getMultiplicativeOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test V\n"; + return false; + } + if( tmp != ring.template getOne< typename Semiring::D3 >() ) { + std::cerr << "One does not act as identity under multiplication\n"; + return false; + } + } + + // check distributive property: + { + typename Semiring::D4 tmp1; + grb::RC rc = grb::apply( + tmp1, + ring.template getOne< typename Semiring::D3 >(), + ring.template getOne< typename Semiring::D4 >(), + ring.getAdditiveOperator() + ); + typename Semiring::D3 chk1; + rc = rc ? rc : grb::apply( + chk1, + ring.template getOne< typename Semiring::D1 >(), + static_cast< typename Semiring::D2 >(tmp1), + ring.getMultiplicativeOperator() + ); + typename Semiring::D3 tmp2, tmp3; + rc = rc ? rc : grb::apply( + tmp2, + ring.template getOne< typename Semiring::D1 >(), + ring.template getOne< typename Semiring::D2 >(), + ring.getMultiplicativeOperator() + ); + rc = rc ? rc : grb::apply( + tmp3, + ring.template getOne< typename Semiring::D1 >(), + ring.template getOne< typename Semiring::D2 >(), + ring.getMultiplicativeOperator() + ); + typename Semiring::D3 chk2; + rc = rc ? rc : grb::apply( + chk2, + tmp2, + static_cast< typename Semiring::D4 >(tmp3), + ring.getAdditiveOperator() + ); + if( rc != grb::SUCCESS ) { + std::cerr << "Unexpected error in test VI\n"; + return false; + } + if( chk1 != chk2 ) { + std::cerr << "The distributative property does not hold\n"; + return false; + } + } + + // check commutativity of additive monoid + { + typename Semiring::D4 left, right; + if( + grb::apply( + left, + ring.template getZero< typename Semiring::D3 >(), + ring.template getOne< typename Semiring::D4 >(), + ring.getAdditiveOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test VII (1)\n"; + return false; + } + if( + grb::apply( + right, + ring.template getOne< typename Semiring::D3 >(), + ring.template getZero< typename Semiring::D4 >(), + ring.getAdditiveOperator() + ) != grb::SUCCESS + ) { + std::cerr << "Unexpected error in test VII (2)\n"; + return false; + } + if( left != right ) { + std::cerr << "Non-commutative behaviour of the additive monoid detected\n"; + return false; + } + } + + // all OK + return true; +} + +template< + template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 > + class Semiring +> +bool runTestsAllDomains() { + std::cout << "\t\t testing over doubles:\n"; + if( runTests< Semiring< double > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over floats:\n"; + if( runTests< Semiring< float > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over short ints:\n"; + if( runTests< Semiring< short int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over integers:\n"; + if( runTests< Semiring< int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over 64-bit integers:\n"; + if( runTests< Semiring< int64_t > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over short unsigned integers:\n"; + if( runTests< Semiring< short unsigned int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over unsigned integers:\n"; + if( runTests< Semiring< unsigned int > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + std::cout << "\t\t testing over size_ts:\n"; + if( runTests< Semiring< size_t > >() ) { + std::cout << "\t\t OK\n"; + } else { + std::cout << "\t\t ERR\n"; + return false; + } + + return true; +} + +int main( int argc, char ** argv ) { + if( argc > 1 ) { + std::cerr << "This test does not expect any arguments\n" + << "\t Example usage: ./" << argv[ 0 ] << "\n"; + return 1; + } + + std::cout << "This is functional test " << argv[ 0 ] << "\n"; + bool ok = true; + + std::cout << "\t testing grb::semirings::plusTimes...\n"; + if( runTestsAllDomains< grb::semirings::plusTimes >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::minPlus...\n"; + if( runTestsAllDomains< grb::semirings::minPlus >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::maxPlus over integers:\n"; + if( runTests< grb::semirings::maxPlus< int > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::maxPlus over doubles:\n"; + if( runTests< grb::semirings::maxPlus< double > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::minTimes over unsigned integers:\n"; + if( runTests< grb::semirings::minTimes< unsigned int > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::minMax...\n"; + if( runTestsAllDomains< grb::semirings::minMax >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::maxMin...\n"; + if( runTestsAllDomains< grb::semirings::maxMin >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::maxTimes over size_ts:\n"; + if( runTests< grb::semirings::maxTimes< size_t > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::plusMin over unsigned integers:\n"; + if( runTests< grb::semirings::plusMin< unsigned int > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::lorLand over Booleans:\n"; + if( runTests< grb::semirings::lorLand< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::boolean:\n"; + if( runTests< grb::semirings::boolean >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::landLor over Booleans:\n"; + if( runTests< grb::semirings::landLor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::lxorLand over Booleans:\n"; + if( runTests< grb::semirings::lxorLand< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::lneqLand over Booleans:\n"; + if( runTests< grb::semirings::lneqLand< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::lxnorLor over Booleans:\n"; + if( runTests< grb::semirings::lxnorLor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + std::cout << "\t testing grb::semirings::leqLor over Booleans:\n"; + if( runTests< grb::semirings::leqLor< bool > >() ) { + std::cout << "\t OK\n"; + } else { + ok = false; + } + + // done + if( ok ) { + std::cout << "Test OK\n" << std::endl; + } else { + std::cerr << std::flush; + std::cout << "Test FAILED\n" << std::endl; + } +} + diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh index 8da87a458..592c77870 100755 --- a/tests/unit/unittests.sh +++ b/tests/unit/unittests.sh @@ -51,6 +51,18 @@ for MODE in ${MODES}; do echo " field (double, integers, and floats)" ${TEST_BIN_DIR}/mul15m_${MODE} + echo ">>> [x] [ ] Testing pre-defined monoids" + ${TEST_BIN_DIR}/monoids_${MODE} &> ${TEST_OUT_DIR}/monoids_${MODE}.log + head -1 ${TEST_OUT_DIR}/monoids_${MODE}.log + grep 'Test OK' ${TEST_OUT_DIR}/monoids_${MODE}.log || echo "Test FAILED" + echo " " + + echo ">>> [x] [ ] Testing pre-defined semirings" + ${TEST_BIN_DIR}/semirings_${MODE} &> ${TEST_OUT_DIR}/semirings_${MODE}.log + head -1 ${TEST_OUT_DIR}/semirings_${MODE}.log + grep 'Test OK' ${TEST_OUT_DIR}/semirings_${MODE}.log || echo "Test FAILED" + echo " " + echo ">>> [x] [ ] Tests the built-in parser on the west0497 MatrixMarket file" if [ -f ${INPUT_DIR}/west0497.mtx ]; then ${TEST_BIN_DIR}/parserTest_${MODE} ${INPUT_DIR}/west0497.mtx 2> ${TEST_OUT_DIR}/parserTest_${MODE}.err 1> ${TEST_OUT_DIR}/parserTest_${MODE}.out