From 55d74ec6de8a26dda072a7ffae2cb3ef6e5648b1 Mon Sep 17 00:00:00 2001
From: anyzelman <101567333+anyzelman@users.noreply.github.com>
Date: Thu, 9 Jan 2025 00:12:09 +0100
Subject: [PATCH] 338 request for fuselets (#339)

This MR provides so-called _fuselets_.

Fuselets are sequences of operations that may occur in existing code bases, and could be accelerated using the nonblocking backend. A fuselet represents such a sequence of operations, which are exposed as a single plain-C function call. ALP during its standard build will generate a library of those fuselets using its nonblocking backend, thus resulting in high-performance, automatically fused kernels.

The fuselets furthermore, at run-time and again automatically, tunes performance parameters such as tile sizes and the number of threads to be used.

Finally, the example fuselets provided in `include/transition/fuselets.h` and `src/transition/fuselets.cpp` demonstrate how easy it is to use ALP to provide arbitrary fuselets that existing code bases may require, and are easily modified or extended.

This MR also includes:
 - smoke and performance tests for the new fuselets;
 - standard semiring and monoid definitions in the new `grb::monoids` and `grb::semirings` namespaces;
 - tests for those standard semirings and monoids (insofar possible);
 - a bugfix in the stdout of the spmspm unit test;
 - a bugfix for missing cstdint includes in various headers; and
 - as always, minor code style fixes.
---
 include/graphblas.hpp                         |   6 +
 include/graphblas/base/io.hpp                 |   2 +
 include/graphblas/hyperdags/hyperdags.hpp     |   1 +
 include/graphblas/identities.hpp              | 224 ++--
 include/graphblas/monoid.hpp                  | 314 ++++++
 include/graphblas/nonblocking/coordinates.hpp |   4 +-
 include/graphblas/reference/coordinates.hpp   |   1 +
 include/graphblas/reference/init.hpp          |   1 +
 include/graphblas/reference/io.hpp            |   2 +-
 include/graphblas/semiring.hpp                | 421 ++++++++
 include/graphblas/utils/suppressions.h        |   5 +
 include/transition/fuselets.h                 | 455 ++++++++
 src/transition/CMakeLists.txt                 |   3 +
 src/transition/fuselets.cpp                   | 669 ++++++++++++
 tests/performance/CMakeLists.txt              |   5 +
 tests/performance/fuselets.cpp                | 970 ++++++++++++++++++
 tests/performance/performancetests.sh         |   9 +
 tests/performance/spmspm.cpp                  |   2 +-
 tests/smoke/CMakeLists.txt                    |   5 +
 tests/smoke/smoketests.sh                     |   9 +
 tests/unit/CMakeLists.txt                     |   8 +
 tests/unit/monoids.cpp                        | 299 ++++++
 tests/unit/parser.cpp                         |   5 +-
 tests/unit/semirings.cpp                      | 423 ++++++++
 tests/unit/unittests.sh                       |  12 +
 25 files changed, 3767 insertions(+), 88 deletions(-)
 create mode 100644 include/transition/fuselets.h
 create mode 100644 src/transition/fuselets.cpp
 create mode 100644 tests/performance/fuselets.cpp
 create mode 100644 tests/unit/monoids.cpp
 create mode 100644 tests/unit/semirings.cpp

diff --git a/include/graphblas.hpp b/include/graphblas.hpp
index 029f77bf7..dd2f63102 100644
--- a/include/graphblas.hpp
+++ b/include/graphblas.hpp
@@ -170,6 +170,12 @@
  *  -# #grb::Monoid, and
  *  -# #grb::Semiring.
  *
+ * A list of standard operators, monoids, and semirings are provided in their
+ * respective name spaces:
+ *  -# #grb::operators,
+ *  -# #grb::monoids, and
+ *  -# #grb::semirings.
+ *
  * Binary operators are parametrised in two input domains and one output domain,
  * \f$ D_1 \times D_2 \to D_3 \f$. The \f$ D_i \f$ are given as template
  * arguments to the operator. A #grb::Monoid is composed from a binary operator
diff --git a/include/graphblas/base/io.hpp b/include/graphblas/base/io.hpp
index 0a33a556a..0c0bab768 100644
--- a/include/graphblas/base/io.hpp
+++ b/include/graphblas/base/io.hpp
@@ -41,6 +41,8 @@
 
 #include <assert.h>
 
+#include <cstdint> // for uintptr_t
+
 
 namespace grb {
 
diff --git a/include/graphblas/hyperdags/hyperdags.hpp b/include/graphblas/hyperdags/hyperdags.hpp
index 5ecc4dbef..2ae951565 100644
--- a/include/graphblas/hyperdags/hyperdags.hpp
+++ b/include/graphblas/hyperdags/hyperdags.hpp
@@ -30,6 +30,7 @@
 #include <map>
 #include <set>
 #include <vector>
+#include <cstdint> // uintptr_t
 #include <ostream>
 #include <iostream>
 #include <type_traits>
diff --git a/include/graphblas/identities.hpp b/include/graphblas/identities.hpp
index fdbb7c7f7..837b1a5ec 100644
--- a/include/graphblas/identities.hpp
+++ b/include/graphblas/identities.hpp
@@ -55,91 +55,130 @@ namespace grb {
 		/** Standard identity for numerical addition. */
 		template< typename D >
 		class zero {
-			static_assert( std::is_convertible< int, D >::value, "Cannot form identity under the requested domain" );
-
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under standard addition (i.e., `zero').
-			 */
-			static constexpr D value() {
-				return static_cast< D >( 0 );
-			}
+
+			static_assert( std::is_convertible< int, D >::value,
+				"Cannot form identity under the requested domain" );
+
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under standard addition (i.e., `zero').
+				 */
+				static constexpr D value() {
+					return static_cast< D >( 0 );
+				}
+
 		};
+
 		template< typename K, typename V >
 		class zero< std::pair< K, V > > {
-		public:
-			static constexpr std::pair< K, V > value() {
-				return std::make_pair( zero< K >::value(), zero< V >::value() );
-			}
+
+			public:
+
+				static constexpr std::pair< K, V > value() {
+					return std::make_pair( zero< K >::value(), zero< V >::value() );
+				}
+
 		};
 
 		/** Standard identity for numerical multiplication. */
 		template< typename D >
 		class one {
-			static_assert( std::is_convertible< int, D >::value, "Cannot form identity under the requested domain" );
-
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under standard multiplication (i.e., `one').
-			 */
-			static constexpr D value() {
-				return static_cast< D >( 1 );
-			}
+
+			static_assert( std::is_convertible< int, D >::value,
+				"Cannot form identity under the requested domain" );
+
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under standard multiplication (i.e., `one').
+				 */
+				static constexpr D value() {
+					return static_cast< D >( 1 );
+				}
+
 		};
+
 		template< typename K, typename V >
 		class one< std::pair< K, V > > {
-		public:
-			static constexpr std::pair< K, V > value() {
-				return std::make_pair( one< K >::value(), one< V >::value() );
-			}
+
+			public:
+
+				static constexpr std::pair< K, V > value() {
+					return std::make_pair( one< K >::value(), one< V >::value() );
+				}
+
 		};
 
 		/** Standard identity for the minimum operator. */
 		template< typename D >
 		class infinity {
-			static_assert( std::is_arithmetic< D >::value, "Cannot form identity under the requested domain" );
 
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under the standard min operator (i.e., `infinity'),
-			 *         of type \a D.
-			 */
-			static constexpr D value() {
-				return std::numeric_limits< D >::has_infinity ? std::numeric_limits< D >::infinity() : std::numeric_limits< D >::max();
-			}
+			static_assert( std::is_arithmetic< D >::value,
+				"Cannot form identity under the requested domain" );
+
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under the standard min operator (i.e., `infinity'),
+				 *         of type \a D.
+				 */
+				static constexpr D value() {
+					return std::numeric_limits< D >::has_infinity
+						? std::numeric_limits< D >::infinity()
+						: std::numeric_limits< D >::max();
+				}
+
 		};
+
 		template< typename K, typename V >
 		class infinity< std::pair< K, V > > {
-		public:
-			static constexpr std::pair< K, V > value() {
-				return std::make_pair( infinity< K >::value(), infinity< V >::value() );
-			}
+
+			public:
+
+				static constexpr std::pair< K, V > value() {
+					return std::make_pair( infinity< K >::value(), infinity< V >::value() );
+				}
+
 		};
 
 		/** Standard identity for the maximum operator. */
 		template< typename D >
 		class negative_infinity {
+
 			static_assert( std::is_arithmetic< D >::value, "Cannot form identity under the requested domain" );
 
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under the standard max operator, i.e.,
-			 *         `minus infinity'.
-			 */
-			static constexpr D value() {
-				return std::numeric_limits< D >::min() == 0 ? 0 : ( std::numeric_limits< D >::has_infinity ? -std::numeric_limits< D >::infinity() : std::numeric_limits< D >::min() );
-			}
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under the standard max operator, i.e.,
+				 *         `minus infinity'.
+				 */
+				static constexpr D value() {
+					return std::numeric_limits< D >::min() == 0
+						? 0
+						: ( std::numeric_limits< D >::has_infinity
+							? -std::numeric_limits< D >::infinity()
+							: std::numeric_limits< D >::min()
+						);
+				}
+
 		};
+
 		template< typename K, typename V >
 		class negative_infinity< std::pair< K, V > > {
-		public:
+
+			public:
+
 			static constexpr std::pair< K, V > value() {
-				return std::make_pair( negative_infinity< K >::value(), negative_infinity< V >::value() );
+				return std::make_pair( negative_infinity< K >::value(),
+					negative_infinity< V >::value() );
 			}
+
 		};
 
 		/**
@@ -149,24 +188,33 @@ namespace grb {
 		 */
 		template< typename D >
 		class logical_false {
-			static_assert( std::is_convertible< bool, D >::value, "Cannot form identity under the requested domain" );
-
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under the standard logical OR operator, i.e.,
-			 *         \a false.
-			 */
-			static const constexpr D value() {
-				return static_cast< D >( false );
-			}
+
+			static_assert( std::is_convertible< bool, D >::value,
+				"Cannot form identity under the requested domain" );
+
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under the standard logical OR operator, i.e.,
+				 *         \a false.
+				 */
+				static const constexpr D value() {
+					return static_cast< D >( false );
+				}
+
 		};
+
 		template< typename K, typename V >
 		class logical_false< std::pair< K, V > > {
-		public:
-			static constexpr std::pair< K, V > value() {
-				return std::make_pair( logical_false< K >::value(), logical_false< V >::value() );
-			}
+
+			public:
+
+				static constexpr std::pair< K, V > value() {
+					return std::make_pair( logical_false< K >::value(),
+						logical_false< V >::value() );
+				}
+
 		};
 
 		/**
@@ -176,27 +224,37 @@ namespace grb {
 		 */
 		template< typename D >
 		class logical_true {
-			static_assert( std::is_convertible< bool, D >::value, "Cannot form identity under the requested domain" );
-
-		public:
-			/**
-			 * @tparam D The domain of the value to return.
-			 * @return The identity under the standard logical AND operator, i.e.,
-			 *         \a true.
-			 */
-			static constexpr D value() {
-				return static_cast< D >( true );
-			}
+
+			static_assert( std::is_convertible< bool, D >::value,
+				"Cannot form identity under the requested domain" );
+
+			public:
+
+				/**
+				 * @tparam D The domain of the value to return.
+				 * @return The identity under the standard logical AND operator, i.e.,
+				 *         \a true.
+				 */
+				static constexpr D value() {
+					return static_cast< D >( true );
+				}
+
 		};
+
 		template< typename K, typename V >
 		class logical_true< std::pair< K, V > > {
-		public:
-			static constexpr std::pair< K, V > value() {
-				return std::make_pair( logical_true< K >::value(), logical_true< V >::value() );
-			}
+
+			public:
+
+				static constexpr std::pair< K, V > value() {
+					return std::make_pair( logical_true< K >::value(),
+						logical_true< V >::value() );
+				}
+
 		};
 
 	} // namespace identities
+
 } // namespace grb
 
 #endif
diff --git a/include/graphblas/monoid.hpp b/include/graphblas/monoid.hpp
index 0130a8bb6..c055b80cd 100644
--- a/include/graphblas/monoid.hpp
+++ b/include/graphblas/monoid.hpp
@@ -144,6 +144,320 @@ namespace grb {
 			> >::value;
 	};
 
+	// after all of the standard definitions, declare some standard monoids
+
+	/**
+	 * A name space that contains a set of standard monoids.
+	 *
+	 * Standard monoids include:
+	 *  - #plus, for numerical addition
+	 *  - #times, for numerical multiplication
+	 *  - #min, for the minimum relation
+	 *  - #max, for the maximum relation
+	 *  - #lor, for the logical-or relation
+	 *  - #land, for the logical-and relation
+	 *  - #lxor, for the exclusive-or relation
+	 *  - #lxnor, for the negated exclusive-or relation.
+	 *
+	 * \note In the above, the prefix letter <tt>l</tt> stands for \em logical,
+	 *       e.g., <tt>lor</tt> stands for logical-or.
+	 *
+	 * There are also a couple of aliases to match different preferences:
+	 *  - #add (same as #plus),
+	 *  - #mul (same as #times),
+	 *  - #lneq (same as #lxor), and
+	 *  - #leq (same as #lxnor).
+	 *
+	 * \note The #min and #max monoids have different identities depending on the
+	 *       domain. The standard monoids defined here auto-adapt to the correct
+	 *       identity.
+	 */
+	namespace monoids {
+
+		/**
+		 * The plus monoid.
+		 *
+		 * Uses \em addition (plus) as the operator, and zero as its identity.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 *
+		 * @see #add.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using plus = grb::Monoid<
+			grb::operators::add< D1, D2, D3 >,
+			grb::identities::zero
+		>;
+
+		/**
+		 * The times monoid.
+		 *
+		 * Uses \em multiplication (times) as the operator, and one as its identity.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * @see #mul.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using times = grb::Monoid<
+			grb::operators::mul< D1, D2, D3 >,
+			grb::identities::one
+		>;
+
+		/**
+		 * This is an alias of #plus.
+		 *
+		 * @see #add.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using add = plus< D1, D2, D3 >;
+
+		/**
+		 * This is an alias of #times.
+		 *
+		 * @see #times.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using mul = times< D1, D2, D3 >;
+
+		/**
+		 * The min monoid.
+		 *
+		 * Uses \em min as the operator. If the domain is floating-point, uses
+		 * infinity as its identity; if the domain is integer, uses its maximum
+		 * representable value as the identity of this monoid.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using min = grb::Monoid<
+			grb::operators::min< D1, D2, D3 >,
+			grb::identities::infinity
+		>;
+
+		/**
+		 * The max monoid.
+		 *
+		 * Uses \em max as the operator. If the domain is floating-point, uses
+		 * negative infinity (\f$ -\infty \f$) as its identity; if the domain is
+		 * integer, uses its minimum representable value as the identity of this
+		 * monoid.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using max = grb::Monoid<
+			grb::operators::max< D1, D2, D3 >,
+			grb::identities::negative_infinity
+		>;
+
+		/**
+		 * The logical-or monoid.
+		 *
+		 * Uses \em logical-or as the operator and <tt>false</tt> as its identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using lor = grb::Monoid<
+			grb::operators::logical_or< D1, D2, D3 >,
+			grb::identities::logical_false
+		>;
+
+		/**
+		 * The logical-and monoid.
+		 *
+		 * Uses \em logical-and as the operator and <tt>true</tt> as its identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using land = grb::Monoid<
+			grb::operators::logical_and< D1, D2, D3 >,
+			grb::identities::logical_true
+		>;
+
+		/**
+		 * The logical-exclusive-or monoid.
+		 *
+		 * Uses \em logical-exclusive-or as the operator and <tt>false</tt> as its
+		 * identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 *
+		 * @see #lneq.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using lxor = grb::Monoid<
+			grb::operators::not_equal< D1, D2, D3 >,
+			grb::identities::logical_false
+		>;
+
+		/**
+		 * The logical-not-equals monoid.
+		 *
+		 * Uses \em logical-not-equals as the operator and <tt>false</tt> as its
+		 * identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 *
+		 * @see #lxor.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using lneq = lxor< D1, D2, D3 >;
+
+		/**
+		 * The logical-negated-exclusive-or monoid.
+		 *
+		 * Uses \em logical-negated-exclusive-or as the operator and <tt>true</tt> as
+		 * its identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 *
+		 * @see #leq.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using lxnor = grb::Monoid<
+			grb::operators::equal< D1, D2, D3 >,
+			grb::identities::logical_true
+		>;
+
+		/**
+		 * The logical-equals monoid.
+		 *
+		 * Uses \em logical-equals as the operator and <tt>true</tt> as its identity.
+		 *
+		 * If the domain is non-boolean, inputs will be cast to a Boolean before the
+		 * operator is invoked, while the result will be cast to the target domain on
+		 * output.
+		 *
+		 * The three domains of the monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the operator
+		 * @tparam D2 The right-hand input domain of the operator
+		 * @tparam D3 The output domain of the operator
+		 *
+		 * The types \a D2 and \a D3 are optional. If \a D3 is not explicitly given,
+		 * it will be set to \a D2. If \a D2 is not explicitly given, it will be set
+		 * to \a D1.
+		 *
+		 * This is a commutative monoid (assuming \a D1 equals \a D2).
+		 *
+		 * @see #lxnor.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2 >
+		using leq = lxnor< D1, D2, D3 >;
+
+	}
+
 } // namespace grb
 
 #endif
diff --git a/include/graphblas/nonblocking/coordinates.hpp b/include/graphblas/nonblocking/coordinates.hpp
index 1aca8e322..ff6789287 100644
--- a/include/graphblas/nonblocking/coordinates.hpp
+++ b/include/graphblas/nonblocking/coordinates.hpp
@@ -27,12 +27,14 @@
 #ifndef _H_GRB_NONBLOCKING_COORDINATES
 #define _H_GRB_NONBLOCKING_COORDINATES
 
-#include <stdexcept> //std::runtime_error
 #include <vector>
+#include <cstdint> // uintptr_t
+#include <stdexcept> //std::runtime_error
 #if defined _DEBUG && ! defined NDEBUG
  #include <set>
 #endif
 
+
 #include <stddef.h> //size_t
 #include <assert.h>
 
diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp
index dd3d44cd4..2a1f90137 100644
--- a/include/graphblas/reference/coordinates.hpp
+++ b/include/graphblas/reference/coordinates.hpp
@@ -26,6 +26,7 @@
 
 #include <stddef.h> //size_t
 
+#include <cstdint> // uintptr_t
 #include <algorithm> // std::max
 #include <stdexcept> // std::runtime_error
 
diff --git a/include/graphblas/reference/init.hpp b/include/graphblas/reference/init.hpp
index 9332c8b88..a7bdf0b02 100644
--- a/include/graphblas/reference/init.hpp
+++ b/include/graphblas/reference/init.hpp
@@ -24,6 +24,7 @@
 #define _H_GRB_REFERENCE_INIT
 
 #include <new>
+#include <cstdint> // for uintptr_t
 #include <algorithm>
 
 #include <graphblas/base/init.hpp>
diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp
index b788527a1..ad34a276c 100644
--- a/include/graphblas/reference/io.hpp
+++ b/include/graphblas/reference/io.hpp
@@ -1239,7 +1239,7 @@ namespace grb {
 							utils::template prefixSum_seq< true >( out_ccs_offsets, n );
 						}
 #else
-						size_t crs_ws, ccs_ws;
+						NIT crs_ws, ccs_ws;
 						utils::template prefixSum_ompPar_phase1< true >( out_crs_offsets, m,
 							crs_ws );
 						if( !(descr & descriptors::force_row_major) ) {
diff --git a/include/graphblas/semiring.hpp b/include/graphblas/semiring.hpp
index 23a7210e3..28354ac74 100644
--- a/include/graphblas/semiring.hpp
+++ b/include/graphblas/semiring.hpp
@@ -383,6 +383,427 @@ namespace grb {
 
 	};
 
+	// after all of the standard definitions, declare some standard semirings
+
+	/**
+	 * A name space that contains a set of standard semirings.
+	 *
+	 * Standard semirings include:
+	 *  - #plusTimes, for numerical linear algebra
+	 *  - #minPlus, for, e.g., shortest-path graph queries
+	 *  - #maxPlus, for, e.g., longest-path graph queries
+	 *  - #minTimes, for, e.g., least-reliable-path graph queries
+	 *  - #maxTimes, for, e.g., most-reliable-path graph queries,
+	 *  - #boolean, for, e.g., reachability graph queries.
+	 *  - etc.
+	 *
+	 * A list of all pre-defined semirings, in addition to the above, follows:
+	 * #minMax, #maxMin, #plusMin, #lorLand, #landLor, #lxorLand, #lxnorLor,
+	 * #lneqLand, and #leqLor.
+	 *
+	 * \note Here, lor stands for logical-or and land stands for logical-and, while
+	 *       ne stands for not-equal and eq for equal.
+	 *
+	 * \note The #lorLand semiring over the Boolean domains is the same as the
+	 *       #boolean semiring.
+	 *
+	 * \note The #lxorLand semiring is the same as the #lneqLand semiring.
+	 *
+	 * \note The #lxnorLor semiring is the same as the #leqLor semiring.
+	 *
+	 * \warning Some of these pre-defined semirings are not proper semirings over
+	 *          all domains. For example, the #maxPlus semiring over unsigned
+	 *          integers would have both max and + identities be zero, and thus
+	 *          cannot act as an annihilator over +.
+	 *
+	 * \warning While ALP does a best-effort in catching erroneous semirings, by
+	 *          virtue of templates it cannot catch all erroneous semirings. E.g.,
+	 *          continuing the above #maxPlus semiring example: even if ALP
+	 *          prevents the definition of #maxPlus semirings over unsigned types
+	 *          by relying on the <tt>std::is_unsigned</tt> type trait, a user
+	 *          could still define their own unsigned integer type that erroneously
+	 *          overloads this type trait to <tt>false</tt>. We cannot catch such
+	 *          errors and consider those programming errors.
+	 *
+	 * \note We do not pre-define any improper semiring, such as plusMin, that do
+	 *       appear in the GraphBLAS C specification. Instead, ALP has, for every
+	 *       primitive that takes a semiring, a variant of that primitive that
+	 *       instead of a semiring, takes 1) a cummutative monoid as an additive
+	 *       operator, and 2) any binary operator as the multiplicative operator.
+	 *       These variants do not (and may not) rely on the additive identity
+	 *       being an annihilator over the multiplicative operation, and do not
+	 *       (may not) rely on any distributive property over the two operations.
+	 *
+	 * Each semiring except #boolean takes up to four domains as template
+	 * arguments, while semirings as a pure mathematical concept take only a single
+	 * domain. The first three domains indicate the left-hand input domain, the
+	 * right-hand input domain, and the output domain of the multiplicative monoid,
+	 * respectively. The third and fourth domains indicate the left-hand and right-
+	 * hand input domain of the additive monoid. The fourth domain also indicates
+	 * the output domain of the additive monoid.
+	 *
+	 * \note This particular extension of semirings to four domains is rooted in
+	 *       C-Monoid categories. All useful mixed-domain semirings ALP has
+	 *       presently been applied with are C-Monoid categories, while since
+	 *       assuming this underlying algebra, the ALP code base that relates to
+	 *       algebraic structures, algebraic type traits, and their application,
+	 *       has simplified significantly.
+	 */
+	namespace semirings {
+
+		/**
+		 * The plusTimes semiring.
+		 *
+		 * Uses \em addition (plus) as the additive commutative monoid and
+		 * \em multiplication (times) as the multiplicative monoid. The identities
+		 * for each monoid are zero and one, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using plusTimes = grb::Semiring<
+			grb::operators::add< D3, D4, D4 >,
+			grb::operators::mul< D1, D2, D3 >,
+			grb::identities::zero, grb::identities::one
+		>;
+
+		/**
+		 * The minPlus semiring.
+		 *
+		 * Uses \em min as the additive commutative monoid and \em addition as the
+		 * multiplicative monoid. The identities for each monoid are \f$ \infty \f$
+		 * and zero, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using minPlus = grb::Semiring<
+			grb::operators::min< D3, D4, D4 >,
+			grb::operators::add< D1, D2, D3 >,
+			grb::identities::infinity, grb::identities::zero
+		>;
+
+		/**
+		 * The maxPlus semiring.
+		 *
+		 * Uses \em max as the additive commutative monoid and \em addition as the
+		 * multiplicative monoid. The identities for each monoid are \f$ -\infty \f$
+		 * and zero, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using maxPlus = grb::Semiring<
+			grb::operators::max< D3, D4, D4 >,
+			grb::operators::add< D1, D2, D3 >,
+			grb::identities::negative_infinity, grb::identities::zero
+		>;
+
+		/**
+		 * The minTimes semiring.
+		 *
+		 * Uses \em min as the additive commutative monoid and \em multiplication as
+		 * the multiplicative monoid. The identities for each monoid are
+		 * \f$ \infty \f$ and one, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using minTimes = grb::Semiring<
+			grb::operators::min< D3, D4, D4 >,
+			grb::operators::mul< D1, D2, D3 >,
+			grb::identities::infinity, grb::identities::one
+		>;
+
+		/**
+		 * The maxTimes semiring.
+		 *
+		 * Uses \em max as the additive commutative monoid and \em multiplication as
+		 * the multiplicative monoid. The identities for each monoid are
+		 * \f$ -infty \f$ and one, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using maxTimes = grb::Semiring<
+			grb::operators::max< D3, D4, D4 >,
+			grb::operators::mul< D1, D2, D3 >,
+			grb::identities::negative_infinity, grb::identities::one
+		>;
+
+		/**
+		 * The minMax semiring.
+		 *
+		 * Uses \em min as the additive commutative monoid and \em max as the
+		 * multiplicative monoid. The identities for each monoid are \f$ \infty \f$
+		 * and \f$ -\infty \f$, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using minMax = grb::Semiring<
+			grb::operators::min< D3, D4, D4 >,
+			grb::operators::max< D1, D2, D3 >,
+			grb::identities::infinity, grb::identities::negative_infinity
+		>;
+
+		/**
+		 * The maxMin semiring.
+		 *
+		 * Uses \em max as the additive commutative monoid and \em min as the
+		 * multiplicative monoid. The identities for each monoid are \f$ -\infty \f$
+		 * and \f$ \infty \f$, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using maxMin = grb::Semiring<
+			grb::operators::max< D3, D4, D4 >,
+			grb::operators::min< D1, D2, D3 >,
+			grb::identities::negative_infinity, grb::identities::infinity
+		>;
+
+		/**
+		 * The plusMin semiring.
+		 *
+		 * Uses \em plus as the additive commutative monoid and \em min as the
+		 * multiplicative monoid. The identities for each monoid are \f$ 0 \f$ and
+		 * \f$ \infty \f$, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using plusMin = grb::Semiring<
+			grb::operators::add< D3, D4, D4 >,
+			grb::operators::min< D1, D2, D3 >,
+			grb::identities::zero, grb::identities::infinity
+		>;
+
+		/**
+		 * The logical-or logical-and semiring.
+		 *
+		 * Uses \em or as the additive commutative monoid and \em and as the
+		 * multiplicative monoid. The identities for each monoid are \em false and
+		 * \em true, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using lorLand = grb::Semiring<
+			grb::operators::logical_or< D3, D4, D4 >,
+			grb::operators::logical_and< D1, D2, D3 >,
+			grb::identities::logical_false, grb::identities::logical_true
+		>;
+
+		/**
+		 * The Boolean semiring.
+		 *
+		 * Uses \em or as the additive commutative monoid and \em and as the
+		 * multiplicative monoid. The identities for each monoid are \em false and
+		 * \em true, respectively. All domains are fixed to <tt>bool</tt>.
+		 */
+		using boolean = lorLand< bool >;
+
+		/**
+		 * The logical-and logical-or semiring.
+		 *
+		 * Uses \em and as the additive commutative monoid and \em or as the
+		 * multiplicative monoid. The identities for each monoid are \em true and
+		 * em false, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using landLor = grb::Semiring<
+			grb::operators::logical_and< D3, D3, D4 >,
+			grb::operators::logical_or< D1, D2, D3 >,
+			grb::identities::logical_true, grb::identities::logical_false
+		>;
+
+		/**
+		 * The exclusive-logical-or logical-and semiring.
+		 *
+		 * Uses <em>not-equals</em> as the additive commutative monoid and logical-and
+		 * as the multiplicative monoid. The identities for each monoid are \em false
+		 * and \em true, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using lxorLand = grb::Semiring<
+			grb::operators::not_equal< D3, D3, D4 >,
+			grb::operators::logical_and< D1, D2, D3 >,
+			grb::identities::logical_false, grb::identities::logical_true
+		>;
+
+		/**
+		 * The not-equals logical-and semiring.
+		 *
+		 * Uses <em>not-equal</em> as the additive commutative monoid and \em and as
+		 * the multiplicative monoid. The identities for each monoid are \em false and
+		 * \em true, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using lneqLand = lxorLand< D1, D2, D3, D4 >;
+
+		/**
+		 * The negated-exclusive-or logical-or semring.
+		 *
+		 * Uses <em>negated exclusive or</em> as the additive commutative monoid and
+		 * \em or as the multiplicative monoid. The identities for each monoid are
+		 * \em true and \em false, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using lxnorLor = grb::Semiring<
+			grb::operators::equal< D3, D4, D4 >,
+			grb::operators::logical_or< D1, D2, D3 >,
+			grb::identities::logical_true, grb::identities::logical_false
+		>;
+
+		/**
+		 * The equals logical-or semiring.
+		 *
+		 * Uses \em equals as the additive commutative monoid and \em or as the
+		 * multiplicative monoid. The identities for each monoid are \em true and
+		 * \em false, respectively.
+		 *
+		 * The three domains of the multiplicative monoid are:
+		 *
+		 * @tparam D1 The left-hand input domain of the multiplicative monoid
+		 * @tparam D2 The right-hand input domain of the multiplicative monoid
+		 * @tparam D3 The output domain of the multiplicative monoid
+		 *
+		 * The domains of the additive monoid are \a D3 and:
+		 *
+		 * @tparam D4 The right-hand input domain of the additive monoid, as well as
+		 *            the output domain of the additive monoid.
+		 */
+		template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+		using leqLor = lxnorLor< D1, D2, D3, D4 >;
+
+	}
+
 } // namespace grb
 
 #endif
diff --git a/include/graphblas/utils/suppressions.h b/include/graphblas/utils/suppressions.h
index 2b7b656af..db2217666 100644
--- a/include/graphblas/utils/suppressions.h
+++ b/include/graphblas/utils/suppressions.h
@@ -45,6 +45,10 @@
   _Pragma( "GCC diagnostic push" ) ;\
   _Pragma( "GCC diagnostic ignored \"-Wclass-memaccess\"" );\
 
+ #define GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT \
+  _Pragma( "GCC diagnostic push" ) ;\
+  _Pragma( "GCC diagnostic ignored \"-Wint-in-bool-context\"" );\
+
  #define GRB_UTIL_RESTORE_WARNINGS \
   _Pragma( "GCC diagnostic pop" );\
 
@@ -54,6 +58,7 @@
  #define GRB_UTIL_IGNORE_STRING_TRUNCATION
  #define GRB_UTIL_IGNORE_CLASS_MEMACCESS
  #define GRB_UTIL_RESTORE_WARNINGS
+ #define GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT
 #endif
 
 #endif // end ``_H_GRB_UTILS_SUPRESSIONS''
diff --git a/include/transition/fuselets.h b/include/transition/fuselets.h
new file mode 100644
index 000000000..dbe208c47
--- /dev/null
+++ b/include/transition/fuselets.h
@@ -0,0 +1,455 @@
+
+/*
+ *   Copyright 2024 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * \ingroup TRANS
+ *
+ * Provides a set of fused level-1 and level-2 ALP kernels.
+ *
+ * The fused kernels are designed to be easily callable from existing code
+ * bases, using standard data structures such as raw pointers to vectors and the
+ * Compressed Row Storage (CRS) for sparse matrices.
+ *
+ * @author A. N. Yzelman
+ * @date 27/09/2024
+ */
+
+/**
+ * \defgroup TRANS_FUSELETS Fuselets
+ * \ingroup TRANS
+ * @{
+ *
+ * While Algebraic Programming (ALP) is a programming model at its core, one
+ * may also use ALP to generate libraries callable from any application code,
+ * even if said application is not written using ALP.
+ *
+ * Fuselets use the ALP nonblocking backend by Mastoras et al. [1,2] in
+ * particular to generate a set of fused kernels that mix dense level-1 BLAS
+ * operations with `level-2' Sparse BLAS operations. The fused kernels exhibit
+ * higher performance due to enhanced data reuse. An example of a fuselet is a
+ * dense vector update, followed by a sparse matrix--vector multiplication,
+ * finally followed by a dot-product of two dense vectors: #update_spmv_dot_dsu.
+ *
+ * The exposed API for the fuselets is standard C. The header may also be safely
+ * included from standard C++. In standard configuration, ALP builds and
+ * installs both static and dynamic fuselet libraries.
+ *
+ * \warning Any application that relies on fuselets is \em strongly encouraged
+ *          to enable standard OpenMP thread pinning (such as, for example,
+ *          achieved by setting the <tt>OMP_PROC_BIND</tt> environment variable
+ *          to <tt>true</tt>).
+ *
+ * While the presently-implemented fuselets were requested for accelerating a
+ * pre-existing distributed-memory solver, the implementation of the fuselets
+ * demonstrates how effectively ALP can be used for code generation. The tiny
+ * size of the implementation is particularly compelling, with typical gains
+ * between 50 to 200 percent (1.5-3x), depending on the vector sizes as well as
+ * the nonzero-to-size ratio [1,2], as well as on the depth of the fuselets.
+ *
+ * \note The maximum pipeline depth in the pre-defined fuselets here provided,
+ *       is three. This pipeline depth forms a simple upper bound on achievable
+ *       speedup.
+ *
+ * If using fuselets for scientific work, please cite the related papers [1,2];
+ * and please refer to the same papers for more details about the nonblocking
+ * backend:
+ *
+ * [1] Design and implementation for nonblocking execution in GraphBLAS:
+ *     tradeoffs and performance by Aristeidis Mastoras, Sotiris Anagnostidis,
+ *     and A. N. Yzelman, ACM Transactions on Architecture and Code Optimization
+ *     (TACO) 20, 1, Article 6 (2023).
+ * [2] Nonblocking execution in GraphBLAS by Aristeidis Mastoras, Sotiris
+ *     Anagnostidis, and A. N. Yzelman in IPDPSW, pp. 230-233, IEEE (2022).
+ *
+ * Users interested in defining their own fuselets may directly edit the
+ * following files to add any new functions they require:
+ *  - <tt>include/transition/fuselets.h</tt>, and
+ *  - <tt>src/transition/fuselets.cpp</tt>.
+ * After any such edits, simply issue <tt>make install</tt> again, which will
+ * update your installed fuselets to include your newly-defined routines, thus
+ * providing an easily-customisable extensible framework.
+ *
+ * For fuselets that take matrix inputs, we here assume standard Compressed Row
+ * Storage (CRS), also known as Compressed Sparse Rows (CSR). The fuselets
+ * currently defined use a different postfix to distinguish between the possible
+ * types one may use for each of the three standard CRS arrays. For example, the
+ * postfix <tt>_dsu</tt> indicates a double-precision nonzero value array (d),
+ * an offset array of type <tt>size_t</tt> (s), and a column index array of type
+ * <tt>unsigned int</tt> (u). The postfix <tt>_dii</tt> instead indicates arrays
+ * of type <tt>double</tt>, <tt>int</tt>, and <tt>int</tt>, respectively.
+ *
+ * More details about the presently provided variants may be found in the
+ * following note.
+ *
+ * \note The de-facto industry-standard Compressed Row Storage (CRS) comprises
+ *       three arrays: a row offset array, a column index array, and, for
+ *       non-pattern matrices, a value array. The element types of the former
+ *       two arrays can have multiple sensible values:
+ *        - 64-bit unsigned integers for the offset array (s), default-sized
+ *          (usually 32-bit) integers (i), or default-sized unsigned integers
+ *          (u).
+ *        - 64-bit unsigned integers for the column indices (s), default-sized
+ *          (usually 32-bit) integers instead (i), or default-sized unsigned
+ *          integers (u).
+ *       For values, we presently only support double-precision (d). All postfix
+ *       combinations presently supported are <tt>_dsu</tt> and <tt>_dii</tt>.
+ *
+ * \note Other postfix combinations are easily provided; if unclear how or if a
+ *       presently missing combination should be of wide interest, please create
+ *       a feature requests and/or contact the maintainers.
+ *
+ * Typical example work estimation for adding a new fuselets, assuming
+ * familiarity with the use of ALP and allowing copying code snippets from
+ * pre-existing fuselets:
+ *  - writing the spec for a new fuselet: 12 minutes
+ *  - implementing the new fuselet: 8 minutes
+
+ * \note This was measured for spmv_dot_norm2. While the code includes proper
+ *       error handling, it does not include time required for writing and
+ *       adding automated tests.
+ */
+
+#ifndef _H_ALP_FUSELETS
+#define _H_ALP_FUSELETS
+
+#include <stddef.h> // for size_t
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/**
+	 * Initialisation routine that should be called before calling any fuselets.
+	 *
+	 * A user application shall
+	 *  1. call this function after application start and before calling any
+	 *     fuselets, as well as
+	 *  2. call this function after a call to #finalize_fuselets and before any
+	 *     subsequent calls to fuselets.
+	 * This function shall not be called in any other case.
+	 *
+	 * \note For example, it is not legal to call this function twice without an
+	 *       call to #finalize_fuselets in between.
+	 *
+	 * To ensure proper clean-up before application termination, all calls to this
+	 * function should be matched with a call to #finalize_fuselets.
+	 *
+	 * @returns Zero, if the initialisation has proceeded successfully.
+	 * @returns Any other value, if initialisation has failed. In this case,
+	 *          it shall be as though this call had never occurred. In particular,
+	 *          any subsequent calls to fuselets shall (thus) induce undefined
+	 *          behaviour.
+	 *
+	 * The recommendation is to call this function once and as soon as possible
+	 * after the application <tt>main</tt> function has started.
+	 */
+	int initialize_fuselets();
+
+	/**
+	 * Cleans up fuselet resources.
+	 *
+	 * It may only be called once after every call to #initialize_fuselets. Cannot
+	 * follow another call to #finalize_fuselets without a call to
+	 * #initialize_fuselets in between.
+	 *
+	 * The recommendation is to call this function once and just before the
+	 * application <tt>main</tt> function terminates.
+	 */
+	int finalize_fuselets();
+
+	/**
+	 * Computes \f$ v, \beta \f$ from:
+	 *
+	 *  - \f$ v = Ay + \alpha v \f$,
+	 *  - \f$ \beta = (r,v) \f$.
+	 *
+	 * @param[in,out] v    The input and output vector \f$ v \f$
+	 * @param[out]    beta The output scalar \f$ \beta \f$
+	 *
+	 * The pointer \a v should point to an array, while the pointer \a beta should
+	 * point to a scalar. In the case the initial values of \a v should be ignored,
+	 * set the argument \a alpha to zero.
+	 *
+	 * @param[in] alpha The input scalar \f$ \alpha \f$
+	 * @param[in] ia    The CRS row offset array of \f$ A \f$
+	 * @param[in] ij    The CRS column index array of \f$ A \f$
+	 * @param[in] iv    The CRS value array of \f$ A \f$
+	 * @param[in] y     The input vector \f$ y \f$
+	 *
+	 * Here, \a alpha is a scalar value. The pointers \a ia, \a ij, and \a iv
+	 * correspond to a CRS of \f$ A \f$. The pointer \a y should point to an array.
+	 *
+	 * @param[in] r The input vector \f$ r \f$
+	 *
+	 * The pointer \a r should point to an array.
+	 *
+	 * @param[in] n The row-wise \em and column-wise dimension of \f$ A \f$
+	 *
+	 * The sizes of the arrays pointed to by \a v, \a y, and \a r should have size
+	 * \f$ n \f$.
+	 *
+	 * @returns Zero if and only if the call executed successfully.
+	 * @returns A nonzero error code otherwise.
+	 */
+	int spmv_dot_dsu(
+		double * const v, double * const beta,                    // outputs
+		const size_t * const ia, const unsigned int * const ij,
+		const double * const iv, const double * const y,
+		const double alpha,                                       // input 1
+		const double * const r,                                   // input 2
+		const size_t n                                            // size
+	);
+
+	/**
+	 * @see spmv_dot_dii
+	 */
+	int spmv_dot_dii(
+		double * const v, double * const beta,
+		const int * const ia, const int * const ij,
+		const double * const iv, const double * const y,
+		const double alpha,
+		const double * const r,
+		const size_t n
+	);
+
+	/**
+	 * Computes \f$ v, \beta, \gamma \f$ from:
+	 *  - \f$ v = Ay + \alpha v \f$,
+	 *  - \f$ \beta = (v,r) \f$,
+	 *  - \f$ \gamma = ||v||_2^2 \f$.
+	 *
+	 * @param[in,out] v     The input and output vector \f$ v \f$
+	 * @param[out]    beta  The output scalar \f$ \beta \f$
+	 * @param[out]    gamma The output scalar \f$ \gamma \f$
+	 *
+	 * The pointer \a v should point to an array, while the pointers \a beta and
+	 * \a gamma should point to scalars. In the case the initial values of \a v
+	 * should be ignored, set the argument \a alpha to zero.
+	 *
+	 * @param[in] alpha The input scalar \f$ \alpha \f$
+	 * @param[in] ia    The CRS row offset array of \f$ A \f$
+	 * @param[in] ij    The CRS column index array of \f$ A \f$
+	 * @param[in] iv    The CRS value array of \f$ A \f$
+	 * @param[in] y     The input vector \f$ y \f$
+	 *
+	 * Here, \a alpha is a scalar value. The pointers \a ia, \a ij, and \a iv
+	 * correspond to a CRS of \f$ A \f$. The pointer \a y should point to an array.
+	 *
+	 * @param[in] r The input vector \f$ r \f$
+	 *
+	 * The pointer \a r should point to an array.
+	 *
+	 * @param[in] n The row-wise \em and column-wise dimension of \f$ A \f$
+	 *
+	 * The sizes of the arrays pointed to by \a v, \a y, and \a r should have size
+	 * \f$ n \f$.
+	 *
+	 * @returns Zero if and only if the call executed successfully.
+	 * @returns A nonzero error code otherwise.
+	 */
+	int spmv_dot_norm2_dsu(
+		double * const v,
+		double * const beta, double * const gamma,              // outputs
+		const size_t * const ia, const unsigned int * const ij,
+		const double * const iv, const double * const y,
+		const double alpha,                                     // input 1
+		const double * const r,                                 // input 2
+		const size_t n                                          // size
+	);
+
+	/** @see spmv_dot_norm2_dsu */
+	int spmv_dot_norm2_dii(
+		double * const v,
+		double * const beta, double * const gamma,
+		const int * const ia, const int * const ij,
+		const double * const iv, const double * const y,
+		const double alpha,
+		const double * const r,
+		const size_t n
+	);
+
+	/**
+	 * Computes \f$ p, u, \alpha \f$ from:
+	 *
+	 *  - \f$ p = z + \beta p \f$,
+	 *  - \f$ u = Ap \f$,
+	 *  - \f$ \alpha = (u,p) \f$.
+	 *
+	 * @param[in,out] p  The input and output vector \f$ p \f$
+	 * @param[out]    u  The output vector \f$ u \f$
+	 * @param[out] alpha The output scalar \f$ \alpha \f$
+	 *
+	 * The pointers \a p and \a u should be pointers to arrays, while \a alpha
+	 * should be a pointer to a scalar. The contents of \a u need \em not be zeroed
+	 * out(!)-- this fuselet will reset the vector. Similarly, the initial value of
+	 * \a alpha will be ignored.
+	 *
+	 * @param[in]   z  The input vector \f$ z \f$
+	 * @param[in] beta The input scalar \f$ \beta \f$
+	 *
+	 * The pointer \a z should point to an array while \a beta is a scalar.
+	 *
+	 * @param[in] ia The CRS row offset array of \f$ A \f$
+	 * @param[in] ij The CRS column index array of \f$ A \f$
+	 * @param[in] iv The CRS value array of \f$ A \f$
+	 *
+	 * The pointers \a ia, \a ij, and \a iv correspond to a CRS of \f$ A \f$.
+	 *
+	 * @param[in] n The row-wise \em and column-wise dimension of \a A
+	 *
+	 * The size of the arrays \a p, \a u, and \a z is \f$ n \f$. The size of the
+	 * array \f$ ia \f$ is \f$ n + 1 \f$. The size of the arrays \a ij and * \a iv
+	 * is <tt>ia[n]</tt>.
+	 *
+	 * @returns Zero if and only if the call executed successfully.
+	 * @returns A nonzero error code otherwise.
+	 */
+	int update_spmv_dot_dsu(
+		double * const p, double * const u, double * const alpha, // outputs
+		const double * const z, const double beta,                // input 1
+		const size_t * const ia, const unsigned int * const ij,
+		const double * const iv,                                  // input 2
+		const size_t n                                            // size
+	);
+
+	/** @see update_spmv_dot_dsu */
+	int update_spmv_dot_dii(
+		double * const p, double * const u, double * const alpha,
+		const double * const z, const double beta,
+		const int * const ia, const int * const ij, const double * const iv,
+		const size_t n
+	);
+
+	/**
+	 * Computes \f$ x, r, \mathit{norm} \f$ from:
+	 *
+	 *  - \f$ x = \alpha p + x \f$,
+	 *  - \f$ r = \beta u + r \f$,
+	 *  - \f$ \mathit{norm} = ||r||_2^2 \f$.
+	 *
+	 * @param[in,out] x     The input and output vector \f$ x \f$
+	 * @param[in,out] r     The input and output vector \f$ r \f$
+	 * @param[out]    norm2 The 2-norm-squared of \f$ r \f$
+	 *
+	 * The pointers \a x and \a r should be pointers to arrays, while \a norm2
+	 * should be a pointer to a scalar. The initial value of \a norm2 will be
+	 * ignored.
+	 *
+	 * @param[in] alpha The input scalar \f$ \alpha \f$
+	 * @param[in] p     The input vector \f$ p \f$
+	 *
+	 * Here, \a alpha is a scalar while \a p is a pointer to an array.
+	 *
+	 * @param[in] beta The input scalar \f$ \beta \f$
+	 * @param[in] u    The input vector \f$ u \f$
+	 *
+	 * Here, \a beta is a scalar while \a u is a pointer to an array.
+	 *
+	 * @param[in] n The size of the vectors \a x, \a r, \a p, and \a u.
+	 *
+	 * @returns Zero if and only if the call executed successfully.
+	 * @returns A nonzero error code otherwise.
+	 */
+	int update_update_norm2(
+		double * const x, double * const r, double * const norm2, // outputs
+		const double alpha, const double * const p,               // input 1
+		const double beta, const double * const u,                // input 2
+		const size_t n                                            // size
+	);
+
+	/**
+	 * Computes \f$ p \f$ from:
+	 *
+	 *  - \f$ p = \alpha r + \beta v + \gamma p \f$
+	 *
+	 * @param[in,out] p     The input and output vector \f$ p \f$
+	 *
+	 * The pointer \a p should be a pointer to an array.
+	 *
+	 * @param[in] alpha The input scalar \f$ \alpha \f$
+	 * @param[in] r     The input vector \f$ r \f$
+	 * @param[in] beta  The input scalar \f$ \beta \f$
+	 * @param[in] v     The input vector \f$ v \f$
+	 * @param[in] gamma The input scalar \f$ \gamma \f$
+	 *
+	 * Here, \a alpha, \a beta, \a gamma are scalars while \a p, \a r, and \a v are
+	 * pointers to arrays.
+	 *
+	 * @param[in] n The size of the vectors \a p, \a r, and \a v.
+	 *
+	 * @returns Zero if and only if the call executed successfully.
+	 * @returns A nonzero error code otherwise.
+	 */
+	int double_update(
+		double * const p,                           // output
+		const double alpha, const double * const r, // input 1
+		const double beta, const double * const v,  // input 2
+		const double gamma,                         // input 3
+		const size_t n                              // size
+	);
+
+	/**
+	 * Computes \f$ x, r, \theta \f$ from:
+	 *
+	 *  - \f$ x = \beta y + \omega z + \alpha x \f$,
+	 *  - \f$ r = \eta t + \zeta r \f$,
+	 *  - \f$ \theta = ||r||_2^2 \f$.
+	 *
+	 * @param[in,out] x     The input and output vector \f$ x \f$
+	 * @param[in,out] r     The input and output vector \f$ r \f$
+	 * @param[out]    theta The output scalar \f$ \theta \f$
+	 *
+	 * Here, \a x and \a r are pointers to arrays while \a theta is a pointer to
+	 * a scalar. Any initial contents of what \a theta points to, will be ignored.
+	 * If any initial contents of \a x should be ignored, set \a alpha to zero. If
+	 * any initial contents of \a r should be ignored, set \a zeta to zero.
+	 *
+	 * @param[in] beta  The input scalar \f$ \beta \f$
+	 * @param[in] y     The input vector \f$ y \f$
+	 * @param[in] omega The input scalar \f$ \omega \f$
+	 * @param[in] z     The input vector \f$ z \f$
+	 * @param[in] alpha The input scalar \f$ \alpha \f$
+	 *
+	 * @param[in] eta  The input scalar \f$ \eta \f$
+	 * @param[in] t    The input vector \f$ t \f$
+	 * @param[in] zeta The input scalar \f$ \zeta \f$
+	 *
+	 * @param[in] n The vector size (in number of elements).
+	 *
+	 * The sizes of the vectors \a x, \a r, \a y, \a z, and \a t point to, should
+	 * equal \a n.
+	 */
+	int doubleUpdate_update_norm2(
+		double * const x, double * const r, double * const theta, // output
+		const double beta, const double * const y,
+		const double omega, const double * const z,
+		const double alpha,                                       // input 1
+		const double eta, const double * const t,
+		const double zeta,                                        // input 2
+		const size_t n                                            // size
+	);
+
+#ifdef __cplusplus
+} // end extern "C"
+#endif
+
+#endif // end ifdef _H_ALP_FUSELETS
+
+/** @} */ // ends doxygen page for fuselets
+
diff --git a/src/transition/CMakeLists.txt b/src/transition/CMakeLists.txt
index cca57f9e1..1eec9a9a8 100644
--- a/src/transition/CMakeLists.txt
+++ b/src/transition/CMakeLists.txt
@@ -92,5 +92,8 @@ if( WITH_NONBLOCKING_BACKEND )
 			PRIVATE_LINK_LIBRARIES spsolver_shmem_parallel backend_nonblocking
 		)
 	endif()
+	add_transition_library( fuselets STATIC "fuselets" ${CMAKE_CURRENT_SOURCE_DIR}/fuselets.cpp
+		PRIVATE_LINK_LIBRARIES backend_nonblocking
+	)
 endif()
 
diff --git a/src/transition/fuselets.cpp b/src/transition/fuselets.cpp
new file mode 100644
index 000000000..a366fa6d0
--- /dev/null
+++ b/src/transition/fuselets.cpp
@@ -0,0 +1,669 @@
+
+/*
+ *   Copyright 2024 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * \ingroup TRANS_FUSELETS
+ *
+ * Implements a set of fused level-1 and level-2 ALP kernels.
+ *
+ * The fused kernels are designed to be easily callable from existing code
+ * bases, using standard data structures such as raw pointers to vectors and the
+ * Compressed Row Storage for sparse matrices.
+ *
+ * As a secondary goal, this standard set of fuselets demos the so-called ALP
+ * <em>native interface</em>, show-casing the ease by which additional fuselets
+ * that satisfy arbitrary needs can be added.
+ *
+ * @author A. N. Yzelman
+ * @date 27/09/2024
+ */
+
+#include <graphblas.hpp>
+
+#include <exception>
+#include <iostream>
+
+#include <assert.h>
+
+#include "fuselets.h"
+
+
+template< typename T >
+using MySemiring = grb::semirings::plusTimes< T >;
+
+template< typename T >
+using MyAddMonoid = grb::monoids::plus< T >;
+
+template< typename T >
+using MyTimesMonoid = grb::monoids::times< T >;
+
+static MySemiring< double > dblSemiring;
+static MyAddMonoid< double > dblPlusMonoid;
+static MyTimesMonoid< double > dblTimesMonoid;
+
+int initialize_fuselets() {
+	const grb::RC rc = grb::init();
+	if( rc != grb::SUCCESS ) {
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+int finalize_fuselets() {
+	const grb::RC rc = grb::finalize();
+	if( rc != grb::SUCCESS ) {
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+template< typename OffsetT, typename IndexT, typename ValueT >
+static int spmv_dot(
+	double * const v, double * const beta,
+	const OffsetT * const ia, const IndexT * const ij,
+	const ValueT * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	// typedef our matrix type, which depends on the above argument types
+	typedef grb::Matrix<
+			ValueT,                       // the matrix value type
+			grb::config::default_backend, // use the compile-time selected backend
+			                              // (set by CMakeLists.txt: nonblocking)
+			IndexT, IndexT,               // the types of the row- and column-indices
+			OffsetT                       // the type of the ia array
+		> MyMatrixType;
+
+	// catch trivial op
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// dynamic checks
+	assert( v != nullptr );
+	assert( beta != nullptr );
+	assert( ia != nullptr );
+	assert( ij != nullptr );
+	assert( iv != nullptr );
+	assert( y != nullptr );
+	assert( r != nullptr );
+
+	grb::Vector< double > alp_v =
+		grb::internal::template wrapRawVector< double >( n, v );
+	const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n );
+	const grb::Vector< double > alp_y =
+		grb::internal::template wrapRawVector< double >( n, y );
+
+	// perform operation 1
+	grb::RC rc = grb::SUCCESS;
+	if( alpha == 0.0 || alpha == -0.0 ) {
+		rc = grb::set< grb::descriptors::dense >( alp_v, 0.0 );
+	} else {
+		rc = grb::foldr< grb::descriptors::dense >( alpha, alp_v, dblTimesMonoid );
+	}
+
+	rc = rc ? rc : grb::mxv<
+			grb::descriptors::dense | grb::descriptors::force_row_major
+		>(
+			alp_v, alp_A, alp_y, dblSemiring
+		);
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot encountered error at operation 1: "
+			<< grb::toString( rc ) << "\n";
+		return 10;
+	}
+
+	// perform operation 2
+	const grb::Vector< double > alp_r =
+		grb::internal::template wrapRawVector< double >( n, r );
+	double &alp_beta = *beta;
+	alp_beta = 0.0;
+	rc = rc ? rc : grb::dot< grb::descriptors::dense >(
+		alp_beta, alp_r, alp_v, dblSemiring );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot encountered error at operation 2: "
+			<< grb::toString( rc ) << "\n";
+		return 20;
+	}
+
+	// done
+	rc = rc ? rc : grb::wait( alp_v );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot encountered error: "
+			<< grb::toString( rc ) << "\n";
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+int spmv_dot_dsu(
+	double * const v, double * const beta,
+	const size_t * const ia, const unsigned int * const ij,
+	const double * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	return spmv_dot< size_t, unsigned int, double >(
+		v, beta, ia, ij, iv, y, alpha, r, n );
+}
+
+int spmv_dot_dii(
+	double * const v, double * const beta,
+	const int * const ia, const int * const ij,
+	const double * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	return spmv_dot< int, int, double >(
+		v, beta, ia, ij, iv, y, alpha, r, n );
+}
+
+template< typename OffsetT, typename IndexT, typename ValueT >
+static int spmv_dot_norm2(
+	double * const v,
+	double * const beta, double * const gamma,
+	const OffsetT * const ia, const IndexT * const ij,
+	const ValueT * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	// typedef our matrix type, which depends on the above argument types
+	typedef grb::Matrix<
+			ValueT,                       // the matrix value type
+			grb::config::default_backend, // use the compile-time selected backend
+			                              // (set by CMakeLists.txt: nonblocking)
+			IndexT, IndexT,               // the types of the row- and column-indices
+			OffsetT                       // the type of the ia array
+		> MyMatrixType;
+
+	// catch trivial op
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// dynamic checks
+	assert( v != nullptr );
+	assert( beta != nullptr );
+	assert( gamma != nullptr );
+	assert( ia != nullptr );
+	assert( ij != nullptr );
+	assert( iv != nullptr );
+	assert( y != nullptr );
+	assert( r != nullptr );
+
+	grb::Vector< double > alp_v =
+		grb::internal::template wrapRawVector< double >( n, v );
+	const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n );
+	const grb::Vector< double > alp_y =
+		grb::internal::template wrapRawVector< double >( n, y );
+
+	// perform operation 1
+	grb::RC rc = grb::SUCCESS;
+	if( alpha == 0.0 || alpha == -0.0 ) {
+		rc = grb::set< grb::descriptors::dense >( alp_v, 0.0 );
+	} else {
+		rc = grb::foldr< grb::descriptors::dense >( alpha, alp_v, dblTimesMonoid );
+	}
+
+	rc = rc ? rc : grb::mxv<
+			grb::descriptors::dense | grb::descriptors::force_row_major
+		>(
+			alp_v, alp_A, alp_y, dblSemiring
+		);
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 1: "
+			<< grb::toString( rc ) << "\n";
+		return 10;
+	}
+
+	// perform operation 2
+	const grb::Vector< double > alp_r =
+		grb::internal::template wrapRawVector< double >( n, r );
+	double &alp_beta = *beta;
+	alp_beta = 0.0;
+	rc = rc ? rc : grb::dot< grb::descriptors::dense >(
+		alp_beta, alp_r, alp_v, dblSemiring );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 2: "
+			<< grb::toString( rc ) << "\n";
+		return 20;
+	}
+
+	// perform operation 3
+	double &alp_gamma = *gamma;
+	alp_gamma = 0.0;
+	rc = rc ? rc : grb::dot< grb::descriptors::dense >(
+		alp_gamma, alp_v, alp_v, dblSemiring );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error at operation 3: "
+			<< grb::toString( rc ) << "\n";
+		return 30;
+	}
+
+	// done
+	rc = rc ? rc : grb::wait( alp_v );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets spmv_dot_norm2 encountered error: "
+			<< grb::toString( rc ) << "\n";
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+int spmv_dot_norm2_dsu(
+	double * const v,
+	double * const beta, double * const gamma,
+	const size_t * const ia, const unsigned int * const ij,
+	const double * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	return spmv_dot_norm2< size_t, unsigned int, double >(
+		v, beta, gamma, ia, ij, iv, y, alpha, r, n );
+}
+
+int spmv_dot_norm2_dii(
+	double * const v,
+	double * const beta, double * const gamma,
+	const int * const ia, const int * const ij,
+	const double * const iv, const double * const y,
+	const double alpha,
+	const double * const r,
+	const size_t n
+) {
+	return spmv_dot_norm2< int, int, double >(
+		v, beta, gamma, ia, ij, iv, y, alpha, r, n );
+}
+
+template< typename OffsetT, typename IndexT, typename ValueT >
+static int update_spmv_dot(
+	double * const p, double * const u, double * const alpha,
+	const double * const z, const double beta,
+	const OffsetT * const ia, const IndexT * const ij,
+	const ValueT * const iv,
+	const size_t n
+) {
+	// typedef our matrix type, which depends on the above argument types
+	// typedef our matrix type, which depends on the above argument types
+	typedef grb::Matrix<
+			ValueT,                       // the matrix value type
+			grb::config::default_backend, // use the compile-time selected backend
+			                              // (set by CMakeLists.txt: nonblocking)
+			IndexT, IndexT,               // the types of the row- and column-indices
+			OffsetT                       // the type of the ia array
+		> MyMatrixType;
+
+	// catch trivial op
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// simple dynamic sanity checks
+	assert( p != nullptr );
+	assert( u != nullptr );
+	assert( alpha != nullptr );
+	assert( ia != nullptr );
+	assert( ij != nullptr );
+	assert( iv != nullptr );
+	assert( ij[ n ] / n <= n );
+#ifndef NDEBUG
+	// we employ defensive programming and perform expensive input checks when
+	// compiled in debug mode:
+	for( size_t i = 0; i < n; ++i ) {
+		assert( ia[ i + 1 ] >= ia[ i ] );
+		for( size_t k = ia[ i ]; k < ia[ i + 1 ]; ++k ) {
+			assert( ij[ k ] < n );
+		}
+	}
+#endif
+
+	// get ALP versions of (input/)output containers
+	grb::Vector< double > alp_p =
+		grb::internal::template wrapRawVector< double >( n, p );
+	grb::Vector< double > alp_u =
+		grb::internal::template wrapRawVector< double >( n, u );
+	double &alp_alpha = *alpha;
+
+	// do first op
+	const grb::Vector< double > alp_z =
+		grb::internal::template wrapRawVector< double >( n, z );
+	grb::RC ret = grb::foldr< grb::descriptors::dense >(
+		beta, alp_p,
+		dblTimesMonoid
+	);
+	ret = ret ? ret : grb::foldr< grb::descriptors::dense >(
+		alp_z, alp_p,
+		dblPlusMonoid
+	);
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 1: "
+			<< grb::toString( ret ) << "\n";
+		return 1;
+	}
+
+	// do second op
+	const MyMatrixType alp_A = grb::internal::wrapCRSMatrix( iv, ij, ia, n, n );
+	ret = grb::set< grb::descriptors::dense >( alp_u, 0.0 );
+	ret = ret ? ret : grb::mxv<
+		grb::descriptors::dense | grb::descriptors::force_row_major
+	>( alp_u, alp_A, alp_p, dblSemiring );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 2: "
+			<< grb::toString( ret ) << "\n";
+		return 2;
+	}
+
+	// do third op
+	alp_alpha = 0.0;
+	ret = grb::dot< grb::descriptors::dense >(
+		alp_alpha, alp_u, alp_p, dblSemiring );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 3: "
+			<< grb::toString( ret ) << "\n";
+		return 3;
+	}
+
+	// done
+	ret = grb::wait( alp_p, alp_u );
+	if( ret == grb::SUCCESS ) {
+		return 0;
+	} else {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error: "
+			<< grb::toString( ret ) << "\n";
+		return 255;
+	}
+}
+
+int update_spmv_dot_dsu(
+	double * const p, double * const u, double * const alpha,
+	const double * const z, const double beta,
+	const size_t * const ia, const unsigned int * const ij,
+	const double * const iv,
+	const size_t n
+) {
+	return update_spmv_dot< size_t, unsigned int, double >(
+		p, u, alpha, z, beta, ia, ij, iv, n );
+}
+
+int update_spmv_dot_dii(
+	double * const p, double * const u, double * const alpha,
+	const double * const z, const double beta,
+	const int * const ia, const int * const ij, const double * const iv,
+	const size_t n
+) {
+	return update_spmv_dot< int, int, double >(
+		p, u, alpha, z, beta, ia, ij, iv, n );
+}
+
+int update_update_norm2(
+	double * const x, double * const r, double * const norm2, // outputs
+	const double alpha, const double * const p,               // input 1
+	const double beta, const double * const u,                // input 2
+	const size_t n                                            // size
+) {
+	// catch trivial op
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// simple dynamic sanity checks
+	assert( x != nullptr );
+	assert( r != nullptr );
+	assert( norm2 != nullptr );
+	assert( p != nullptr );
+	assert( u != nullptr );
+
+	// get (input/)output containers
+	grb::Vector< double > alp_x =
+		grb::internal::template wrapRawVector< double >( n, x );
+	grb::Vector< double > alp_r =
+		grb::internal::template wrapRawVector< double >( n, r );
+	double &alp_norm2 = *norm2;
+
+	// perform operation 1
+	const grb::Vector< double > alp_p =
+		grb::internal::template wrapRawVector< double >( n, p );
+	grb::RC ret = grb::eWiseMul< grb::descriptors::dense >(
+		alp_x, alpha, alp_p, dblSemiring );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 1: "
+			<< grb::toString( ret ) << "\n";
+		return 1;
+	}
+
+	// perform operation 2
+	const grb::Vector< double > alp_u =
+		grb::internal::template wrapRawVector< double >( n, u );
+	ret = grb::eWiseMul< grb::descriptors::dense >(
+		alp_r, beta, alp_u, dblSemiring );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 2: "
+			<< grb::toString( ret ) << "\n";
+		return 2;
+	}
+
+	// perform operation 3
+	alp_norm2 = 0.0;
+	ret = grb::dot< grb::descriptors::dense >(
+		alp_norm2, alp_r, alp_r, dblSemiring );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error at operation 3: "
+			<< grb::toString( ret ) << "\n";
+		return 3;
+	}
+
+	// done
+	ret = grb::wait( alp_x, alp_r );
+	if( ret != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets update_spmv_dot encountered error: "
+			<< grb::toString( ret ) << "\n";
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+int double_update(
+	double * const p,
+	const double alpha, const double * const r,
+	const double beta, const double * const v,
+	const double gamma,
+	const size_t n
+) {
+	// check trivial op
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// dynamic checks
+	assert( p != nullptr );
+	assert( r != nullptr );
+	assert( v != nullptr );
+
+	// get container views
+	grb::Vector< double > alp_p =
+		grb::internal::template wrapRawVector< double >( n, p );
+	const grb::Vector< double > alp_r =
+		grb::internal::template wrapRawVector< double >( n, r );
+	const grb::Vector< double > alp_v =
+		grb::internal::template wrapRawVector< double >( n, v );
+
+	grb::RC rc = grb::SUCCESS;
+	// p = gamma * p
+	if( gamma != 1.0 ) {
+		rc = grb::foldr< grb::descriptors::dense >( gamma, alp_p, dblTimesMonoid );
+	} else if( gamma == 0.0 || gamma == -0.0 ) {
+		rc = grb::set( alp_p, 0 );
+	}
+
+	if( beta != 0.0 && beta != -0.0 ) {
+		if( beta != 1.0 ) {
+			// p = beta .* v + gamma .* p
+			rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >(
+				alp_p, beta, alp_v, dblSemiring );
+		} else {
+			// p = v + (gamma .* p)
+			rc = rc ? rc : grb::foldr< grb::descriptors::dense >(
+				alp_v, alp_p, dblPlusMonoid );
+		}
+	}
+
+	if( alpha != 0.0 && beta != -0.0 ) {
+		if( alpha != 1.0 ) {
+			// p = alpha .* r + beta .* v + gamma .* p
+			rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >(
+				alp_p, alpha, alp_r, dblSemiring );
+		} else {
+			// p = r + (beta .* v + gamma .* p)
+			rc = rc ? rc : grb::foldr< grb::descriptors::dense >(
+				alp_r, alp_p, dblPlusMonoid );
+		}
+	}
+
+	// done
+	rc = rc ? rc : grb::wait( alp_p );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets double_update encountered error: "
+			<< grb::toString( rc ) << "\n";
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
+int doubleUpdate_update_norm2(
+	double * const x, double * const r, double * const theta,
+	const double beta, const double * const y,
+	const double omega, const double * const z,
+	const double alpha,
+	const double eta, const double * const t,
+	const double zeta,
+	const size_t n
+) {
+	// check trivial dispatch
+	if( n == 0 ) {
+		return 0;
+	}
+
+	// dynamic checks
+	assert( x != nullptr );
+	assert( r != nullptr );
+	assert( theta != nullptr );
+	assert( y != nullptr );
+	assert( z != nullptr );
+	assert( t != nullptr );
+
+	// get outputs first
+	grb::Vector< double > alp_x =
+		grb::internal::template wrapRawVector< double >( n, x );
+	grb::Vector< double > alp_r =
+		grb::internal::template wrapRawVector< double >( n, r );
+	double &alp_theta = *theta;
+
+	// do first operation
+	grb::RC rc = grb::SUCCESS;
+	const grb::Vector< double > alp_z =
+		grb::internal::template wrapRawVector< double >( n, z );
+	const grb::Vector< double > alp_y =
+		grb::internal::template wrapRawVector< double >( n, y );
+	if( alpha == 0.0 || alpha == -0.0 ) {
+		rc = grb::set< grb::descriptors::dense >( alp_x, 0.0 );
+	} else if( alpha != 1.0 ) {
+		rc = grb::foldr< grb::descriptors::dense >( alpha, alp_x, dblTimesMonoid );
+	}
+	if( omega != 0.0 && omega != -0.0 ) {
+		if( omega != 1.0 ) {
+			rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >(
+				alp_x, omega, alp_z, dblSemiring );
+		} else {
+			rc = rc ? rc : grb::foldr< grb::descriptors::dense >(
+				alp_z, alp_x, dblPlusMonoid );
+		}
+	}
+	if( beta != 0.0 && beta != -0.0 ) {
+		if( beta != 1.0 ) {
+			rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >(
+				alp_x, beta, alp_y, dblSemiring );
+		} else {
+			rc = rc ? rc : grb::foldr< grb::descriptors::dense >(
+				alp_z, alp_x, dblPlusMonoid );
+		}
+	}
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets doubleUpdate_update_dot "
+			<< "encountered error at operation 1: " << grb::toString( rc ) << "\n";
+		return 10;
+	}
+
+	// perform operation 2:
+	const grb::Vector< double > alp_t =
+		grb::internal::template wrapRawVector< double >( n, t );
+	if( zeta == 0.0 || zeta == -0.0 ) {
+		rc = grb::set< grb::descriptors::dense >( alp_r, 0.0 );
+	} else if( zeta != 1.0 ) {
+		rc = grb::foldr< grb::descriptors::dense >(
+			zeta, alp_r, dblTimesMonoid );
+	}
+	if( eta != 0.0 && eta != -0.0 ) {
+		if( eta != 1.0 ) {
+			rc = rc ? rc : grb::eWiseMul< grb::descriptors::dense >(
+				alp_r, eta, alp_t, dblSemiring );
+		} else {
+			rc = rc ? rc : grb::foldr< grb::descriptors::dense >(
+				alp_t, alp_r, dblPlusMonoid );
+		}
+	}
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets doubleUpdate_update_dot "
+			<< "encountered error at operation 2: " << grb::toString( rc ) << "\n";
+		return 20;
+	}
+
+	// perform last op:
+	alp_theta = 0.0;
+	rc = grb::dot< grb::descriptors::dense >(
+		alp_theta, alp_r, alp_r, dblSemiring );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets doubleUpdate_update_dot "
+			<< "encountered error at operation 3: " << grb::toString( rc ) << "\n";
+		return 30;
+	}
+
+	// done
+	rc = grb::wait( alp_x, alp_r );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "ALP/Fuselets doubleUpdate_update_dot encountered error: "
+			<< grb::toString( rc ) << "\n";
+		return 255;
+	} else {
+		return 0;
+	}
+}
+
diff --git a/tests/performance/CMakeLists.txt b/tests/performance/CMakeLists.txt
index 732431691..3487cf065 100644
--- a/tests/performance/CMakeLists.txt
+++ b/tests/performance/CMakeLists.txt
@@ -56,6 +56,11 @@ add_grb_executables( dot-openmp dot.cpp $<TARGET_OBJECTS:bench_kernels_omp>
 	ADDITIONAL_LINK_LIBRARIES backend_headers_nodefs OpenMP::OpenMP_CXX
 )
 
+add_grb_executables( fuselets_performance fuselets.cpp
+	BACKENDS reference_omp NO_BACKEND_NAME
+	ADDITIONAL_LINK_LIBRARIES fuselets
+)
+
 add_grb_executables( scaling scaling.cpp
 	../unit/parser.cpp
 	BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking
diff --git a/tests/performance/fuselets.cpp b/tests/performance/fuselets.cpp
new file mode 100644
index 000000000..6ed10e7d1
--- /dev/null
+++ b/tests/performance/fuselets.cpp
@@ -0,0 +1,970 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <assert.h>
+#include <inttypes.h> //for strtoumax
+
+#include <graphblas/utils/timer.hpp>
+
+#include <graphblas.hpp>
+
+#include <graphblas/algorithms/matrix_factory.hpp>
+
+#include <transition/fuselets.h>
+
+
+struct Output {
+	grb::utils::TimerResults times;
+	grb::RC error;
+	size_t reps_used;
+};
+
+struct Input {
+	size_t n;
+	size_t rep;
+};
+
+template< typename T >
+static grb::Matrix< T > setupUpperDiagonalMatrix( const size_t n, const T value ) {
+	return grb::algorithms::matrices< double >::eye( n, n, value, 1 );
+}
+
+template< typename T >
+static grb::RC setupVectors(
+	grb::Vector< T > &x,
+	grb::Vector< T > &y,
+	grb::Vector< T > &z
+) {
+	grb::RC rc = grb::set< grb::descriptors::use_index >( x, 0.0 );
+	rc = rc ? rc : grb::set( y, 0.0 );
+	rc = rc ? rc : grb::set( z, 2.0 );
+	return rc;
+}
+
+template< typename T >
+static void getCRS(
+	const grb::Matrix< T > &Am,
+	const T * &av, const size_t * &ai, const unsigned int * &aj
+) {
+	const auto &A = grb::internal::getCRS( Am );
+	av = A.values;
+	ai = A.col_start;
+	aj = A.row_index;
+}
+
+static grb::RC verifySpMV( const double * const y, const size_t n ) {
+	if( y[ n - 1 ] != 1.0 ) {
+		std::cerr << "\t\t error during SpMV output verification (I)\n"
+			<< "\t\t\t expected: 1.0, got: " << y[ n - 1 ]
+			<< " at position " << (n-1) << "\n";
+		return grb::FAILED;
+	}
+	for( size_t i = 0; i < n - 1; ++i ) {
+		if( y[ i ] != static_cast< double >( 2 * (i + 1) + 1 ) ) {
+			std::cerr << "\t\t errpr during SpMV output verification (II)\n"
+				<< "\t\t\t expected: " << ( 2 * ( i + 1 ) + 1 ) << ", got: " << y[ i ]
+				<< " at position " << i << "\n";
+			return grb::FAILED;
+		}
+	}
+	return grb::SUCCESS;
+}
+
+void test_spmv_dot( const struct Input &in, struct Output &out ) {
+	grb::utils::Timer timer;
+	grb::Semiring<
+		grb::operators::add< double >, grb::operators::mul< double >,
+		grb::identities::zero, grb::identities::one
+	> reals;
+
+	// set io time to 0
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n );
+	grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 );
+	grb::RC rc = setupVectors( xv, zv, yv );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_spmv_dot: test initialisation FAILED (I)\n";
+		out.error = grb::FAILED;
+		return;
+	}
+
+	double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw();
+	const double * av = nullptr;
+	const size_t * ai = nullptr; const unsigned int * aj = nullptr;
+	getCRS( Am, av, ai, aj );
+	double beta = 0.0;
+
+	rc = rc ? rc : (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED);
+
+	// verify
+	if( rc == grb::SUCCESS ) {
+		// A has values 2 above its diagonal
+		// x is a dense vector with values (0, 1, ..., n-1)
+		// y is a dense vector of twos
+		// z is a dense vector of zeroes
+		// therefore, calling the spmv_dot fuselet with alpha = 0.5 should result in:
+		//  - beta = 0.0
+		//  - y dense with values(3, 5, 7, ..., 2(n-1), 1)
+		beta = 1.0;
+		const int fuselet_rc = spmv_dot_dsu(
+			y, &beta,
+			ai, aj, av,
+			x,
+			0.5, z,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t test_spmv_dot: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		if( beta != 0.0 || beta != -0.0 ) {
+			std::cerr << "\t test_spmv_dot: verification FAILED (II)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		out.error = verifySpMV( y, in.n );
+		if( out.error != grb::SUCCESS ) {
+			std::cerr << "\t test_spmv_dot: verification FAILED (III)\n";
+			return;
+		}
+	}
+
+	out.times.preamble = timer.time();
+	// end preamble
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_spmv_dot: test initialisation FAILED (II)\n";
+		out.error = rc;
+		return;
+	}
+
+	// benchmark
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		beta = 0.0;
+		(void) spmv_dot_dsu(
+			y, &beta,
+			ai, aj, av,
+			x,
+			0.5, z,
+			in.n
+		);
+	}
+	const double fast = timer.time();
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		(void) grb::foldr< grb::descriptors::dense >( 0.5, yv,
+			grb::operators::mul< double >() );
+		(void) grb::mxv< grb::descriptors::dense >( yv, Am, xv,
+			grb::semirings::plusTimes< double >() );
+		beta = 0.0;
+		(void) grb::dot< grb::descriptors::dense >( beta, zv, yv,
+			grb::semirings::plusTimes< double >() );
+		(void) grb::wait();
+	}
+	const double slow = timer.time();
+
+	// record speedup
+	std::cout << "\t reference_omp: " << slow << " ms.\n\t fuselets: " << fast
+		<< " ms.\n\t (for " << in.rep << " spmv_dots)\n";
+	out.times.useful =
+		static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+void test_spmv_dot_norm2(
+	const struct Input &in, struct Output &out
+) {
+	grb::utils::Timer timer;
+
+	// I/O phase is empty
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n );
+	grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 );
+	grb::RC rc = setupVectors( xv, yv, zv );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (I)\n";
+		out.error = grb::FAILED;
+		return;
+	}
+
+	double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw();
+	const double * av = nullptr;
+	const size_t * ai = nullptr; const unsigned int * aj = nullptr;
+	getCRS( Am, av, ai, aj );
+	double beta, gamma;
+	beta = gamma = 0.0;
+
+	rc = initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED;
+
+	// verify
+	if( rc == grb::SUCCESS ) {
+		// A and x are as in the above test
+		// y is dense with entries 4.23 everywhere
+		// z is dense with entries twos everywhere
+		// therefore, as in the above, after spmv_dot_norm2, the output
+		//  - z should be dense with values (3, 5, 7, ..., 2(n-1), 1)
+		//  - beta should be zero
+		// new to the above, gamma should read norm2-squared of z
+		beta = 1.13;
+		gamma = 2.17;
+		const int fuselet_rc = spmv_dot_norm2_dsu(
+			z, &beta, &gamma,
+			ai, aj, av,
+			x, 0.5, y,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t test_spmv_dot_norm2: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		if( beta != 0.0 || beta != -0.0 ) {
+			std::cerr << "\t test_spmv_dot_norm2: verification FAILED (II)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		double check_gamma = z[ in.n - 1];
+		check_gamma *= check_gamma;
+		for( size_t i = 0; i < in.n - 1; ++i ) {
+			check_gamma += z[ i ] * z[ i ];
+		}
+		if( !grb::utils::equals( gamma, check_gamma, 2 * in.n - 1) ) {
+			std::cerr << "\t test_spmv_dot_norm2: verification FAILED (III)\n"
+				<< "\t\t expected: " << check_gamma << ", got: " << gamma << "\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		out.error = verifySpMV( z, in.n );
+		if( out.error != grb::SUCCESS ) {
+			std::cerr << "\t test_spmv_dot_norm2: verification FAILED (IV)\n";
+			return;
+		}
+	}
+
+	out.times.preamble = timer.time();
+	// end preamble
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (II)\n";
+		out.error = grb::FAILED;
+		return;
+	}
+
+	// benchmark
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		beta = gamma = 0.0;
+		(void) spmv_dot_norm2_dsu(
+			z, &beta, &gamma,
+			ai, aj, av,
+			x, 0.5, y,
+			in.n
+		);
+	}
+	const double fast = timer.time();
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		grb::semirings::plusTimes< double > plusTimes_FP64;
+		(void) grb::foldr< grb::descriptors::dense >( 0.5, zv,
+			grb::operators::mul< double >() );
+		(void) grb::mxv< grb::descriptors::dense >( zv, Am, xv,
+			grb::semirings::plusTimes< double >() );
+		beta = gamma = 0.0;
+		(void) grb::dot< grb::descriptors::dense >( beta, yv, zv, plusTimes_FP64 );
+		(void) grb::dot< grb::descriptors::dense >( gamma, zv, zv, plusTimes_FP64 );
+		(void) grb::wait();
+	}
+	const double slow = timer.time();
+
+	// record speedup:
+	std::cout << "\t test_spmv_dot_norm2 (" << in.rep << " repetitions):\n"
+		<< "\t\t reference_omp: " << slow << " ms.\n"
+		<< "\t\t fuselets: " << fast << " ms.\n";
+	out.times.useful =
+		static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+void test_update_spmv_dot(
+	const struct Input &in, struct Output &out
+) {
+	grb::utils::Timer timer;
+
+	// I/O phase is empty
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > xv( in.n ), yv( in.n ), zv( in.n );
+	grb::Matrix< double > Am = setupUpperDiagonalMatrix( in.n, 2.0 );
+	grb::RC rc = setupVectors( xv, yv, zv );
+	rc = rc ? rc : grb::set< grb::descriptors::dense >( yv, 4.23 );
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_spmv_dot_norm2: test initialisation FAILED (I)\n";
+		out.error = grb::FAILED;
+		return;
+	}
+
+	double * const x = xv.raw(), * const y = yv.raw(), * const z = zv.raw();
+	const double * av = nullptr;
+	const size_t * ai = nullptr; const unsigned int * aj = nullptr;
+	getCRS( Am, av, ai, aj );
+	double beta, gamma;
+	beta = -1.615;
+	gamma = 1.17;
+
+	rc = initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED;
+
+	// verify
+	if( rc == grb::SUCCESS ) {
+		// A and x are as in the above test:
+		//  - A has values 2 above its diagonal
+		//  - x is a dense vector with values (0, 1, ..., n-1)
+		// y is dense with entries 4.23 everywhere
+		// z is dense with entries twos everywhere
+		// therefore, after update_spmv_dot_dsu, the output
+		//  - z should be dense with value one (1.0) everywhere
+		//  - x should be dense with value two (2.0) everywhere
+		//     - except at its last position, which should read zero (0)
+		//  - gamma should read 2.0 * (n-1)
+		const int fuselet_rc = update_spmv_dot_dsu(
+			z, x, &gamma,
+			y, beta,
+			ai, aj, av,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t update_spmv_dot: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		{
+			bool fail = false;
+			for( size_t i = 0; i < in.n; ++i ) {
+				if( !grb::utils::equals( z[ i ], 1.0, 3 ) ) {
+					fail = true;
+					std::cerr << "\t\t z[ " << i << " ], expected one, got "
+						<< z[ i ] << "\n";
+				}
+			}
+			if( fail ) {
+				std::cerr << "\t update_spmv_dot: verification FAILED (II)\n";
+				out.error = grb::FAILED;
+				return;
+			}
+		}
+		double check_gamma = z[ in.n - 1 ] * x[ in.n - 1 ];
+		for( size_t i = 0; i < in.n - 1; ++i ) {
+			check_gamma += z[ i ] * x[ i ];
+		}
+		if( !grb::utils::equals( gamma, check_gamma, 2 * in.n - 1 ) ) {
+			std::cerr << "\t update_spmv_dot: verification FAILED (III)\n"
+				<< "\t\t expected: " << check_gamma << ", got: " << gamma << "\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		{
+			bool fail = false;
+			constexpr double zero = 0.0;
+			constexpr double two = 2.0;
+			if( !grb::utils::equals( x[ in.n - 1 ], zero, 4 ) ) {
+				fail = true;
+				std::cerr << "\t\t x[ " << (in.n-1) << " ] (last entry) "
+				       << "expected zero, got " << x[ in.n - 1 ] << "\n";
+			}
+			for( size_t i = 0; i < in.n - 1; ++i ) {
+				if( !grb::utils::equals( x[ i ], two, 4 ) ) {
+					fail = true;
+					std::cerr << "\t\t x[ " << i << " ] expected 2.0, got "
+						<< x[ i ] << "\n";
+				}
+			}
+			if( fail ) {
+				std::cerr << "\t update_spmv_dot: verification FAILED (IV)\n";
+				out.error = grb::FAILED;
+				return;
+			}
+		}
+	}
+
+	out.times.preamble = timer.time();
+	// end preamble
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t update_spmv_dot: test initialisation FAILED (II)\n";
+		out.error = grb::FAILED;
+		return;
+	}
+
+	// benchmark
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		beta = -1.615;
+		gamma = 1.17;
+		(void) update_spmv_dot_dsu(
+			z, x, &gamma,
+			y, beta,
+			ai, aj, av,
+			in.n
+		);
+	}
+	const double fast = timer.time();
+
+	timer.reset();
+	for( size_t i = 0; i < in.rep; ++i ) {
+		beta = -1.615;
+		gamma = 1.17;
+		grb::semirings::plusTimes< double > plusTimes_FP64;
+		(void) grb::foldr< grb::descriptors::dense >( beta, zv,
+			grb::operators::mul< double >() );
+		(void) grb::foldr< grb::descriptors::dense >( yv, zv,
+			grb::operators::add< double >() );
+		(void) grb::set< grb::descriptors::dense >( xv, 0.0 );
+		(void) grb::mxv< grb::descriptors::dense >( xv, Am, zv, plusTimes_FP64 );
+		gamma = 0.0;
+		(void) grb::dot< grb::descriptors::dense >( gamma, zv, xv, plusTimes_FP64 );
+		(void) grb::wait();
+	}
+	const double slow = timer.time();
+
+	// record speedup:
+	std::cout << "\t test_update_spmv_dot (" << in.rep << " repetitions):\n"
+		<< "\t\t reference_omp: " << slow << " ms.\n"
+		<< "\t\t fuselets: " << fast << " ms.\n";
+	out.times.useful =
+		static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+void test_update_update_norm2(
+	const struct Input &in, struct Output &out
+) {
+	grb::utils::Timer timer;
+
+	// I/O phase is empty
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > xv( in.n ), pv( in.n ), rv( in.n ), uv( in.n );
+	grb::RC rc = setupVectors( xv, pv, rv );
+	rc = rc ? rc : grb::set( uv, 1.0 );
+	double alpha, beta;
+	alpha = 5.0;
+	beta = -2.0;
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_update_update_norm2: initalisation FAILED (I)\n";
+		out.error = rc;
+		return;
+	}
+
+	rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED);
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_update_update_norm2: initialisation FAILED (II)\n";
+		out.error = rc;
+		return;
+	}
+
+	// On successful initialisation, the vector data are as follows:
+	//  - x is a dense vector (0, 1, 2, ..., in.n-1)
+	//  - p is a dense vector with values zero (0)
+	//  - r is a dense vector with values two (2)
+	//  - u is a dense vector with values one (1)
+	// hence the expected output of applying the update_update_norm2 fuselet:
+	//  - x is a dense vector (0, 1, 2, ..., in.n-1),
+	//  - r is a dense vector (0, 0, ..., 0), and therefore
+	//  - the norm should be zero also
+
+	// get raw pointers to vector data
+	double * const x = xv.raw();
+	double * const r = rv.raw();
+	const double * const p = pv.raw();
+	const double * const u = uv.raw();
+
+	// verify
+	{
+		double norm = 50.0;
+		const int fuselet_rc = update_update_norm2(
+			x, r, &norm,
+			alpha, p,
+			beta, u,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t update_update_norm2: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		if( norm != 0.0 || norm != -0.0 ) {
+			std::cerr << "\t update_update_norm2: verification FAILED (II)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		bool fail = false;
+		for( size_t i = 0; i < in.n; ++i ) {
+			constexpr double zero = 0.0;
+			if( !grb::utils::equals( x[ i ], static_cast< double >(i), 3 ) ) {
+				fail = true;
+				std::cerr << "\t\t x[ " << i << " ]: expected " << i << ", got " << x[ i ]
+					<< "\n";
+			}
+			if( !grb::utils::equals( r[ i ], zero, 3 ) ) {
+				fail = true;
+				std::cerr << "\t\t r[ " << i << " ]: expected zero, got " << r[ i ] << "\n";
+			}
+		}
+		if( fail ) {
+			std::cerr << "\t update_update_norm2: verification FAILED (III)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+	}
+
+	// benchmark
+	{
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			double norm2 = 167;
+			(void) update_update_norm2(
+				x, r, &norm2,
+				alpha, p,
+				beta, u,
+				in.n
+			);
+		}
+		const double fast = timer.time();
+
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			double norm2 = 167;
+			grb::semirings::plusTimes< double > plusTimes_FP64;
+			(void) grb::eWiseMul< grb::descriptors::dense >( xv, alpha, pv,
+				plusTimes_FP64 );
+			(void) grb::eWiseMul< grb::descriptors::dense >( rv, beta, uv,
+				plusTimes_FP64 );
+			norm2 = 0.0;
+			(void) grb::dot< grb::descriptors::dense >( norm2, rv, rv, plusTimes_FP64 );
+			(void) grb::wait();
+		}
+
+		const double slow = timer.time();
+
+		// record speedup:
+		std::cout << "\t test_update_update_norm2 (" << in.rep << " repetitions):\n"
+			<< "\t\t reference_omp: " << slow << " ms.\n"
+			<< "\t\t fuselets: " << fast << " ms.\n";
+		out.times.useful =
+			static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+	}
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+void test_double_update(
+	const struct Input &in, struct Output &out
+) {
+	grb::utils::Timer timer;
+
+	// I/O phase is empty
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > rv( in.n ), vv( in.n ), pv( in.n );
+	grb::RC rc = setupVectors( rv, vv, pv );
+	double alpha, beta, gamma;
+	alpha = 2.0;
+	beta = 17.7;
+	gamma = 0.5;
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_double_update: initalisation FAILED (I)\n";
+		out.error = rc;
+		return;
+	}
+
+	rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED);
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_double_update: initialisation FAILED (II)\n";
+		out.error = rc;
+		return;
+	}
+
+	// On successful initialisation, the vector data are as follows:
+	//  - r is a dense vector (0, 1, 2, ..., in.n-1)
+	//  - v is a dense vector with values zero (0)
+	//  - p is a dense vector with values two (2)
+	// hence the expected output of applying the update_update_norm2 fuselet:
+	//  - p is a dense vector (1, 3, 5, ..., 2*in.n-1)
+
+	// get raw pointers to vector data
+	double * const p = pv.raw();
+	const double * const r = rv.raw();
+	const double * const v = vv.raw();
+
+	// verify
+	{
+		const int fuselet_rc = double_update(
+			p, alpha, r, beta, v, gamma,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t double_update: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		bool fail = false;
+		for( size_t i = 0; i < in.n; ++i ) {
+			if( !grb::utils::equals( p[ i ], 2*static_cast< double >(i)+1, 5 ) ) {
+				fail = true;
+				std::cerr << "\t\t p[ " << i << " ]: expected "
+					<< (i*2+1) << ", got " << p[ i ] << "\n";
+			}
+		}
+		if( fail ) {
+			std::cerr << "\t double_update: verification FAILED (II)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+	}
+
+	// benchmark
+	{
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			(void) double_update(
+				p, alpha, r, beta, v, gamma,
+				in.n
+			);
+		}
+		const double fast = timer.time();
+
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			grb::semirings::plusTimes< double > plusTimes_FP64;
+			(void) grb::foldr< grb::descriptors::dense >( gamma, pv,
+				grb::monoids::times< double >() );
+			(void) grb::eWiseMul< grb::descriptors::dense >( pv, alpha, rv,
+				plusTimes_FP64 );
+			(void) grb::eWiseMul< grb::descriptors::dense >( pv, beta, vv,
+				plusTimes_FP64 );
+			(void) grb::wait();
+		}
+
+		const double slow = timer.time();
+
+		// record speedup:
+		std::cout << "\t test_double_update (" << in.rep << " repetitions):\n"
+			<< "\t\t reference_omp: " << slow << " ms.\n"
+			<< "\t\t fuselets: " << fast << " ms.\n";
+		out.times.useful =
+			static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+	}
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+void test_doubleUpdate_update_norm2(
+	const struct Input &in, struct Output &out
+) {
+	grb::utils::Timer timer;
+
+	// I/O phase is empty
+	out.times.io = 0;
+
+	// start preamble
+	timer.reset();
+	grb::Vector< double > xv( in.n ), rv( in.n ), yv( in.n ), zv( in.n ),
+		tv( in.n );
+	grb::RC rc = setupVectors( xv, rv, yv );
+	rc = rc ? rc : grb::set( rv, 0.5 );
+	rc = rc ? rc : grb::set( zv, 1.0 );
+	rc = rc ? rc : grb::set( tv, -1.0 );
+	double theta, beta, omega, alpha, eta, zeta;
+	theta = 17.7;
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_doubleUpdate_update_norm2: initalisation FAILED (I)\n";
+		out.error = rc;
+		return;
+	}
+
+	rc = (initialize_fuselets() == 0 ? grb::SUCCESS : grb::FAILED);
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "\t test_doubleUpdate_update_norm2: initialisation FAILED (II)\n";
+		out.error = rc;
+		return;
+	}
+
+	// On successful initialisation, the scalar and vector data are as follows:
+	beta = 0.5;
+	omega = -1.0;
+	alpha = 2.0;
+	eta = 1.0;
+	zeta = 2.0;
+	theta = 3.14;
+	//  - x is a dense vector (0, 1, 2, ..., in.n-1)
+	//  - r is a dense vector with values 0.5 everywhere
+	//  - y is a dense vector with values two (2)
+	//  - z is a dense vector with values one (1)
+	//  - t is a dense vector with values minus one (-1)
+	// hence the expected output of applying the update_update_norm2 fuselet:
+	//  - x is a dense vector (0, 2, ..., 2*in.n-2)
+	//  - r is a dense vector with values zero (0)
+	//  - theta will be zero
+
+	// get raw pointers to vector data
+	double * const x = xv.raw();
+	double * const r = rv.raw();
+	const double * const y = yv.raw();
+	const double * const z = zv.raw();
+	const double * const t = tv.raw();
+
+	// verify
+	{
+		constexpr double zero = 0.0;
+		const int fuselet_rc = doubleUpdate_update_norm2(
+			x, r, &theta,
+			beta, y,
+			omega, z,
+			alpha,
+			eta, t,
+			zeta,
+			in.n
+		);
+		if( fuselet_rc != 0 ) {
+			std::cerr << "\t double_update: verification FAILED (I)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		bool fail = false;
+		for( size_t i = 0; i < in.n; ++i ) {
+			if( !grb::utils::equals( x[ i ], 2*static_cast< double >(i), 5 ) ) {
+				fail = true;
+				std::cerr << "\t\t x[ " << i << " ]: expected "
+					<< (i*2) << ", got " << x[ i ] << "\n";
+			}
+			if( !grb::utils::equals( r[ i ], zero, 3 ) ) {
+				fail = true;
+				std::cerr << "\t\t r[ " << i << " ]: expected zero, got "
+					<< r[ i ] << "\n";
+			}
+		}
+		if( fail ) {
+			std::cerr << "\t doubleUpdate_update_norm2: verification FAILED (II)\n";
+			out.error = grb::FAILED;
+			return;
+		}
+		if( !grb::utils::equals( theta, zero, 2 * in.n - 1 ) ) {
+			std::cerr << "\t doubleUpdate_update_norm2: verification FAILED (III)\n"
+				<< "\t\t expected zero, got " << theta << "\n";
+			out.error = grb::FAILED;
+			return;
+		}
+	}
+
+	// benchmark
+	{
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			theta = 3.14;
+			(void) doubleUpdate_update_norm2(
+				x, r, &theta,
+				beta, y,
+				omega, z,
+				alpha,
+				eta, t,
+				zeta,
+				in.n
+			);
+		}
+		const double fast = timer.time();
+
+		timer.reset();
+		for( size_t i = 0; i < in.rep; ++i ) {
+			grb::semirings::plusTimes< double > plusTimes_FP64;
+			(void) grb::foldr< grb::descriptors::dense >( alpha, xv,
+				grb::monoids::times< double >() );
+			(void) grb::eWiseMul< grb::descriptors::dense >( xv, omega, zv,
+				plusTimes_FP64 );
+			(void) grb::eWiseMul< grb::descriptors::dense >( xv, beta, yv,
+				plusTimes_FP64 );
+			(void) grb::foldr< grb::descriptors::dense >( zeta, rv,
+				grb::monoids::times< double >() );
+			(void) grb::eWiseMul< grb::descriptors::dense >( rv, eta, tv,
+				plusTimes_FP64 );
+			theta = 3.17;
+			(void) grb::dot< grb::descriptors::dense >( theta, rv, rv,
+				plusTimes_FP64 );
+			(void) grb::wait();
+		}
+
+		const double slow = timer.time();
+
+		// record speedup:
+		std::cout << "\t test_doubleUpdate_update_norm2 (" << in.rep << " repetitions):"
+			<< "\n\t\t reference_omp: " << slow << " ms.\n"
+			<< "\t\t fuselets: " << fast << " ms.\n";
+		out.times.useful =
+			static_cast< double >(slow - fast) / static_cast< double >(in.rep);
+	}
+
+	// postamble, and done
+	timer.reset();
+	out.error = finalize_fuselets() == 0 ? grb::SUCCESS : grb::PANIC;
+	out.times.postamble = timer.time();
+}
+
+int main( int argc, char ** argv ) {
+	// sanity check on program args
+	if( argc < 2 || argc > 4 ) {
+		std::cout << "Usage: " << argv[ 0 ] << " <vector length> (inner iterations) "
+			<< "(outer iterations)" << std::endl;
+		return 0;
+	}
+	std::cout << "Test executable: " << argv[ 0 ] << std::endl;
+
+	// prepare input, output structs
+	struct Input in;
+	struct Output out;
+
+	// get vector length
+	char * end = NULL;
+	in.n = strtoumax( argv[ 1 ], &end, 10 );
+	if( argv[ 1 ] == end ) {
+		std::cerr << "Could not parse argument " << argv[ 1 ] << " "
+			<< "for vector length." << std::endl;
+		std::cout << "Test FAILED\n" << std::endl;
+		return 10;
+	}
+
+	// get inner number of iterations
+	in.rep = grb::config::BENCHMARKING::inner();
+	if( argc >= 3 ) {
+		in.rep = strtoumax( argv[ 2 ], &end, 10 );
+		if( argv[ 2 ] == end ) {
+			std::cerr << "Could not parse argument " << argv[ 2 ] << " "
+				<< "for number of inner experiment repititions." << std::endl;
+			std::cout << "Test FAILED\n" << std::endl;
+			return 20;
+		}
+	}
+
+	// get outer number of iterations
+	size_t outer = grb::config::BENCHMARKING::outer();
+	if( argc >= 4 ) {
+		outer = strtoumax( argv[ 3 ], &end, 10 );
+		if( argv[ 3 ] == end ) {
+			std::cerr << "Could not parse argument " << argv[ 3 ] << " "
+				<< "for number of outer experiment repititions." << std::endl;
+			std::cout << "Test FAILED\n" << std::endl;
+			return 30;
+		}
+	}
+
+	// prepare benchmarker
+	grb::Benchmarker< grb::AUTOMATIC > bench;
+
+	// start tests
+	std::cout << "\nBenchmark label: spmv_dot of size " << in.n
+		<< std::endl;
+	grb::RC rc = bench.exec( &(test_spmv_dot), in, out, 1, outer, true );
+
+	if( rc == grb::SUCCESS ) {
+		std::cout << "\nBenchmark label: spmv_dot_norm2 of size " << in.n
+			<< std::endl;
+		rc = bench.exec( &(test_spmv_dot_norm2), in, out, 1, outer, true );
+	}
+
+	if( rc == grb::SUCCESS ) {
+		std::cout << "\nBenchmark label: update_spmv_dot of size " << in.n
+			<< std::endl;
+		rc = bench.exec( &(test_update_spmv_dot), in, out, 1, outer, true );
+	}
+
+	if( rc == grb::SUCCESS ) {
+		std::cout << "\nBenchmark label: update_update_norm2 of size " << in.n
+			<< std::endl;
+		rc = bench.exec( &(test_update_update_norm2), in, out, 1, outer, true );
+	}
+
+	if( rc == grb::SUCCESS ) {
+		std::cout << "\nBenchmark label: double_update of size " << in.n
+			<< std::endl;
+		rc = bench.exec( &(test_double_update), in, out, 1, outer, true );
+	}
+
+	if( rc == grb::SUCCESS ) {
+		std::cout << "\nBenchmark label: doubleUpdate_update_norm2 of size " << in.n
+			<< std::endl;
+		rc = bench.exec( &(test_doubleUpdate_update_norm2), in, out, 1, outer, true );
+	}
+
+	if( rc != grb::SUCCESS ) {
+		std::cerr << "Test launch failed: " << grb::toString( rc ) << std::endl;
+		std::cout << "Test FAILED\n" << std::endl;
+		return EXIT_FAILURE;
+	}
+
+	if( out.error != grb::SUCCESS ) {
+		std::cerr << "Functional test exits with nonzero exit code. "
+			<< "Reason: " << grb::toString( out.error ) << "." << std::endl;
+		std::cout << "Test FAILED\n" << std::endl;
+		return EXIT_FAILURE;
+	}
+
+	std::cout << "\nNOTE: please check the above performance figures manually-- "
+		<< "the useful timings should positive, indicating speedup for fuselets vs. "
+		<< "regular blocking execution (for large enough vector lengths)\n";
+
+	// done
+	std::cout << "Test OK\n" << std::endl;
+	return EXIT_SUCCESS;
+}
+
diff --git a/tests/performance/performancetests.sh b/tests/performance/performancetests.sh
index f8f198b6a..576b20fc5 100755
--- a/tests/performance/performancetests.sh
+++ b/tests/performance/performancetests.sh
@@ -196,6 +196,15 @@ if [[ -z $DATASETTORUN && ( -z "$EXPTYPE" || "$EXPTYPE" == "KERNEL" ) ]]; then
 	tail -2 ${TEST_OUT_DIR}/dot-openmp
 	egrep 'label|Overall timings|0,' ${TEST_OUT_DIR}/dot-openmp | grep -v Outer >> ${TEST_OUT_DIR}/benchmarks
 
+	echo ">>>      [x]           [x]       Testing fuselets versus standard reference_omp using a"
+	echo "                                 problem size of 100 000 000."
+	echo " "
+	${TEST_BIN_DIR}/fuselets_performance 100000000 1 30 &> ${TEST_OUT_DIR}/fuselets_performance
+	head -1 ${TEST_OUT_DIR}/fuselets_performance
+	grep "Test OK" ${TEST_OUT_DIR}/fuselets_performance || echo "Test FAILED"
+	echo " "
+	egrep 'label|Overall timings|0,' ${TEST_OUT_DIR}/fuselets_performance | grep -v Outer >> ${TEST_OUT_DIR}/benchmarks
+
 fi
 
 # start definition of helper functions for remainder performance tests
diff --git a/tests/performance/spmspm.cpp b/tests/performance/spmspm.cpp
index 84e8bf79c..61dcf769a 100644
--- a/tests/performance/spmspm.cpp
+++ b/tests/performance/spmspm.cpp
@@ -305,7 +305,7 @@ void grbProgram( const struct input &data_in, struct output &out ) {
 			if( s == 0 ) {
 				std::cout << "Info: cold mxm completed"
 					<< ". Time taken was " << single_time << " ms. "
-					<< "Deduced inner repetitions parameter of " << out.rep << " "
+					<< "Deduced inner repetitions parameter of " << deduced_inner_reps << " "
 					<< "to take 1 second or more per inner benchmark.\n";
 				out.rep = deduced_inner_reps;
 			}
diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt
index 1f99446ee..7e7f2af4a 100644
--- a/tests/smoke/CMakeLists.txt
+++ b/tests/smoke/CMakeLists.txt
@@ -55,6 +55,11 @@ add_grb_executables( manual_hook_grb_collectives_blas1_raw manual_launcher.cpp
 	BACKENDS bsp1d NO_BACKEND_NAME
 )
 
+add_grb_executables( fuselets_smoke ../performance/fuselets.cpp
+	BACKENDS reference_omp NO_BACKEND_NAME
+	ADDITIONAL_LINK_LIBRARIES fuselets
+)
+
 add_grb_executables( small_knn ../unit/auto_launcher.cpp
 	hook/knn.cpp
 	BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking
diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh
index 45793fb42..ce6c3ce6b 100755
--- a/tests/smoke/smoketests.sh
+++ b/tests/smoke/smoketests.sh
@@ -427,6 +427,15 @@ for BACKEND in ${BACKENDS[@]}; do
 				echo " "
 			fi
 
+			if [ "$BACKEND" = "reference_omp" ]; then
+				echo "Non-standard reference-omp specific smoke tests:"
+				echo " "
+				echo ">>>      [x]           [ ]       Tests fuselets"
+				$runner ${TEST_BIN_DIR}/fuselets_smoke 100 1 1 &> ${TEST_OUT_DIR}/fuselets.log
+				head -1 ${TEST_OUT_DIR}/fuselets.log
+				grep "Test OK" ${TEST_OUT_DIR}/fuselets.log || echo "Test FAILED"
+				echo " "
+			fi
 		done
 	done
 
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index f7b71cbad..28aa6cb36 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -190,6 +190,14 @@ add_grb_executables( iteratorFilter iteratorFilter.cpp
 	BACKENDS reference NO_BACKEND_NAME
 )
 
+add_grb_executables( monoids monoids.cpp
+	BACKENDS reference NO_BACKEND_NAME
+)
+
+add_grb_executables( semirings semirings.cpp
+	BACKENDS reference NO_BACKEND_NAME
+)
+
 add_grb_executables( RBGaussSeidel RBGaussSeidel.cpp
 	BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking
 )
diff --git a/tests/unit/monoids.cpp b/tests/unit/monoids.cpp
new file mode 100644
index 000000000..d43473158
--- /dev/null
+++ b/tests/unit/monoids.cpp
@@ -0,0 +1,299 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * Tests the default monoid definitions.
+ *
+ * @author A. N. Yzelman
+ * @date 24th of October, 2024
+ */
+
+#include <graphblas.hpp>
+
+#include <iostream>
+
+
+template< typename Monoid >
+bool runTests() {
+
+	Monoid monoid;
+
+	// get identities (the zeroes) in each input domain
+	const typename Monoid::D1 d1_zero =
+		monoid.template getIdentity< typename Monoid::D1 >();
+	const typename Monoid::D2 d2_zero =
+		monoid.template getIdentity< typename Monoid::D2 >();
+
+	// get nonzeroes in each input domain
+	// without a semiring structure, we cannot just construct one. What we do here
+	// instead is use negation of the earlier-retrieved zeroes.
+	const typename Monoid::D1 d1_nonzero = !(d1_zero);
+	const typename Monoid::D2 d2_nonzero = !(d2_zero);
+
+	// check zero is an identity under addition, zero left:
+	{
+		typename Monoid::D3 tmp;
+		if(
+			grb::apply(
+				tmp, d1_zero, d2_nonzero, monoid.getOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test I\n";
+			return false;
+		}
+		if( tmp != static_cast< typename Monoid::D3 >( d2_nonzero ) ) {
+			std::cerr << "Zero in D1 does not act as an identity\n";
+			return false;
+		}
+	}
+
+	// check zero is an identity under addition, zero right:
+	{
+		typename Monoid::D3 tmp;
+		if(
+			grb::apply(
+				tmp, d1_nonzero, d2_zero, monoid.getOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test II\n";
+			return false;
+		}
+		if( tmp != static_cast< typename Monoid::D3 >( d1_nonzero ) ) {
+			std::cerr << "Zero in D2 does not act as an identity\n";
+			return false;
+		}
+	}
+
+	// check commutativity (if applicable)
+	if( grb::is_commutative< Monoid >::value ) {
+		typename Monoid::D3 left, right;
+		if(
+			grb::apply(
+				left, d1_zero, d2_nonzero, monoid.getOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test III (1)\n";
+			return false;
+		}
+		if(
+			grb::apply(
+				right, d1_nonzero, d2_zero, monoid.getOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test III (2)\n";
+			return false;
+		}
+		if( left != right ) {
+			std::cerr << "Non-commutative behaviour detected "
+				<< "while the commutative type trait was true\n";
+			return false;
+		}
+	}
+
+	// all OK
+	return true;
+}
+
+template<
+	template< typename D1, typename D2 = D1, typename D3 = D2 >
+	class Monoid
+>
+bool runTestsAllDomains() {
+	std::cout << "\t\t testing over doubles:\n";
+	if( runTests< Monoid< double > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over floats:\n";
+	if( runTests< Monoid< float > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over short ints:\n";
+	if( runTests< Monoid< short int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over integers:\n";
+	if( runTests< Monoid< int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over 64-bit integers:\n";
+	if( runTests< Monoid< int64_t > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over short unsigned integers:\n";
+	if( runTests< Monoid< short unsigned int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over unsigned integers:\n";
+	if( runTests< Monoid< unsigned int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over size_ts:\n";
+	if( runTests< Monoid< size_t > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	return true;
+}
+
+int main( int argc, char ** argv ) {
+	if( argc > 1 ) {
+		std::cerr << "This test does not expect any arguments\n"
+			<< "\t Example usage: ./" << argv[ 0 ] << "\n";
+		return 1;
+	}
+
+	std::cout << "This is functional test " << argv[ 0 ] << "\n";
+	bool ok = true;
+	
+	std::cout << "\t testing grb::monoids::plus...\n";
+	if( runTestsAllDomains< grb::monoids::plus >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::add...\n";
+	if( runTestsAllDomains< grb::monoids::add >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::times...\n";
+	if( runTestsAllDomains< grb::monoids::times >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::mul...\n";
+	if( runTestsAllDomains< grb::monoids::mul >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::min...\n";
+	if( runTestsAllDomains< grb::monoids::min >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::max...\n";
+	if( runTestsAllDomains< grb::monoids::max >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::lor over Booleans...\n";
+	if( runTests< grb::monoids::lor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::land over Booleans...\n";
+	if( runTests< grb::monoids::land< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::lxor over Booleans...\n";
+	if( runTests< grb::monoids::lxor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::lneq over Booleans...\n";
+	if( runTests< grb::monoids::lneq< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::lxnor over Booleans...\n";
+	if( runTests< grb::monoids::lxnor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::monoids::leq over Booleans...\n";
+	if( runTests< grb::monoids::leq< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		std::cout << "\t ERR\n";
+		ok = false;
+	}
+
+	// done
+	if( ok ) {
+		std::cout << "Test OK\n" << std::endl;
+	} else {
+		std::cerr << std::flush;
+		std::cout << "Test FAILED\n" << std::endl;
+	}
+}
+
diff --git a/tests/unit/parser.cpp b/tests/unit/parser.cpp
index c484ae8cc..28e3f33bd 100644
--- a/tests/unit/parser.cpp
+++ b/tests/unit/parser.cpp
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 
+#include <map>
 #include <cstdio>
+#include <cstdint> // SIZE_MAX
 #include <fstream>
-#include <iostream>
-#include <map>
 #include <sstream>
+#include <iostream>
 #include <stdexcept>
 
 #include "graphblas/synchronizedNonzeroIterator.hpp"
diff --git a/tests/unit/semirings.cpp b/tests/unit/semirings.cpp
new file mode 100644
index 000000000..1dbec312f
--- /dev/null
+++ b/tests/unit/semirings.cpp
@@ -0,0 +1,423 @@
+
+/*
+ *   Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * Tests the default semiring definitions.
+ *
+ * @author A. N. Yzelman
+ * @date 11th of October, 2024
+ */
+
+#include <graphblas/utils/suppressions.h>
+GRB_UTIL_IGNORE_INT_IN_BOOL_CONTEXT
+
+#include <graphblas.hpp>
+
+#include <iostream>
+
+
+template< typename Semiring >
+bool runTests() {
+
+	Semiring ring;
+
+	// check zero annihilates one under multiplication, zero left:
+	{
+		typename Semiring::D3 tmp;
+		if(
+			grb::apply(
+				tmp,
+				ring.template getZero< typename Semiring::D1 >(),
+				ring.template getOne< typename Semiring::D2 >(),
+				ring.getMultiplicativeOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test I\n";
+			return false;
+		}
+		if( tmp != ring.template getZero< typename Semiring::D3 >() ) {
+			std::cerr << "Zero in D1 does not annihilate one in D2\n";
+			return false;
+		}
+	}
+
+	// check zero annihilates one under multiplication, zero right:
+	{
+		typename Semiring::D3 tmp;
+		if(
+			grb::apply(
+				tmp,
+				ring.template getOne< typename Semiring::D1 >(),
+				ring.template getZero< typename Semiring::D2 >(),
+				ring.getMultiplicativeOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test II\n";
+			return false;
+		}
+		if( tmp != ring.template getZero< typename Semiring::D3 >() ) {
+			std::cerr << "Zero in D2 does not annihilate one in D1\n";
+			return false;
+		}
+	}
+
+	// check zero is an identity under addition, zero left:
+	{
+		typename Semiring::D4 tmp;
+		if(
+			grb::apply(
+				tmp,
+				ring.template getZero< typename Semiring::D3 >(),
+				ring.template getOne< typename Semiring::D4 >(),
+				ring.getAdditiveOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test III\n";
+			return false;
+		}
+		if( tmp != ring.template getOne< typename Semiring::D4 >() ) {
+			std::cerr << "Zero in D3 does not act as an identity under addition\n";
+			return false;
+		}
+	}
+
+	// check zero is an identity under addition, zero right:
+	{
+		typename Semiring::D4 tmp;
+		if(
+			grb::apply(
+				tmp,
+				ring.template getOne< typename Semiring::D3 >(),
+				ring.template getZero< typename Semiring::D4 >(),
+				ring.getAdditiveOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test IV\n";
+			return false;
+		}
+		if( tmp != ring.template getOne< typename Semiring::D4 >() ) {
+			std::cerr << "Zero in D4 does not act as an identity under addition\n";
+			return false;
+		}
+	}
+
+	// check one is an identity under multiplication:
+	{
+		typename Semiring::D3 tmp;
+		if(
+			grb::apply(
+				tmp,
+				ring.template getOne< typename Semiring::D1 >(),
+				ring.template getOne< typename Semiring::D2 >(),
+				ring.getMultiplicativeOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test V\n";
+			return false;
+		}
+		if( tmp != ring.template getOne< typename Semiring::D3 >() ) {
+			std::cerr << "One does not act as identity under multiplication\n";
+			return false;
+		}
+	}
+
+	// check distributive property:
+	{
+		typename Semiring::D4 tmp1;
+		grb::RC rc = grb::apply(
+			tmp1,
+			ring.template getOne< typename Semiring::D3 >(),
+			ring.template getOne< typename Semiring::D4 >(),
+			ring.getAdditiveOperator()
+		);
+		typename Semiring::D3 chk1;
+		rc = rc ? rc : grb::apply(
+				chk1,
+				ring.template getOne< typename Semiring::D1 >(),
+				static_cast< typename Semiring::D2 >(tmp1),
+				ring.getMultiplicativeOperator()
+			);
+		typename Semiring::D3 tmp2, tmp3;
+		rc = rc ? rc : grb::apply(
+				tmp2,
+				ring.template getOne< typename Semiring::D1 >(),
+				ring.template getOne< typename Semiring::D2 >(),
+				ring.getMultiplicativeOperator()
+			);
+		rc = rc ? rc : grb::apply(
+				tmp3,
+				ring.template getOne< typename Semiring::D1 >(),
+				ring.template getOne< typename Semiring::D2 >(),
+				ring.getMultiplicativeOperator()
+			);
+		typename Semiring::D3 chk2;
+		rc = rc ? rc : grb::apply(
+			chk2,
+			tmp2,
+			static_cast< typename Semiring::D4 >(tmp3),
+			ring.getAdditiveOperator()
+		);
+		if( rc != grb::SUCCESS ) {
+			std::cerr << "Unexpected error in test VI\n";
+			return false;
+		}
+		if( chk1 != chk2 ) {
+			std::cerr << "The distributative property does not hold\n";
+			return false;
+		}
+	}
+
+	// check commutativity of additive monoid
+	{
+		typename Semiring::D4 left, right;
+		if(
+			grb::apply(
+				left,
+				ring.template getZero< typename Semiring::D3 >(),
+				ring.template getOne< typename Semiring::D4 >(),
+				ring.getAdditiveOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test VII (1)\n";
+			return false;
+		}
+		if(
+			grb::apply(
+				right,
+				ring.template getOne< typename Semiring::D3 >(),
+				ring.template getZero< typename Semiring::D4 >(),
+				ring.getAdditiveOperator()
+			) != grb::SUCCESS
+		) {
+			std::cerr << "Unexpected error in test VII (2)\n";
+			return false;
+		}
+		if( left != right ) {
+			std::cerr << "Non-commutative behaviour of the additive monoid detected\n";
+			return false;
+		}
+	}
+
+	// all OK
+	return true;
+}
+
+template<
+	template< typename D1, typename D2 = D1, typename D3 = D2, typename D4 = D3 >
+	class Semiring
+>
+bool runTestsAllDomains() {
+	std::cout << "\t\t testing over doubles:\n";
+	if( runTests< Semiring< double > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over floats:\n";
+	if( runTests< Semiring< float > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over short ints:\n";
+	if( runTests< Semiring< short int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over integers:\n";
+	if( runTests< Semiring< int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over 64-bit integers:\n";
+	if( runTests< Semiring< int64_t > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over short unsigned integers:\n";
+	if( runTests< Semiring< short unsigned int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over unsigned integers:\n";
+	if( runTests< Semiring< unsigned int > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	std::cout << "\t\t testing over size_ts:\n";
+	if( runTests< Semiring< size_t > >() ) {
+		std::cout << "\t\t OK\n";
+	} else {
+		std::cout << "\t\t ERR\n";
+		return false;
+	}
+
+	return true;
+}
+
+int main( int argc, char ** argv ) {
+	if( argc > 1 ) {
+		std::cerr << "This test does not expect any arguments\n"
+			<< "\t Example usage: ./" << argv[ 0 ] << "\n";
+		return 1;
+	}
+
+	std::cout << "This is functional test " << argv[ 0 ] << "\n";
+	bool ok = true;
+
+	std::cout << "\t testing grb::semirings::plusTimes...\n";
+	if( runTestsAllDomains< grb::semirings::plusTimes >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::minPlus...\n";
+	if( runTestsAllDomains< grb::semirings::minPlus >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::maxPlus over integers:\n";
+	if( runTests< grb::semirings::maxPlus< int > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::maxPlus over doubles:\n";
+	if( runTests< grb::semirings::maxPlus< double > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::minTimes over unsigned integers:\n";
+	if( runTests< grb::semirings::minTimes< unsigned int > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::minMax...\n";
+	if( runTestsAllDomains< grb::semirings::minMax >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::maxMin...\n";
+	if( runTestsAllDomains< grb::semirings::maxMin >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::maxTimes over size_ts:\n";
+	if( runTests< grb::semirings::maxTimes< size_t > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::plusMin over unsigned integers:\n";
+	if( runTests< grb::semirings::plusMin< unsigned int > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::lorLand over Booleans:\n";
+	if( runTests< grb::semirings::lorLand< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::boolean:\n";
+	if( runTests< grb::semirings::boolean >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::landLor over Booleans:\n";
+	if( runTests< grb::semirings::landLor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::lxorLand over Booleans:\n";
+	if( runTests< grb::semirings::lxorLand< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::lneqLand over Booleans:\n";
+	if( runTests< grb::semirings::lneqLand< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::lxnorLor over Booleans:\n";
+	if( runTests< grb::semirings::lxnorLor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	std::cout << "\t testing grb::semirings::leqLor over Booleans:\n";
+	if( runTests< grb::semirings::leqLor< bool > >() ) {
+		std::cout << "\t OK\n";
+	} else {
+		ok = false;
+	}
+
+	// done
+	if( ok ) {
+		std::cout << "Test OK\n" << std::endl;
+	} else {
+		std::cerr << std::flush;
+		std::cout << "Test FAILED\n" << std::endl;
+	}
+}
+
diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh
index 8da87a458..592c77870 100755
--- a/tests/unit/unittests.sh
+++ b/tests/unit/unittests.sh
@@ -51,6 +51,18 @@ for MODE in ${MODES}; do
 		echo "                                 field (double, integers, and floats)"
 		${TEST_BIN_DIR}/mul15m_${MODE}
 
+		echo ">>>      [x]           [ ]       Testing pre-defined monoids"
+		${TEST_BIN_DIR}/monoids_${MODE} &> ${TEST_OUT_DIR}/monoids_${MODE}.log
+		head -1 ${TEST_OUT_DIR}/monoids_${MODE}.log
+		grep 'Test OK' ${TEST_OUT_DIR}/monoids_${MODE}.log || echo "Test FAILED"
+		echo " "
+
+		echo ">>>      [x]           [ ]       Testing pre-defined semirings"
+		${TEST_BIN_DIR}/semirings_${MODE} &> ${TEST_OUT_DIR}/semirings_${MODE}.log
+		head -1 ${TEST_OUT_DIR}/semirings_${MODE}.log
+		grep 'Test OK' ${TEST_OUT_DIR}/semirings_${MODE}.log || echo "Test FAILED"
+		echo " "
+
 		echo ">>>      [x]           [ ]       Tests the built-in parser on the west0497 MatrixMarket file"
 		if [ -f ${INPUT_DIR}/west0497.mtx ]; then
 			${TEST_BIN_DIR}/parserTest_${MODE} ${INPUT_DIR}/west0497.mtx 2> ${TEST_OUT_DIR}/parserTest_${MODE}.err 1> ${TEST_OUT_DIR}/parserTest_${MODE}.out