diff --git a/clients/common/lapack/testing_sygvdx_hegvdx.hpp b/clients/common/lapack/testing_sygvdx_hegvdx.hpp
index 40075077e..755bc1b83 100644
--- a/clients/common/lapack/testing_sygvdx_hegvdx.hpp
+++ b/clients/common/lapack/testing_sygvdx_hegvdx.hpp
@@ -1,5 +1,5 @@
 /* **************************************************************************
- * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -27,8 +27,10 @@
 
 #pragma once
 
+#include "common/matrix_utils/matrix_utils.hpp"
 #include "common/misc/client_util.hpp"
 #include "common/misc/clientcommon.hpp"
+#include "common/misc/clss.hpp"
 #include "common/misc/lapack_host_reference.hpp"
 #include "common/misc/norm.hpp"
 #include "common/misc/rocsolver.hpp"
@@ -206,6 +208,69 @@ void testing_sygvdx_hegvdx_bad_arg()
     }
 }
 
+//
+// Selects between the legacy and the new sygvdx/hegvdx test paths.
+//
+// Returns true when the environment variable
+//
+// ROCSOLVER_SYGVDX_HEGVDX_USE_LEGACY_TESTS
+//
+// is defined (only its presence matters, not its value); in that case
+// `sygvdx_hegvdx_getError` will compute errors using the legacy error
+// bounds (for debugging purposes).
+//
+// Returns false otherwise, and the new error bounds are used.
+//
+// The environment is queried on every call.
+//
+static bool sygvdx_hegvdx_use_legacy_tests()
+{
+    return std::getenv("ROCSOLVER_SYGVDX_HEGVDX_USE_LEGACY_TESTS") != nullptr;
+}
+
+//
+// By default `sygvdx_hegvdx_getError()` checks that the number of computed
+// eigenvalues matches the number of reference eigenvalues, and then checks
+// the accuracy of all computed eigenvalues.  This behaviour can be relaxed,
+// which gives two modes of operation: a full (default) check and a relaxed
+// check.  The mode is chosen by
+// `test_for_equality_of_number_of_computed_eigenvalues()`, below, which
+// returns true for the full check and false for the relaxed one:
+//
+// a) If `ROCSOLVER_LAX_EIGENSOLVERS_TESTS` is defined, either as a
+// preprocessor macro at compile time or as an environment variable at run
+// time, then the test suite will only use the subset of computed eigenvalues
+// that match reference eigenvalues (up to the given tolerance); except
+//
+// b) If the environment variable `ROCSOLVER_FULL_EIGENSOLVERS_TESTS` is
+// defined, then the test suite will unconditionally check all eigenvalues
+// for their accuracy.  This setting takes precedence over a).
+//
+// Only the presence of these variables matters; their values are ignored.
+//
+// The relaxed tests are intended as a means to decouple the computation of
+// error bounds of eigenvalues and eigenvectors, allowing tests to pass in the
+// case that not all eigenvalues could be accurately computed, but all accurate
+// eigenvalues have accurate eigenvectors.  If eigenvectors are not accurate,
+// the corresponding tests will fail both in full mode and in relaxed mode.
+//
+// Note: the relaxed version of the tests is only supported when using the new
+// error bounds, see also function `sygvdx_hegvdx_use_legacy_tests()`.
+//
+static bool test_for_equality_of_number_of_computed_eigenvalues()
+{
+#if defined(ROCSOLVER_LAX_EIGENSOLVERS_TESTS)
+    // Relaxed mode was selected at compile time.
+    const bool lax_requested = true;
+#else
+    const bool lax_requested = std::getenv("ROCSOLVER_LAX_EIGENSOLVERS_TESTS") != nullptr;
+#endif
+    const bool full_requested = std::getenv("ROCSOLVER_FULL_EIGENSOLVERS_TESTS") != nullptr;
+
+    // A request for the full check overrides a request for the relaxed one.
+    return full_requested || !lax_requested;
+}
+
 template <bool CPU, bool GPU, typename T, typename Td, typename Th>
 void sygvdx_hegvdx_initData(const rocblas_handle handle,
                             const rocblas_eform itype,
@@ -233,11 +298,15 @@ void sygvdx_hegvdx_initData(const rocblas_handle handle,
         rocblas_init<T>(hA, true);
         rocblas_init<T>(U, true);
 
+        bool use_legacy_tests = sygvdx_hegvdx_use_legacy_tests();
+
         for(rocblas_int b = 0; b < bc; ++b)
         {
             // for testing purposes, we start with a reduced matrix M for the standard equivalent problem
             // with spectrum in a desired range (-20, 20). Then we construct the generalized pair
             // (A, B) from there.
+            memset(hB[b], 0,
+                   sizeof(T) * n * ldb); // since ldb >= n, make sure all entries of B are initialized
             for(rocblas_int i = 0; i < n; i++)
             {
                 // scale matrices and set hA = M (symmetric/hermitian), hB = U (upper triangular)
@@ -316,15 +385,23 @@ void sygvdx_hegvdx_initData(const rocblas_handle handle,
                 {
                     for(rocblas_int j = 0; j < n; j++)
                     {
-                        if(itype != rocblas_eform_bax)
+                        if(use_legacy_tests)
                         {
-                            A[b][i + j * lda] = hA[b][i + j * lda];
-                            B[b][i + j * ldb] = hB[b][i + j * ldb];
+                            if(itype != rocblas_eform_bax)
+                            {
+                                A[b][i + j * lda] = hA[b][i + j * lda];
+                                B[b][i + j * ldb] = hB[b][i + j * ldb];
+                            }
+                            else
+                            {
+                                A[b][i + j * lda] = hB[b][i + j * ldb];
+                                B[b][i + j * ldb] = hA[b][i + j * lda];
+                            }
                         }
                         else
                         {
-                            A[b][i + j * lda] = hB[b][i + j * ldb];
-                            B[b][i + j * ldb] = hA[b][i + j * lda];
+                            A[b][i + j * lda] = hA[b][i + j * lda];
+                            B[b][i + j * ldb] = hB[b][i + j * ldb];
                         }
                     }
                 }
@@ -378,6 +455,8 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle,
                             double* max_err,
                             const bool singular)
 {
+    using HMat = HostMatrix<T, rocblas_int>;
+    using BDesc = typename HMat::BlockDescriptor;
     constexpr bool COMPLEX = rocblas_is_complex<T>;
 
     int lwork = (COMPLEX ? 2 * n : 8 * n);
@@ -390,11 +469,68 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle,
     std::vector<int> hIfail(n);
     host_strided_batch_vector<T> A(lda * n, 1, lda * n, bc);
     host_strided_batch_vector<T> B(ldb * n, 1, ldb * n, bc);
+    std::vector<closest_largest_subsequences<S>> clss(bc);
+    std::vector<bool> skip_test(bc, false);
+
+    bool use_legacy_tests = sygvdx_hegvdx_use_legacy_tests();
+    bool test_for_equality = test_for_equality_of_number_of_computed_eigenvalues();
 
     // input data initialization
     sygvdx_hegvdx_initData<true, true, T>(handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc,
                                           hA, hB, A, B, true, singular);
 
+    // CPU lapack
+    // abstol = 0 ensures max accuracy in rocsolver; for lapack we should use 2*safemin
+    S atol = 2 * get_safemin<S>();
+    for(rocblas_int b = 0; b < bc; ++b)
+    {
+        cpu_sygvx_hegvx(itype, evect, erange, uplo, n, hA[b], lda, hB[b], ldb, vl, vu, il, iu, atol,
+                        hNev[b], hW[b], hZ[b], ldz, work.data(), lwork, rwork.data(), iwork.data(),
+                        hIfail.data(), hInfo[b]);
+
+        // Capture failures where B is not positive definite (hInfo[b][0] > n),
+        // or where the i-argument has an illegal value (hInfo[b][0] < 0).  All other LAPACK
+        // failures skip the test.
+        if((hInfo[b][0] > 0) && (hInfo[b][0] <= n))
+        {
+            skip_test[b] = true;
+        }
+    }
+
+    //
+    // Given an eigenvalue l_i of the symmetric matrix A and a computed
+    // eigenvalue l_i^* (obtained with a backward stable method), Weyl's
+    // theorem yields |l_i - l_i^*| <= K*ulp*||A||_2, where K depends on n.
+    // For the sake of this test, we will set K = C * n, with C ~ 1.
+    //
+    // Thus, if the range to look for eigenvalues is the interval (vl, vu],
+    // calls to the solver should look for computed eigenvalues in the range
+    // (vl - tol, vu + tol], where `tol = C * n * ulp * ||A||`.
+    //
+    S C = 4;
+    std::vector<S> tols(bc, 0);
+    std::vector<S> norms(bc, 0);
+    S tol = 0;
+    for(rocblas_int b = 0; b < bc; ++b)
+    {
+        if(hNev[b][0] > 0)
+        {
+            // Get lapack eigenvalues (reference against which rocSOLVER's sygvdx will be compared)
+            auto eigsLapack = *HMat::Convert(hW[b], hNev[b][0], 1);
+            norms[b] = eigsLapack.max_coeff_norm();
+        }
+        else
+        {
+            norms[b] = S(0);
+        }
+
+        tols[b] = C * n * std::numeric_limits<S>::epsilon() * norms[b];
+        if(std::isfinite(tols[b]) && (tols[b] > tol))
+        {
+            tol = tols[b];
+        }
+    }
+
     // execute computations
     // GPU lapack
     CHECK_ROCBLAS_ERROR(rocsolver_sygvdx_hegvdx(
@@ -407,101 +543,225 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle,
     if(evect != rocblas_evect_none)
         CHECK_HIP_ERROR(hZRes.transfer_from(dZ));
 
-    // CPU lapack
-    // abstol = 0 ensures max accuracy in rocsolver; for lapack we should use 2*safemin
-    S atol = 2 * get_safemin<S>();
-    for(rocblas_int b = 0; b < bc; ++b)
-    {
-        cpu_sygvx_hegvx(itype, evect, erange, uplo, n, hA[b], lda, hB[b], ldb, vl, vu, il, iu, atol,
-                        hNev[b], hW[b], hZ[b], ldz, work.data(), lwork, rwork.data(), iwork.data(),
-                        hIfail.data(), hInfo[b]);
-    }
-
-    // (We expect the used input matrices to always converge. Testing
-    // implicitly the equivalent non-converged matrix is very complicated and it boils
-    // down to essentially run the algorithm again and until convergence is achieved.
-    // We do test with indefinite matrices B).
+    // Except for the cases in which B is indefinite, we expect the eigensolver
+    // to converge for all input matrices.
 
-    // check info for non-convergence and/or positive-definiteness
+    // check info for illegal values and/or positive-definiteness
     *max_err = 0;
     for(rocblas_int b = 0; b < bc; ++b)
     {
+        // Capture failures where B is not positive definite (hInfo[b][0] > n),
+        // or where the i-argument has an illegal value (hInfo[b][0] < 0).  All other LAPACK
+        // failures skip the test.
+        if(skip_test[b])
+            continue;
+
         EXPECT_EQ(hInfo[b][0], hInfoRes[b][0]) << "where b = " << b;
         if(hInfo[b][0] != hInfoRes[b][0])
             *max_err += 1;
-    }
 
-    // Check number of returned eigenvalues
-    for(rocblas_int b = 0; b < bc; ++b)
-    {
-        EXPECT_EQ(hNev[b][0], hNevRes[b][0]) << "where b = " << b;
-        if(hNev[b][0] != hNevRes[b][0])
-            *max_err += 1;
+        auto numMatchingEigs = clss[b](hW[b], hNev[b][0], hWRes[b], hNevRes[b][0], tols[b]);
+        if(test_for_equality)
+        {
+            EXPECT_EQ(hNev[b][0], numMatchingEigs) << "where b = " << b;
+            if(hNev[b][0] != numMatchingEigs)
+                *max_err += 1;
+        }
     }
 
+    //
+    // Compute errors
+    //
     double err;
 
     for(rocblas_int b = 0; b < bc; ++b)
     {
+        auto [lapackEigs, rocsolverEigs] = clss[b].subseqs();
+        auto [_, rocsolverEigsIds] = clss[b].subseqs_ids();
+        auto numMatchingEigs = rocsolverEigs.size();
+
+        // Number of eigenvalues computed by rocSOLVER
+        auto numRocsolverEigs = hNevRes[b][0];
+
+        // Only check accuracy for tests in which both computed and reference values exist and are well defined.
+        if(skip_test[b] || (numMatchingEigs == 0) || (hInfo[b][0] != 0))
+            continue;
+
         if(evect == rocblas_evect_none)
         {
-            // only eigenvalues needed; can compare with LAPACK
+            //
+            // Only eigenvalues
+            //
 
-            // error is ||hW - hWRes|| / ||hW||
-            // using frobenius norm
-            if(hInfo[b][0] == 0)
+            if(use_legacy_tests)
+            {
+                err = norm_error('F', 1, numMatchingEigs, 1, lapackEigs.data(), rocsolverEigs.data());
+                *max_err = err > *max_err ? err : *max_err;
+            }
+            else
             {
-                err = norm_error('F', 1, hNev[b][0], 1, hW[b], hWRes[b]);
+                // Get computed eigenvalues
+                auto eigs
+                    = *HMat::Convert(rocsolverEigs.data(), rocsolverEigs.size(),
+                                     1); // convert eigenvalues from type S to type T, if required
+
+                // Get lapack (reference) eigenvalues
+                auto eigsRef
+                    = *HMat::Convert(lapackEigs.data(), lapackEigs.size(),
+                                     1); // convert eigenvalues from type S to type T, if required
+                err = (eigs - eigsRef).norm() / eigsRef.norm();
                 *max_err = err > *max_err ? err : *max_err;
             }
         }
         else
         {
-            // both eigenvalues and eigenvectors needed; need to implicitly test
-            // eigenvectors due to non-uniqueness of eigenvectors under scaling
-            if(hInfo[b][0] == 0)
+            //
+            // Both eigenvalues and eigenvectors
+            //
+
+            if(use_legacy_tests)
             {
                 T alpha = 1;
                 T beta = 0;
 
                 // hZRes contains eigenvectors x
                 // compute B*x (or A*x) and store in hB
-                cpu_symm_hemm(rocblas_side_left, uplo, n, hNev[b][0], alpha, B[b], ldb, hZRes[b],
-                              ldz, beta, hB[b], ldb);
+                cpu_symm_hemm(rocblas_side_left, uplo, n, numRocsolverEigs, alpha, B[b], ldb,
+                              hZRes[b], ldz, beta, hB[b], ldb);
 
+                auto [_, hWResIds] = clss[b].subseqs_ids();
                 if(itype == rocblas_eform_ax)
                 {
                     // problem is A*x = (lambda)*B*x
 
                     // compute (1/lambda)*A*x and store in hA
-                    for(int j = 0; j < hNev[b][0]; j++)
+                    for(int j = 0; j < numMatchingEigs; j++)
                     {
-                        alpha = T(1) / hWRes[b][j];
-                        cpu_symv_hemv(uplo, n, alpha, A[b], lda, hZRes[b] + j * ldz, 1, beta,
+                        int jj = hWResIds[j]; // Id of rocSOLVER eigen-pair associated to j-th LAPACK eigen-pair
+                        alpha = T(1) / hWRes[b][jj];
+                        cpu_symv_hemv(uplo, n, alpha, A[b], lda, hZRes[b] + jj * ldz, 1, beta,
                                       hA[b] + j * lda, 1);
                     }
 
                     // move B*x into hZRes
                     for(rocblas_int i = 0; i < n; i++)
-                        for(rocblas_int j = 0; j < hNev[b][0]; j++)
-                            hZRes[b][i + j * ldz] = hB[b][i + j * ldb];
+                    {
+                        for(rocblas_int j = 0; j < numMatchingEigs; j++)
+                        {
+                            int jj = hWResIds[j]; // Id of rocSOLVER eigen-pair associated to j-th LAPACK eigen-pair
+                            hZRes[b][i + j * ldz] = hB[b][i + jj * ldb];
+                        }
+                    }
                 }
                 else
                 {
                     // problem is A*B*x = (lambda)*x or B*A*x = (lambda)*x
 
                     // compute (1/lambda)*A*B*x or (1/lambda)*B*A*x and store in hA
-                    for(int j = 0; j < hNev[b][0]; j++)
+                    for(int j = 0; j < numMatchingEigs; j++)
                     {
-                        alpha = T(1) / hWRes[b][j];
-                        cpu_symv_hemv(uplo, n, alpha, A[b], lda, hB[b] + j * ldb, 1, beta,
+                        int jj = hWResIds[j]; // Id of rocSOLVER eigen-pair associated to j-th LAPACK eigen-pair
+                        alpha = T(1) / hWRes[b][jj];
+                        cpu_symv_hemv(uplo, n, alpha, A[b], lda, hB[b] + jj * ldb, 1, beta,
                                       hA[b] + j * lda, 1);
                     }
+                    // move hZRes
+                    for(rocblas_int i = 0; i < n; i++)
+                    {
+                        for(rocblas_int j = 0; j < numMatchingEigs; j++)
+                        {
+                            int jj = hWResIds[j]; // Id of rocSOLVER eigen-pair associated to j-th LAPACK eigen-pair
+                            if(j != jj)
+                                hZRes[b][i + j * ldz] = hZRes[b][i + jj * ldz];
+                        }
+                    }
                 }
 
                 // error is ||hA - hZRes|| / ||hA||
                 // using frobenius norm
-                err = norm_error('F', n, hNev[b][0], lda, hA[b], hZRes[b], ldz);
+                err = norm_error('F', n, numMatchingEigs, lda, hA[b], hZRes[b], ldz);
+                *max_err = err > *max_err ? err : *max_err;
+            }
+            else // if(!use_legacy_tests)
+            {
+                //
+                // Prepare input
+                //
+
+                // Get computed eigenvalues
+                auto eigs
+                    = *HMat::Convert(rocsolverEigs.data(), rocsolverEigs.size(),
+                                     1); // convert eigenvalues from type S to type T, if required
+
+                // Get lapack (reference) eigenvalues
+                auto eigsRef
+                    = *HMat::Convert(lapackEigs.data(), lapackEigs.size(),
+                                     1); // convert eigenvalues from type S to type T, if required
+
+                // Create thin wrappers of input matrices A and B
+                auto AWrap = HMat::Wrap(A.data() + b * lda * n, lda, n);
+                auto BWrap = HMat::Wrap(B.data() + b * ldb * n, ldb, n);
+
+                // We want the sub-blocks starting from row 0, col 0 and with size n x n of A and B
+                auto A_b = (*AWrap).block(BDesc().nrows(n).ncols(n));
+                auto B_b = (*BWrap).block(BDesc().nrows(n).ncols(n));
+
+                // Get computed eigenvectors
+                auto V_b
+                    = (*HMat::Wrap(hZRes[b], ldz, n)).block(BDesc().nrows(n).ncols(numRocsolverEigs));
+
+                // If rocSOLVER computed more eigen-pairs than the number of
+                // reference eigenvalues, select the eigen-pairs that match the
+                // reference
+                if(numRocsolverEigs > numMatchingEigs)
+                {
+                    rocblas_int ii;
+                    for(rocblas_int i = 0; i < numMatchingEigs; ++i)
+                    {
+                        ii = rocsolverEigsIds[i];
+                        V_b.col(i, V_b.col(ii));
+                    }
+                    V_b = V_b.block(BDesc().nrows(n).ncols(numMatchingEigs));
+                }
+
+                //
+                // Check eigenpairs' accuracy with a "Relative Weyl" error
+                // bound, which (at its simplest form) states the following.
+                //
+                // Let X (cond(X) < Inf), and A (A^* = A) be such that A has
+                // eigenvalues {a_i} and H = X^t*A*X has eigenvalues {h_i}.
+                // Then:
+                //
+                // |a_i - h_i| <= |a_i|*||X^t*X - I||_2
+                //
+                // Note: for rocSOLVER's sygv, if V is the eigenvectors' matrix
+                // and B = L*L^t, then either X = L^t*V (cases 1 and 2) or X =
+                // inv(L)*V (case 3).
+                //
+                auto VE = HMat::Empty();
+                if(itype == rocblas_eform_bax)
+                {
+                    VE = adjoint(V_b) * inv(B_b) * V_b - HMat::Eye(numMatchingEigs);
+                }
+                else // if ((itype == rocblas_eform_ax) || (itype == rocblas_eform_abx))
+                {
+                    VE = adjoint(V_b) * B_b * V_b - HMat::Eye(numMatchingEigs);
+                }
+                S eta = std::max(VE.norm(), std::numeric_limits<S>::epsilon());
+                *max_err = eta > *max_err ? eta : *max_err;
+
+                auto AE = HMat::Empty();
+                if(itype == rocblas_eform_abx)
+                {
+                    auto Z = B_b * V_b;
+                    AE = adjoint(Z) * A_b * Z - HMat::Zeros(numMatchingEigs).diag(eigs);
+                }
+                else // if ((itype == rocblas_eform_ax) || (itype == rocblas_eform_bax))
+                {
+                    AE = adjoint(V_b) * A_b * V_b - HMat::Zeros(numMatchingEigs).diag(eigs);
+                }
+                err = AE.norm() / eigsRef.norm();
+                err *= std::numeric_limits<S>::epsilon() / eta;
                 *max_err = err > *max_err ? err : *max_err;
             }
         }
@@ -834,9 +1094,9 @@ void testing_sygvdx_hegvdx(Arguments& argus)
     }
 
     // validate results for rocsolver-test
-    // using 3 * n * machine_precision as tolerance
+    // using 5 * n * machine_precision as tolerance
     if(argus.unit_check)
-        ROCSOLVER_TEST_CHECK(T, max_error, 3 * n);
+        ROCSOLVER_TEST_CHECK(T, max_error, 5 * n);
 
     // output results for rocsolver-bench
     if(argus.timing)
diff --git a/clients/common/matrix_utils/host_matrix.hpp b/clients/common/matrix_utils/host_matrix.hpp
index fd84e100b..df6b2eae6 100644
--- a/clients/common/matrix_utils/host_matrix.hpp
+++ b/clients/common/matrix_utils/host_matrix.hpp
@@ -1,5 +1,5 @@
 /* **************************************************************************
- * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (C) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -113,14 +113,14 @@ class HostMatrix : public MatrixInterface<T_, I_>
         return ptr;
     }
 
-    template <typename S_>
-    static auto Convert(const HostMatrix<S_, I_>& In) -> HostMatrix<T_, I_>
+    template <template <typename, typename> class HostMatrix_, typename TT_, typename II_>
+    static auto Convert(const HostMatrix_<TT_, II_>& In) -> HostMatrix<T_, I_>
     {
         HostMatrix<T_, I_> Out(In.nrows(), In.ncols());
 
         for(I i = 0; i < Out.size(); ++i)
         {
-            Out[i] = T(In[i]);
+            Out[i] = T_(In[i]);
         }
 
         return Out;
@@ -440,6 +440,7 @@ class HostMatrix : public MatrixInterface<T_, I_>
 
         nrows_ = nrows;
         ncols_ = ncols;
+        ld_ = nrows;
 
         return true;
     }
@@ -1316,4 +1317,77 @@ auto eig_lower(const HostMatrix_<T, I>& A)
     return std::make_tuple(U, Lambda);
 }
 
+// Full singular value decomposition A = U * Sigma * V^*, computed by
+// `detail::lapack_ge_svd`.  Returns the tuple (U, Sigma, V), where U is
+// nrows x nrows, V is ncols x ncols and Sigma is nrows x ncols with the
+// singular values on its diagonal.
+//
+// Throws std::domain_error if the index type is not `int` and the dimensions
+// of A (or their product) do not fit in an `int`.
+//
+// NOTE(review): the boolean status returned by `detail::lapack_ge_svd` is not
+// checked here, so a failed factorization is not reported to the caller.
+template <template <typename, typename> class HostMatrix_, typename T, typename I>
+auto svd(const HostMatrix_<T, I>& A) -> std::tuple<HostMatrix_<T, I> /* Left Singular Vectors: U */,
+                                                   HostMatrix_<T, I> /* Singular Values */,
+                                                   HostMatrix_<T, I> /* Right Singular Vectors */>
+{
+    using S = typename HostMatrix_<T, I>::S;
+    using Mat = HostMatrix_<T, I>;
+
+    const I m = A.nrows();
+    const I k = A.ncols();
+
+    Mat U(m, m), V(k, k), Sigma(m, k);
+    HostMatrix_<S, I> sigma_diag(std::min(m, k), 1);
+
+    // LAPACK takes `int` dimensions: any other index type must be range-checked first.
+    if constexpr(!std::is_same<std::decay_t<I>, int>::value)
+    {
+        constexpr auto int_max = static_cast<std::int64_t>(std::numeric_limits<int>::max());
+        const auto m64 = static_cast<std::int64_t>(m);
+        const auto k64 = static_cast<std::int64_t>(k);
+
+        if((m64 * k64 > int_max) || (m64 > int_max) || (k64 > int_max))
+        {
+            throw std::domain_error(
+                "Error computing svd(A): A.nrows(), A.ncols(), A.nrows()*A.ncols() must be "
+                "smaller or equal to INT_MAX");
+        }
+    }
+    detail::lapack_ge_svd(A.data(), static_cast<int>(m), static_cast<int>(k), U.data(),
+                          sigma_diag.data(), V.data());
+
+    // Lapack *gesvd returns V^* instead of V.
+    Sigma.diag(Mat::Convert(sigma_diag));
+    V = adjoint(V);
+    return std::make_tuple(U, Sigma, V);
+}
+
+// Pseudo-inverse of A computed from its singular value decomposition: each
+// diagonal entry of Sigma whose magnitude exceeds the cutoff below is replaced
+// by its reciprocal and every other diagonal entry is set to zero.
+template <template <typename, typename> class HostMatrix_, typename T, typename I>
+auto inv(const HostMatrix_<T, I>& A) -> HostMatrix_<T, I> /* Pseudo-Inverse of A */
+{
+    using S = typename HostMatrix_<T, I>::S;
+
+    auto [U, Sigma, V] = svd(A);
+    const I diag_len = std::min(A.nrows(), A.ncols());
+
+    // Cutoff is numeric_limits<S>::min(), clamped from below at zero.
+    const S cutoff = std::max(std::numeric_limits<S>::min(), S(0));
+
+    for(I d = 0; d < diag_len; ++d)
+    {
+        const bool invertible = std::abs(Sigma(d, d)) > cutoff;
+        Sigma(d, d) = invertible ? T(1) / Sigma(d, d) : T(0);
+    }
+
+    // Since svd() yields A = U * Sigma * V^*, the pseudo-inverse is the
+    // adjoint of U * Sigma^+ * V^*.
+    auto iA = adjoint(U * Sigma * adjoint(V));
+    return iA;
+}
+
 } // namespace matxu
diff --git a/clients/common/matrix_utils/matrix_utils_detail.hpp b/clients/common/matrix_utils/matrix_utils_detail.hpp
index ca41a255a..1f890fa04 100644
--- a/clients/common/matrix_utils/matrix_utils_detail.hpp
+++ b/clients/common/matrix_utils/matrix_utils_detail.hpp
@@ -1,5 +1,5 @@
 /* **************************************************************************
- * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (C) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -191,6 +191,30 @@ namespace detail
         return (info == 0);
     }
 
+    // Compute singular values and singular vectors of A with lapack_*gesvd.
+    // A (nrows * ncols contiguous entries, leading dimension nrows) is not modified.
+    // Returns true iff A is non-null, nrows, ncols >= 1 and gesvd sets info to 0.
+    template <typename T, typename S>
+    bool lapack_ge_svd(T const* A, const int nrows, const int ncols, T* U, S* D, T* V)
+    {
+        if(A == nullptr || nrows < 1 || ncols < 1)
+        {
+            return false;
+        }
+
+        int info = -1; // stays a failure code unless gesvd overwrites it
+        int worksize = 32 * std::max(1, 2 * std::min(nrows, ncols) + std::max(nrows, ncols));
+        std::vector<T> work(worksize, T(0.));
+        int worksize_real = 5 * std::min(nrows, ncols);
+        std::vector<S> work_real(worksize_real, S(0.));
+        // gesvd overwrites its input matrix, so it runs on an owning copy (freed automatically).
+        std::vector<T> Acpy(A, A + static_cast<std::size_t>(nrows) * ncols);
+        cpu_gesvd(rocblas_svect_all, rocblas_svect_all, nrows, ncols, Acpy.data(), nrows, D, U,
+                  nrows, V, ncols, work.data(), worksize, work_real.data(), &info);
+
+        return (info == 0);
+    }
+
 } // namespace detail
 
 } // namespace matxu
diff --git a/clients/common/misc/clss.hpp b/clients/common/misc/clss.hpp
new file mode 100644
index 000000000..3eaf7c30f
--- /dev/null
+++ b/clients/common/misc/clss.hpp
@@ -0,0 +1,797 @@
+/* **************************************************************************
+ * Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * *************************************************************************/
+
+#pragma once
+
+#include <complex>
+#include <cstring>
+#include <iomanip>
+#include <mutex>
+#include <sstream>
+#include <type_traits>
+#include <vector>
+
+//
+// @brief `class closest_largest_subsequences`: Functor to compute the closest
+// largest subsequences of a given pair of sequences.
+//
+// Given a tolerance `tol` and a pair of sequences:
+//
+// (a_i), (b_j) with 0 <= i <= n, 0 <= j <= m;
+//
+// `closest_largest_subsequences` (`clss`) extracts the subsequences:
+//
+// (a_l) .=. (a_l1, a_l2, ..., a_lP) with 0 <= l1 < l2 < ... < lP <= n, and
+// (b_k) .=. (b_k1, b_k2, ..., b_kP) with 0 <= k1 < k2 < ... < kP <= m;
+//
+// (where 0 <= P <= n, m) that satisfy the following properties:
+//
+// 1. |a_l1 - b_k1| <= tol, |a_l2 - b_k2| <= tol, ..., |a_lP - b_kP| <= tol;
+//
+// 2. maximizes P (the size of the subsequences); and, for this maximal P,
+//
+// 3. minimizes ||a_l - b_k||_1 = \sum_{1 <= q <= P} |a_l_q - b_k_q|;
+//
+// in O(max{n, m}^2) space and time.  For a commented example, see Usage
+// section down below.
+//
+// \tparam T Type of elements in sequences (a_i), (b_j); expected to be an
+// arithmetic type; otherwise, T must be endowed with an overload to
+// `operator<` that defines a strict partial order.
+//
+// \tparam I Signed integer type to index the sequences.
+//
+//
+// ## Usage:
+//
+// Functor `clss` primary use is to improve the tests of the expert
+// eigensolvers' drivers, and allow extracting a sub-sequence of the computed
+// eigenvalues that matches a given list of eigenvalues.
+//
+// For example, consider the use of the bisection driver (STEBZ) to compute the
+// eigenvalues of a matrix A with two irreducible blocks.  The spectrum of A is
+// given as:
+//
+// - eig(A) = {-2., -1., 1., 2., -3., -2., -1., 1., 2.}.
+//
+// Say that the eigenvalues computed by STEBZ (grouped with the "by block"
+// ordering) are (to working precision `eps` = 0.015):
+//
+// - STEBZ::eig(A) = {-2., -0.99, 1.01, 1.99, -3.0, -2.01, -0.99, 1.01, 2.01},
+//
+// and those are meant to be compared with matrix eig(A) in the range (-1, 2].
+// One would find that
+//
+// - eig(A) \intersect (-1, 2] = {1., 2., 1., 2.}; whereas
+//
+// - STEBZ::eig(A) \intersect (-1, 2] = {-0.99, 1.01, 1.99, -0.99, 1.01}.
+//
+// Even though the computation is correct to working precision, the sets
+// `eig(A)` and `STEBZ::eig(A)` have different sizes (which breaks tests that
+// target their equality) and unmatched eigenvalues (which breaks tests that
+// compare the eigenvalues directly).
+//
+// One can avoid such problems by, instead, comparing the sub-sequences
+// produced by using functor `clss` with inputs:
+//
+// i) eig(A) \intersect (-1, 2];
+//
+// ii) STEBZ eigenvalues in the interval (-1 - tol, 2 + tol]; and
+//
+// iii) tolerance `tol` (which will be arbitrarily set to 2*`eps` = 0.03 here;
+// in general, `tol` is a function of `eps` and matrix A).
+//
+// For this example, such a call would look like:
+//
+// - `clss({1., 2., 1., 2.}, {-0.99, 1.01, 1.99, -0.99, 1.01, 2.01}, tol)`;
+//
+// which yields the subsequences (obtained with `clss::subseqs`):
+//
+// - {1.,   2.,   1.,   2.}, (i.e., the reference eigenvalues) and
+//
+// - {1.01, 1.99, 1.01, 2.01};
+//
+// where the latter is the maximal subsequence of STEBZ::eig(A) that satisfies
+// properties (1), (2) and (3) of the definition of `clss` above.
+//
+// For this example, the computed `clss::distance` (i.e., the subsequences' l^1
+// distance) is 0.04, and the computed `clss::inf_norm_distance` (i.e., the
+// sub-sequences' l^\inf distance) is `0.01`.
+//
+// Moreover, method `clss::subseqs_ids` returns the indices of the elements of
+// the subsequences in their original sequences.  For this example,
+// `clss::subseqs_ids` would return:
+//
+// - {0, 1, 2, 3}, (i.e., indices in the reference eigenvalues list) and
+//
+// - {1, 2, 4, 5};
+//
+// where the latter contains the indices of the elements of the second
+// subsequence ({1.01, 1.99, 1.01, 2.01}) with respect to the original sequence
+// they belong to ({-0.99, 1.01, 1.99, -0.99, 1.01, 2.01}, meant to have been
+// computed by STEBZ).
+//
+template <typename T,
+          typename I = std::int64_t,
+          typename = typename std::enable_if<std::is_signed<std::decay_t<I>>::value>::type>
+class closest_largest_subsequences
+{
+public:
+    using S = decltype(std::real(T{}));
+
+    //
+    // Computes the largest closest subsequences of input sequences `a` and `b`.
+    //
+    // \param a:      pointer to first sequence, array of const T.
+    //
+    // \param size_a: number of elements in first sequence.
+    //
+    // \param b:      pointer to second sequence, array of const T.
+    //
+    // \param size_b: number of elements in second sequence.
+    //
+    // \return size of subsequences (equals the maximal number of matching
+    // elements of the original sequences)
+    //
+
+    [[maybe_unused]] auto operator()(T const* a, I size_a, T const* b, I size_b, S tol)
+        -> /**! Size of subsequences */ I
+    {
+        std::lock_guard<std::mutex> lock(m_);
+
+        clear();
+        // Null, empty or negative-tolerance inputs keep the cleared state: size 0, distances Inf
+        if((a != nullptr) && (b != nullptr) && (size_a > 0) && (size_b > 0) && (tol >= 0))
+        {
+            //
+            // Initialize members
+            //
+            this->tol_ = tol;
+            this->size_a_ = size_a;
+            this->size_b_ = size_b;
+            this->memo_distances_.resize(size_a * size_b, std::numeric_limits<S>::infinity());
+            // I(-1) marks a memo entry as "not computed yet" (this is what `memo_valid` tests)
+            this->memo_sizes_.resize(size_a * size_b, I(-1));
+            this->memo_next_.resize(size_a * size_b, I(-1));
+            // Copy original sequences for debugging purposes (read by `print_debug`)
+            this->seq_a_.assign(a, a + size_a);
+            this->seq_b_.assign(b, b + size_b);
+
+            //
+            // Call recursive, memoized, implementation to compute subsequences
+            //
+            auto [distance, sseqs_size, _] = clss_implr(a, size_a - 1, b, size_b - 1);
+            this->distance_ = distance;
+            this->sseqs_size_ = sseqs_size;
+
+            //
+            // Extract `sseq_a_` and `sseq_b_` from `a` and `b` and set:
+            // inf_norm_ = ||sseq_a_ - sseq_b_||_inf
+            //
+            this->inf_norm_ = extract_subsequences(a, size_a, b, size_b);
+        }
+
+        return sseqs_size_;
+    }
+
+    //
+    // Computes the largest closest subsequences of input sequences `a` and `b`.
+    //
+    // \param a:      pointer to first sequence, array of T.
+    //
+    // \param size_a: number of elements in first sequence.
+    //
+    // \param b:      pointer to second sequence, array of T.
+    //
+    // \param size_b: number of elements in second sequence.
+    //
+    // \return size of subsequences (equals the maximal number of matching
+    // elements of the original sequences)
+    //
+    [[maybe_unused]] auto operator()(T* a, I size_a, T* b, I size_b, S tol)
+        -> /**! Size of subsequences */ I
+    {
+        // Cast both pointers to T const* so that the pointer-to-const overload is selected
+        return (*this)(static_cast<T const*>(a), size_a, static_cast<T const*>(b), size_b, tol);
+    }
+
+    //
+    // Computes the largest closest subsequences of input sequences `a` and `b`.
+    //
+    // \param a:      pointer to first sequence, array of const T.
+    //
+    // \param size_a: number of elements in first sequence; type can differ from
+    // template parameter I.
+    //
+    // \param b:      pointer to second sequence, array of const T.
+    //
+    // \param size_b: number of elements in second sequence; type can differ from
+    // template parameter I.
+    //
+    // \return size of subsequences (equals the maximal number of matching
+    // elements of the original sequences)
+    //
+    template <typename J, typename = typename std::enable_if<std::is_integral<J>::value>::type>
+    [[maybe_unused]] auto operator()(T const* a, J size_a, T const* b, J size_b, S tol)
+        -> /**! Size of subsequences */ I
+    {
+        return (*this)(a, I(size_a), b, I(size_b), tol);
+    }
+
+    //
+    // Computes the largest closest subsequences of input sequences `a` and `b`.
+    //
+    // \param a:      pointer to first sequence, array of T.
+    //
+    // \param size_a: number of elements in first sequence; type can differ from
+    // template parameter I.
+    //
+    // \param b:      pointer to second sequence, array of T.
+    //
+    // \param size_b: number of elements in second sequence; type can differ from
+    // template parameter I.
+    //
+    // \return size of subsequences (equals the maximal number of matching
+    // elements of the original sequences)
+    //
+    template <typename J, typename = typename std::enable_if<std::is_integral<J>::value>::type>
+    [[maybe_unused]] auto operator()(T* a, J size_a, T* b, J size_b, S tol)
+        -> /**! Size of subsequences */ I
+    {
+        // Convert the sizes to I and the pointers to T const*, then defer to the main overload
+        return (*this)(static_cast<T const*>(a), I(size_a), static_cast<T const*>(b), I(size_b), tol);
+    }
+
+    //
+    // Computes the largest closest subsequences of input sequences `a` and `b`.
+    //
+    // \param a:      first sequence, const vector of T.
+    //
+    // \param b:      second sequence, const vector of T.
+    //
+    // \return size of subsequences (equals the maximal number of matching
+    // elements of the original sequences)
+    //
+    [[maybe_unused]] auto operator()(const std::vector<T>& a, const std::vector<T>& b, S tol)
+        -> /**! Size of subsequences */ I
+    {
+        return (*this)(a.data(), a.size(), b.data(), b.size(), tol);
+    }
+
+    //
+    // Returns the l^1 distance between subsequences, or Inf if at least one of
+    // them is empty.
+    //
+    // \return l^1 distance between subsequences.
+    //
+    auto distance() -> S
+    {
+        const std::lock_guard<std::mutex> guard{m_};
+        return distance_;
+    }
+
+    //
+    // Returns the l^\inf distance between subsequences, or Inf if at least one
+    // of them is empty.
+    //
+    // \return l^\inf distance between subsequences.
+    //
+    auto inf_norm_distance() -> S
+    {
+        const std::lock_guard<std::mutex> guard{m_};
+        return inf_norm_;
+    }
+
+    //
+    // Returns the indices of the elements of the subsequences in their
+    // original sequences.
+    //
+    // Let a, b denote the original sequences, and sseq_a, sseq_b denote
+    // subsequences computed by functor `clss`.  Write:
+    //
+    // `auto [a_ids, b_ids] = clss::subseqs_ids();`
+    //
+    // Then:
+    //
+    // a) For 0 <= i < sseq_a.size(), sseq_a[i] == a[a_ids[i]];
+    //
+    // b) For 0 <= j < sseq_b.size(), sseq_b[j] == b[b_ids[j]].
+    //
+    // \return std::pair of std::vector containing indices of subsequences'
+    // elements as they appear in the original sequences.
+    //
+    auto subseqs_ids() -> std::pair<std::vector<S>, std::vector<S>>
+    {
+        const std::lock_guard<std::mutex> guard{m_}; // NOTE(review): ids are stored as T and returned as S, not I
+        return std::make_pair(sseq_a_ids_, sseq_b_ids_);
+    }
+
+    //
+    // Returns two subsequences satisfying properties (1), (2) and (3)
+    // of the functor description.
+    //
+    // \return std::pair of std::vector containing subsequences.
+    //
+    auto subseqs() -> std::pair<std::vector<S>, std::vector<S>>
+    {
+        const std::lock_guard<std::mutex> guard{m_};
+        return std::make_pair(sseq_a_, sseq_b_);
+    }
+
+    //
+    // Returns the number of elements of the subsequences.
+    //
+    // \return number of elements of the subsequences.
+    //
+    auto subseqs_size() -> I
+    {
+        const std::lock_guard<std::mutex> guard{m_};
+        return sseqs_size_;
+    }
+
+    ///
+    /// For debugging
+    ///
+
+    //
+    // Prints internal information for debugging purposes.
+    //
+    // \return std::string with debug information.
+    //
+    auto print_debug_str() -> std::string
+    {
+        std::ostringstream buffer; // scratch stream that `print_debug` fills and hands back
+        return print_debug(buffer).str();
+    }
+
+    //
+    // Prints internal information for debugging purposes.
+    //
+    // \param os: reference to a variable of a type that derives from
+    // std::ostream, in which debug information is meant to be appended to.
+    //
+    // \return *reference* to input parameter `os`, for convenience.
+    //
+    // See `clss::print_debug_str` for an example of usage.
+    //
+    template <typename K = std::ostringstream,
+              typename = typename std::enable_if<std::is_base_of_v<std::ostream, K>>::type>
+    [[maybe_unused]] auto print_debug(K& os) -> K&
+    {
+        std::lock_guard<std::mutex> lock(m_);
+
+        auto a = seq_a_.data();
+        auto b = seq_b_.data();
+
+        const auto default_flags{os.flags()}; // saved so that std::fixed does not leak to the caller
+        const auto default_precision{os.precision()};
+        const auto digits
+            = static_cast<I>(tol_ > S(0) ? std::ceil(-std::min(std::log10(tol_), S(0))) + 2
+                                         : std::numeric_limits<S>::max_digits10);
+        os << std::fixed << std::setprecision(digits);
+
+        auto print_input_sequences = [&os](auto& a, auto a_size, auto& b, auto b_size) {
+            os << ">>> Input: \n";
+
+            os << ":: :: a = {";
+            for(I i = 0; i < a_size; ++i)
+            {
+                os << a[i];
+                if(i != a_size - 1)
+                {
+                    os << ", ";
+                }
+            }
+            os << "}\n\n";
+
+            os << ":: :: b = {";
+            for(I i = 0; i < b_size; ++i)
+            {
+                os << b[i];
+                if(i != b_size - 1)
+                {
+                    os << ", ";
+                }
+            }
+            os << "}\n\n";
+        };
+
+        os << ">>>>>>>>>>>>\n";
+        os << ":: :: closest_largest_subsequences::print_debug()\n\n" << std::flush;
+        print_input_sequences(a, size_a_, b, size_b_);
+        os << ":: :: tol = " << tol_ << std::endl << std::endl;
+
+        os << "++++++++++++\n";
+        os << ":: :: Subsequences sub_a, sub_b have distance: " << distance_
+           << ", size: " << sseqs_size_ << ", and ||sub_a - sub_b||_inf = " << inf_norm_ << std::endl
+           << std::endl;
+        print_extract_subsequences(os);
+        os << "<<<<<<<<<<<<\n" << std::flush;
+
+        // Restore the stream's original format flags (this undoes std::fixed) and precision
+        os.flags(default_flags);
+        os << std::setprecision(default_precision);
+        return os;
+    }
+
+private:
+    S tol_{};
+    I sseqs_size_{};
+    S distance_ = std::numeric_limits<S>::infinity();
+    S inf_norm_ = std::numeric_limits<S>::infinity();
+    I size_a_{};
+    I size_b_{};
+    std::vector<T> seq_a_{};
+    std::vector<T> seq_b_{};
+    std::vector<T> sseq_a_{};
+    std::vector<T> sseq_b_{};
+    std::vector<T> sseq_a_ids_{};
+    std::vector<T> sseq_b_ids_{};
+    std::vector<S> memo_distances_{};
+    std::vector<I> memo_sizes_{};
+    std::vector<I> memo_next_{};
+    std::mutex m_;
+
+    void clear() // Resets every data member to its in-class initial value; the mutex is untouched
+    {
+        tol_ = {};
+        sseqs_size_ = {};
+        distance_ = std::numeric_limits<S>::infinity(); // S, not T: distance_ is declared as S
+        inf_norm_ = std::numeric_limits<S>::infinity();
+        size_a_ = {};
+        size_b_ = {};
+        seq_a_ = {};
+        seq_b_ = {};
+        sseq_a_ = {};
+        sseq_b_ = {};
+        sseq_a_ids_ = {};
+        sseq_b_ids_ = {};
+        memo_distances_ = {};
+        memo_sizes_ = {};
+        memo_next_ = {};
+    }
+
+    /// Recursive, memoized core: `a`/`b` point at the current heads, `sa`/`sb` count the elements after them
+    auto clss_implr(T const* a, I sa, T const* b, I sb)
+        -> std::tuple</* acc distance */ S, /* size */ I, /* next */ I>
+    {
+        //
+        // Base case: at least one of the sequences is empty (distance Inf, size 0, no next entry)
+        //
+        if(!in_range(sa, sb))
+        {
+            return std::make_tuple(std::numeric_limits<S>::infinity(), I(0), I(-1));
+        }
+
+        //
+        // If `dist`, `size` and `next_index` have already been computed for this pair of `sa`, `sb` return
+        //
+        auto [dist, size, _] = memo(sa, sb);
+        I next_index = I(-1);
+
+        if(memo_valid(dist, size))
+        {
+            // Make next entry point to this one
+            next_index = ij2index(sa, sb);
+
+            return std::make_tuple(dist, size, next_index);
+        }
+
+        //
+        // Otherwise, compute new `dist`, `size` and `next_index`
+        //
+
+        // Initialize local vars
+        dist = std::numeric_limits<S>::infinity();
+        size = I(0);
+        // Compare current optimum (dist, size) with candidate optimum (d, s), and update if necessary
+        auto do_update = [](S d, I s, I nindex, S& dist, I& size, I& next_index) -> bool {
+            bool update = false;
+            if(size < s) // a longer match always wins
+            {
+                dist = d;
+                size = s;
+                next_index = nindex;
+                update = true;
+            }
+            else if(size == s) // same length: keep the one with the strictly smaller distance
+            {
+                if(dist > d)
+                {
+                    dist = d;
+                    next_index = nindex;
+                    update = true;
+                }
+            }
+
+            return update;
+        };
+        [[maybe_unused]] bool update = false;
+
+        // Case 1: a[0] .==. b[0], try to match next element of sequence `a` with next element of sequence `b`
+        if(equiv(a[0], b[0]))
+        {
+            auto [d, s, nindex] = clss_implr(a + I(1), sa - I(1), b + I(1), sb - I(1));
+            if(d == std::numeric_limits<S>::infinity()) // the tails reported no match (distance Inf)
+            {
+                dist = std::abs(a[0] - b[0]);
+                size = I(1);
+                next_index = ij2index(sa, sb); // this entry points to itself
+                update = true;
+            }
+            else
+            {
+                d += std::abs(a[0] - b[0]);
+                ++s;
+                update = do_update(d, s, nindex, dist, size, next_index);
+            }
+        }
+
+        // Case 2: try to match next element of sequence `a` with current element of sequence `b`
+        {
+            auto [d, s, nindex] = clss_implr(a + I(1), sa - I(1), b, sb);
+            update = do_update(d, s, nindex, dist, size, next_index);
+        }
+
+        // Case 3: try to match current element of sequence `a` with next element of sequence `b`
+        {
+            auto [d, s, nindex] = clss_implr(a, sa, b + I(1), sb - I(1));
+            update = do_update(d, s, nindex, dist, size, next_index);
+        }
+
+        // Save best results from 3 cases
+        memo_dist(sa, sb) = dist;
+        memo_size(sa, sb) = size;
+        memo_next(sa, sb) = next_index;
+
+        // Make next entry point to this one
+        next_index = ij2index(sa, sb);
+
+        return std::make_tuple(dist, size, next_index);
+    }
+
+    auto extract_subsequences(T const* a, I size_a, T const* b, I size_b)
+        -> /* || sseq_a_ - sseq_b_ ||_inf */ S
+    {
+        S inf_norm = std::numeric_limits<S>::infinity();
+        I sa = size_a - I(1);
+        I sb = size_b - I(1);
+
+        I index = ij2index(sa, sb); // memo entry of the full problem: the traversal starts here
+        if(!in_range(index) || (sseqs_size_ == I(0)))
+        {
+            return inf_norm; // nothing to extract: report Inf
+        }
+
+        I next_index = index;
+        inf_norm = static_cast<S>(0);
+        do
+        {
+            index = next_index;
+            next_index = memo_next(index);
+            next_index = in_range(next_index) ? next_index : index; // no valid successor: stay put
+
+            I ia, ib;
+            I si = memo_size(index);
+            I nsi = memo_size(next_index);
+            if((nsi < si) || (index == next_index)) // size drops, or chain ends: record this pair
+            {
+                auto [ja, jb] = index2ij(index);
+
+                ia = sa - ja; // memo coordinates count down from sa; convert to a position in `a`
+                sseq_a_ids_.push_back(ia);
+                sseq_a_.push_back(a[ia]);
+
+                ib = sb - jb; // likewise for `b`
+                sseq_b_ids_.push_back(ib);
+                sseq_b_.push_back(b[ib]);
+
+                S norm = std::abs(a[ia] - b[ib]);
+                inf_norm = std::max(inf_norm, norm);
+            }
+        } while((index != next_index) && in_range(index)); // stop at an entry that points to itself
+
+        return inf_norm;
+    }
+
+    template <typename K = std::ostream>
+    void print_extract_subsequences(K&& os) // writes one line per extracted pair to `os`
+    {
+        os << ">>> Traversing:";
+        I sa = size_a_ - I(1);
+        I sb = size_b_ - I(1);
+        I index = ij2index(sa, sb); // same starting entry as in `extract_subsequences`
+        if(!in_range(index) || (sseqs_size_ == I(0)))
+        {
+            os << " nothing to print\n";
+            return;
+        }
+        os << std::endl;
+
+        I next_index = index, i = I(0); // `i` indexes the stored subsequences sseq_a_/sseq_b_
+        do
+        {
+            index = next_index;
+            next_index = memo_next(index);
+            next_index = in_range(next_index) ? next_index : index; // no valid successor: stay put
+
+            I ia, ib;
+            I si = memo_size(index);
+            I nsi = memo_size(next_index);
+            if((nsi < si) || (index == next_index)) // same selection rule as `extract_subsequences`
+            {
+                auto [ja, jb] = index2ij(index);
+
+                ia = sa - ja;
+                ib = sb - jb;
+
+                os << ""
+                   << ":: :: Indices: (" << ia << ", " << ib << ") :: Elements: (" << sseq_a_[i]
+                   << ", " << sseq_b_[i] << ") :: (acc dist = " << memo_dist(ja, jb)
+                   << ", size = " << memo_size(ja, jb) << ")\n";
+                ++i;
+            }
+        } while((index != next_index) && in_range(index));
+
+        return;
+    }
+
+    ///
+    /// Helper functions
+    ///
+
+    /// lhs and rhs are "equivalent" (symbolic notation: lhs .=. rhs)
+    /// when |lhs - rhs| <= tol_.
+    ///
+    /// This is not a true equivalence relation.
+    bool equiv(T lhs, T rhs) const
+    {
+        // Absolute gap between the two values
+        const auto gap = std::abs(lhs - rhs);
+
+        // The comparison is inclusive: a gap of exactly tol_ still counts
+        // as a match.
+        return gap <= tol_;
+    }
+
+    bool in_range(I i, I j) const
+    {
+        // First coordinate must lie in [0, size_a_) ...
+        if((i < I(0)) || (i >= size_a_))
+        {
+            return false;
+        }
+
+        // ... and the second one in [0, size_b_)
+        return (j >= I(0)) && (j < size_b_);
+    }
+
+    bool in_range(I index) const
+    {
+        // Valid flat indices are 0, 1, ..., size_a_ * size_b_ - 1
+        const I num_entries = size_a_ * size_b_;
+
+        if(index < I(0))
+        {
+            return false;
+        }
+
+        return index < num_entries;
+    }
+
+    auto memo(I i, I j) -> std::tuple<S, I, I> // (distance, size, next) stored for cell (i, j)
+    {
+        // The former trailing `const` followed the return type, so it only const-qualified
+        // the returned tuple (it never made the method const); it is dropped here.
+        const I index = ij2index(i, j);
+
+        return std::make_tuple(memo_distances_[index], memo_sizes_[index], memo_next_[index]);
+    }
+
+    S memo_dist(I i, I j) const&
+    {
+        // `const&`, not `const&&`: the read-only overload must bind to lvalues, not only rvalues
+        return memo_distances_[ij2index(i, j)];
+    }
+
+    S& memo_dist(I i, I j) &
+    {
+        // Writable reference to the accumulated distance stored for cell (i, j)
+        return memo_distances_[ij2index(i, j)];
+    }
+
+    I memo_size(I i, I j) const&
+    {
+        // `const&`, not `const&&`: the read-only overload must bind to lvalues, not only rvalues
+        return memo_sizes_[ij2index(i, j)];
+    }
+
+    I& memo_size(I i, I j) &
+    {
+        // Writable reference to the subsequence size stored for cell (i, j)
+        return memo_sizes_[ij2index(i, j)];
+    }
+
+    I memo_size(I index) const&
+    {
+        // `const&`, not `const&&`: the read-only overload must bind to lvalues, not only rvalues
+        return memo_sizes_[index];
+    }
+
+    I& memo_size(I index) &
+    {
+        // Writable reference to the subsequence size stored at flat position `index`
+        return memo_sizes_[index];
+    }
+
+    I memo_next(I i, I j) const&
+    {
+        // `const&`, not `const&&`: the read-only overload must bind to lvalues, not only rvalues
+        return memo_next_[ij2index(i, j)];
+    }
+
+    I& memo_next(I i, I j) &
+    {
+        // Writable reference to the successor index stored for cell (i, j)
+        return memo_next_[ij2index(i, j)];
+    }
+
+    I memo_next(I index) const&
+    {
+        // `const&`, not `const&&`: the read-only overload must bind to lvalues, not only rvalues
+        return memo_next_[index];
+    }
+
+    I& memo_next(I index) &
+    {
+        // Writable reference to the successor index stored at flat position `index`
+        return memo_next_[index];
+    }
+
+    bool memo_valid(S d, I s) const
+    {
+        // A memo entry counts as computed unless one of its values still equals -1.
+        // In this class the size slots start out at -1, while the distance slots
+        // start out at Inf.
+        const bool dist_unset = (d == S(-1));
+        const bool size_unset = (s == I(-1));
+
+        return !(dist_unset || size_unset);
+    }
+
+    auto ij2index(I i, I j) const -> I // flat memo index of cell (i, j); `const` now qualifies the method
+    {
+        return i + size_a_ * j;
+    }
+
+    auto index2ij(I index) const -> std::pair<I, I> // inverts index = i + size_a_ * j; method is now const
+    {
+        const I i = index % size_a_;
+        const I j = (index - i) / size_a_;
+        return std::make_pair(i, j);
+    }
+};