Skip to content

Commit

Permalink
Fix stein initial eigenvectors' choices (#714)
Browse files Browse the repository at this point in the history
Improve choice of starting eigenvectors in STEIN

This commit introduces a pseudorandom number generator that is
subsequently used to generate more robust initial eigenvectors in
`roclapack_stein`.
  • Loading branch information
jmachado-amd authored Aug 1, 2024
1 parent ec88119 commit b6820dc
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 3 deletions.
22 changes: 19 additions & 3 deletions library/src/auxiliary/rocauxiliary_stein.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "lapack_device_functions.hpp"
#include "rocblas.hpp"
#include "rocsolver/rocsolver.h"
#include "rocsolver_prng.hpp"

ROCSOLVER_BEGIN_NAMESPACE

Expand Down Expand Up @@ -112,7 +113,7 @@ __device__ void run_stein(const int tid,
S ssfmin)
{
__shared__ rocblas_int _info;
rocblas_int i, j, j1 = 0, b1, bn, blksize, gpind;
rocblas_int i, j, j1 = 0, b1, bn, blksize, gpind, seed;
S scl, onenrm, ortol, stpcrt, xj, xjm;

// zero info and ifail
Expand All @@ -122,6 +123,9 @@ __device__ void run_stein(const int tid,
for(i = tid; i < nev; i += MAX_THDS)
ifail[i] = 0;

auto normalize = [](rocblas_int value, rocblas_int max) -> S {
return static_cast<S>(static_cast<double>(value) / max);
};
// iterate over submatrix blocks
for(rocblas_int nblk = 0; nblk < iblock[nev - 1]; nblk++)
{
Expand Down Expand Up @@ -170,9 +174,21 @@ __device__ void run_stein(const int tid,
rocblas_int nrmchk = 0;

// initialize starting eigenvector
// TODO: how to make it random?

// deterministic seed, varies with thread ids and with each new computed eigenvalue
seed = tid + (j + nblk * nev) * MAX_THDS + 1;

// rocSOLVER's prng can be initialized with one or two seeds (better results are achieved if
// two, independent, seeds are used). The two seeds used here are not independent, they just spread
// the first pseudo-random number each thread computes more evenly.
rocsolver_int_prng<rocblas_int> prng(
seed, rocsolver_int_prng<rocblas_int>::max() + 1 - seed);

for(i = tid; i < blksize; i += MAX_THDS)
work[i] = (i == j - j1 ? S(1) : S(-1) / (blksize - 1));
{
// work[i] lies on the interval [0, 1]
work[i] = normalize(prng(), prng.max());
}

// copy the matrix so it won't be destroyed by factorization
for(i = tid; i < blksize - 1; i += MAX_THDS)
Expand Down
280 changes: 280 additions & 0 deletions library/src/include/rocsolver_prng.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
/* **************************************************************************
* Copyright (C) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* *************************************************************************/

#pragma once

#include <type_traits>

#include "rocblas_utility.hpp"

ROCSOLVER_BEGIN_NAMESPACE

namespace detail
{
///
/// @brief Pseudorandom number generator
///
/// Generates pseudorandom numbers 0 <= X < 2^31 - 1,
/// U = X/(2^31 - 1) in [0, 1) using the sequence
///
/// X_n = Y_n - Z_n, (*)
///
/// where Y_n, Z_n are the following linear congruential generators
/// - Y_n = 48271 * Y_{n-1} mod (2^31 - 1);
/// - Z_n = 40692 * Z_{n-1} mod (2^31 - 249).
///
/// Sequence X_n (*) has the following properties:
/// - it can be computed with 32-bit signed integer arithmetic;
/// - its period length size (2^31 - 2)(2^31 - 250)/62 ~ 2^56
/// renders it useful for generating significantly more than 2^32
/// pseudorandom numbers per run;
/// - it achieves good ratings on the spectral test.
///
/// This generator appears on Eq. 3.4.4-(38) of Donald Knuth, The Art
/// of Computer Programming Volume 2 -- Seminumerical Algorithms,
/// 3rd ed. See book and references therein for facts about Xn (*),
/// (in particular, see entries 20, 21 and 24 in Table 1, pg. 106,
/// and the discussion found in Subsection 3.3.4 near Eq. 3.4.4-(38)).
///
/// \tparam I Type used for input and output of numbers Xn, Yn, Zn.
/// Must be an integer and, at least, 32-bit wide.
///
template <typename I, typename = typename std::enable_if<std::is_integral<I>::value && sizeof(I) >= 4>::type>
struct pseudorandom_number_generator
{
static constexpr std::int32_t m_m31_mXY
= (1LL << 31) - 1; ///< Modulus of Xn and Yn, Mersenne prime M_31 = 2^31 - 1
static constexpr std::int32_t m_mZ = (1LL << 31) - 249; ///< Modulus of Zn sequence, another prime

///
/// Range check Y0, such that 0 < Y0 < m_m31_mXY = 2^31 - 1.
///
/// The number Y' = range_check_Y(Y0) yields next_Y(Y') != 0,
/// for all Y0. Hence, this method is meant to be used only
/// when seeding the Zn generator.
///
/// \param Z0 Input: number to check, output: 0 < Y' < 2^31 - 1.
///
/// \return Number Y' lying strictly between 0 and m_mZ = 2^31 - 1.
///
template <typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto range_check_Y(K&& Y0) -> I
{
Y0 = std::max(Y0 % static_cast<I>(m_m31_mXY), static_cast<I>(1));
return Y0;
}

///
/// Range check Z0, such that 0 < Z0 < m_mZ = 2^31 - 249.
///
/// The number Z' = range_check_Z(Z0) yields next_Z(Z') != 0,
/// for all Z0. Hence, this method is meant to be used only
/// when seeding the Zn generator.
///
/// \param Z0 Input: number to check, output: 0 < Z' < 2^31 - 249.
///
/// \return Number Z' lying strictly between 0 and m_mZ = 2^31 - 249.
///
template <typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto range_check_Z(K&& Z0) -> I
{
Z0 = std::max(Z0 % static_cast<I>(m_mZ), static_cast<I>(1));
return Z0;
}

///
/// Iterates Y, Z and computes U = (Y - Z)/(2^31 - 1) \in [0., 1.).
///
/// \tparam S Output type, typically a floating point number.
///
/// \param Y Input: iterate n-1 of Yn, output: next_Y(Y).
///
/// \param Z [Optional] Input: iterate n-1 of Zn, output: next_Z(Z).
//
/// \return Number (Y - Z)/(2^31 - 1) cast to type S.
///
template <typename S,
typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto uniform01(K&& Y, K&& Z = 0) -> S
{
constexpr double range = static_cast<double>(m_m31_mXY);
I Xnext = next_X(Y, Z);
double U = static_cast<double>(Xnext) / range;
return static_cast<S>(U);
}

///
/// Computes next iterate of sequence Xn.
///
/// \param Y Input: iterate n-1 of Yn, output: next_Y(Y).
///
/// \param Z [Optional] Input: iterate n-1 of Zn, output: next_Z(Z).
///
/// \return Next iterate of Xn, 0 <= Xnext < m_m31_mXY = 2^31 - 1.
///
template <typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto next_X(K&& Y, K&& Z = 0) -> I
{
std::int32_t Xnext, Ynext, Znext, sgnX;

Ynext = static_cast<std::int32_t>(next_Y(Y));
Znext = static_cast<std::int32_t>(next_Z(Z));

Xnext = Ynext - Znext;
sgnX = Xnext < 0 ? -1 : 1;
Xnext += std::max(-sgnX, 0) * m_m31_mXY;

return static_cast<I>(Xnext);
}

///
/// Computes next iterate of sequence Yn.
///
/// Facts:
/// - Yn has period m_m31_mXY = 2^31 - 2;
/// - next_Y(0) == 0;
/// - if 0 < Y' < 2^31 - 1, next_Y(Y') != 0,
/// thus, for all integers k, next_Y(range_check_Y(k)) != 0.
///
/// \param Y Input: iterate n-1 of Yn, output: next_Y(Y).
///
/// \return Next iterate of Yn, 0 <= Ynext < m_m31_mXY = 2^31 - 1.
///
template <typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto next_Y(K&& Y) -> I
{
std::int32_t Ynext = static_cast<std::int32_t>(Y), sgnY;

constexpr std::int32_t ay = 48271;
constexpr std::int32_t qy = 44488; // qy = floor(m_m31_mXY/ay)
constexpr std::int32_t ry = 3399; // ry = m_m31_mXY % ay

// Ynext = (ay * Y) % m_m31_mXY;
Ynext = ay * (Ynext % qy) - ry * static_cast<std::int32_t>(Ynext / qy);
sgnY = Ynext < 0 ? -1 : 1;
Ynext += std::max(-sgnY, 0) * m_m31_mXY;

Y = static_cast<I>(Ynext);
return Y;
}

///
/// Computes next iterate of sequence Zn.
///
/// Facts:
/// - Zn has period m_Z = 2^31 - 250;
/// - next_Z(0) == 0;
/// - if 0 < Z' < 2^31 - 249, next_Z(Z') != 0,
/// thus, for all integers k, next_Z(range_check_Z(k)) != 0.
///
/// \param Z Input: iterate n-1 of Zn, output: next_Z(Z).
///
/// \return Next iterate of Zn, 0 <= Znext < m_Z = 2^31 - 249.
///
template <typename K = I,
typename = typename std::enable_if<std::is_same<std::decay_t<K>, I>::value>::type>
__device__ __host__ static auto next_Z(K&& Z) -> I
{
std::int32_t Znext = static_cast<std::int32_t>(Z), sgnZ;

constexpr std::int32_t az = 40692;
constexpr std::int32_t qz = 52774; // qz = floor(m_mZ/az)
constexpr std::int32_t rz = 3791; // rz = m_mZ % az

// Znext = (az * Z) % m_mZ;
Znext = az * (Znext % qz) - rz * static_cast<std::int32_t>(Znext / qz);
sgnZ = Znext < 0 ? -1 : 1;
Znext += std::max(-sgnZ, 0) * m_mZ;

Z = static_cast<I>(Znext);
return Z;
}
};
} /// namespace detail

/// \class rocSOLVER wrapper class for integer, pseudo-random, number generation
///
/// \brief Produces pseudo-random integer values distributed on the interval
/// [0, rocsolver_int_prng<T>::max()]. If T is at least 32-bit wide,
/// then generated values are uniformly distributed.
///
/// \tparam T - type of generated values, usually an integral type.
///
/// \sa detail::pseudorandom_number_generator for more details.
///
template <class T = std::int32_t>
class rocsolver_int_prng
{
public:
using rocsolver_prng_impl_t = typename detail::pseudorandom_number_generator<T>;

/// Constructor.
__device__ __host__ rocsolver_int_prng(T Y0, T Z0 = 0)
{
m_Y = rocsolver_prng_impl_t::range_check_Y(static_cast<std::int32_t>(Y0));
if(Z0 == 0)
{
m_Z = m_Y;
}
else
{
m_Z = rocsolver_prng_impl_t::range_check_Z(static_cast<std::int32_t>(Z0));
}
}

/// Default destructor.
__device__ __host__ ~rocsolver_int_prng() = default;

/// Returns the smallest possible value that can be generated.
static constexpr __device__ __host__ auto min() -> T
{
return static_cast<T>(0);
}

/// Returns the largest possible value that can be generated.
static constexpr __device__ __host__ auto max() -> T
{
return static_cast<T>(rocsolver_prng_impl_t::m_m31_mXY - 1);
}

/// Generates a new pseudo-random integer.
__device__ __host__ auto operator()(void) -> T
{
auto output = rocsolver_prng_impl_t::next_X(m_Y, m_Z);
return static_cast<T>(output);
}

private:
std::int32_t m_Y{}, m_Z{};
};

ROCSOLVER_END_NAMESPACE

0 comments on commit b6820dc

Please sign in to comment.