diff --git a/clients/include/testing_gesvdx.hpp b/clients/include/testing_gesvdx.hpp index 4dd344b8c..920e7ab50 100644 --- a/clients/include/testing_gesvdx.hpp +++ b/clients/include/testing_gesvdx.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2022 Advanced Micro Devices, Inc. + * Copyright (c) 2022-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -166,7 +166,7 @@ void testing_gesvdx_bad_arg() // check bad arguments gesvdx_checkBadArgs(handle, left_svect, right_svect, srange, m, n, dA.data(), lda, - stA, vl, vu, il, iu, dNsv, dS.data(), stS, dU.data(), ldu, stU, + stA, vl, vu, il, iu, dNsv.data(), dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, difail.data(), stF, dinfo.data(), bc); } else @@ -177,7 +177,7 @@ void testing_gesvdx_bad_arg() // check bad arguments gesvdx_checkBadArgs(handle, left_svect, right_svect, srange, m, n, dA.data(), lda, - stA, vl, vu, il, iu, dNsv, dS.data(), stS, dU.data(), ldu, stU, + stA, vl, vu, il, iu, dNsv.data(), dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, difail.data(), stF, dinfo.data(), bc); } } diff --git a/clients/include/testing_managed_malloc.hpp b/clients/include/testing_managed_malloc.hpp index 252f73797..4ad1919ef 100644 --- a/clients/include/testing_managed_malloc.hpp +++ b/clients/include/testing_managed_malloc.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2020-2022 Advanced Micro Devices, Inc. + * Copyright (c) 2020-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -7,6 +7,8 @@ #include "clientcommon.hpp" #include "lapack_host_reference.hpp" #include "norm.hpp" +#include "rocblascommon/rocblas_init.hpp" +#include "rocblascommon/rocblas_vector.hpp" #include "rocsolver.hpp" #include "rocsolver_arguments.hpp" #include "rocsolver_test.hpp" diff --git a/clients/rocblascommon/d_vector.hpp b/clients/rocblascommon/d_vector.hpp deleted file mode 100644 index 7f21bfc0b..000000000 --- a/clients/rocblascommon/d_vector.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* ************************************************************************ - * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. - * ************************************************************************ */ - -#pragma once - -#include -#include - -#include -#include -#include - -#include "common_host_helpers.hpp" -#include "rocblas_init.hpp" -#include "rocblas_test.hpp" - -/* ============================================================================================ - */ -/*! \brief base-class to allocate/deallocate device memory */ -template -class d_vector -{ -private: - size_t size, bytes; - -public: - inline size_t nmemb() const noexcept - { - return size; - } - - d_vector(size_t s) - : size(s) - , bytes(s ? s * sizeof(T) : sizeof(T)) - { - } - - T* device_vector_setup() - { - T* d; - if((hipMalloc)(&d, bytes) != hipSuccess) - { - fmt::print(stderr, "Error allocating {} bytes ({} GB)\n", bytes, bytes >> 30); - d = nullptr; - } - return d; - } - - void device_vector_check(T* d) {} - - void device_vector_teardown(T* d) - { - if(d != nullptr) - { - // Free device memory - CHECK_HIP_ERROR((hipFree)(d)); - } - } -}; diff --git a/clients/rocblascommon/device_batch_vector.hpp b/clients/rocblascommon/device_batch_vector.hpp index 8b6f1052e..7643baf61 100644 --- a/clients/rocblascommon/device_batch_vector.hpp +++ b/clients/rocblascommon/device_batch_vector.hpp @@ -1,298 +1,170 @@ /* ************************************************************************ - * Copyright (c) 2018-2020 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once -#include "d_vector.hpp" +#include +#include +#include +#include + +#include +#include + +#include "common_host_helpers.hpp" +#include "device_memory.hpp" -// -// Local declaration of the host strided batch vector. -// template class host_batch_vector; -//! -//! @brief pseudo-vector subclass which uses a batch of device memory pointers -//! and -//! - an array of pointers in host memory -//! - an array of pointers in device memory -//! template -class device_batch_vector : private d_vector +class device_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief Disallow copying. - //! - device_batch_vector(const device_batch_vector&) = delete; - - //! - //! @brief Disallow assigning. - //! - device_batch_vector& operator=(const device_batch_vector&) = delete; - - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param batch_count The batch count. - //! - explicit device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) - : d_vector(size_t(n) * std::abs(inc)) - , m_n(n) - , m_inc(inc) - , m_batch_count(batch_count) - { - if(false == this->try_initialize_memory()) + device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) + : hPtrArr_(std::make_unique(batch_count)) + , n_(n) + , inc_(inc) + , batch_count_(batch_count) + { + assert(n > 0); + assert(batch_count > 0); + + T** dPtrArr; + THROW_IF_HIP_ERROR(hipMalloc(&dPtrArr, sizeof(T*) * batch_count)); + dPtrArr_ = std::unique_ptr(dPtrArr); + + auto tmp = std::make_unique(batch_count); + const size_t size = vsize(); + for(rocblas_int i = 0; i < batch_count; ++i) { - this->free_memory(); + T* dArr; + THROW_IF_HIP_ERROR(hipMalloc(&dArr, sizeof(T) * size)); + hPtrArr_[i].reset(dArr); + tmp[i] = dArr; } + THROW_IF_HIP_ERROR(hipMemcpy(dPtrArr, tmp.get(), sizeof(T*) * batch_count, hipMemcpyHostToDevice)); } - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride (UNUSED) The stride. - //! @param batch_count The batch count. - //! - explicit device_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count) + device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count) : device_batch_vector(n, inc, batch_count) { + assert(stride == 1); } - //! - //! @brief Constructor (kept for backward compatibility only, to be removed). - //! @param batch_count The number of vectors. - //! @param size_vector The size of each vectors. - //! - explicit device_batch_vector(rocblas_int batch_count, size_t size_vector) - : device_batch_vector(size_vector, 1, batch_count) + // The number of elements in each vector. + rocblas_int n() const noexcept { + return n_; } - //! - //! @brief Destructor. - //! - ~device_batch_vector() + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - this->free_memory(); + return inc_; } - //! - //! @brief Returns the length of the vector. - //! - rocblas_int n() const + // The size of each vector. This is a derived property of the number of elements in the vector + // and the spacing between them. + size_t vsize() const { - return this->m_n; + return size_t(n_) * std::abs(inc_); } - //! - //! @brief Returns the increment of the vector. - //! - rocblas_int inc() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_inc; + return batch_count_; } - //! - //! @brief Returns the value of batch_count. - //! - rocblas_int batch_count() const + T* const* data() { - return this->m_batch_count; + return dPtrArr_.get(); } - //! - //! @brief Returns the stride value. - //! - rocblas_stride stride() const + const T* const* data() const { - return 0; + return dPtrArr_.get(); } - - //! - //! @brief Access to device data. - //! @return Pointer to the device data. - //! - T** ptr_on_device() +/* + T* const* ddata() { - return this->m_device_data; + return dPtrArr_; } - //! - //! @brief Const access to device data. - //! @return Const pointer to the device data. - //! - const T* const* ptr_on_device() const + const T* const* ddata() const { - return this->m_device_data; + return dPtrArr_; } - T* const* data() + T* const* hdata() { - return this->m_device_data; + return hPtrArr_; } - const T* const* data() const + const T* const* hdata() const { - return this->m_device_data; + return hPtrArr_; } - - //! - //! @brief Random access. - //! @param batch_index The batch index. - //! @return Pointer to the array on device. - //! +*/ T* operator[](rocblas_int batch_index) { - return this->m_data[batch_index]; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return hPtrArr_[batch_index].get(); } - //! - //! @brief Constant random access. - //! @param batch_index The batch index. - //! @return Constant pointer to the array on device. - //! const T* operator[](rocblas_int batch_index) const { - return this->m_data[batch_index]; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return hPtrArr_[batch_index].get(); } - //! - //! @brief Const cast of the data on host. - //! operator const T* const *() const { - return this->m_data; + return hPtrArr_; } // clang-format off - //! - //! @brief Cast of the data on host. - //! operator T**() { - return this->m_data; + return hPtrArr_; } // clang-format on - //! - //! @brief Tell whether ressources allocation failed. - //! explicit operator bool() const { - return nullptr != this->m_data; + return nullptr != hPtrArr_; } - //! - //! @brief Copy from a host batched vector. - //! @param that The host_batch_vector to copy. - //! hipError_t transfer_from(const host_batch_vector& that) { - hipError_t hip_err; - // - // Copy each vector. - // - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(hipSuccess - != (hip_err = hipMemcpy((*this)[batch_index], that[batch_index], - sizeof(T) * this->nmemb(), hipMemcpyHostToDevice))) - { - return hip_err; - } - } + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(batch_count_ == that.batch_count()); - return hipSuccess; + hipError_t err = hipSuccess; + device_batch_vector& self = *this; + size_t num_bytes = vsize() * sizeof(T); + for(size_t b = 0; err == hipSuccess && b < batch_count_; ++b) + err = hipMemcpy(self[b], that[b], num_bytes, hipMemcpyHostToDevice); + return err; } - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! hipError_t memcheck() const { - if(*this) - return hipSuccess; - else - return hipErrorOutOfMemory; + return hipSuccess; } private: - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_int m_batch_count{}; - T** m_data{}; - T** m_device_data{}; + using PtrDArrT = std::unique_ptr; - //! - //! @brief Try to allocate the ressources. - //! @return true if success false otherwise. - //! - bool try_initialize_memory() - { - bool success = false; - - success = (hipSuccess == (hipMalloc)(&this->m_device_data, this->m_batch_count * sizeof(T*))); - if(success) - { - success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); - if(success) - { - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - success = (nullptr != (this->m_data[batch_index] = this->device_vector_setup())); - if(!success) - { - break; - } - } - - if(success) - { - success = (hipSuccess - == hipMemcpy(this->m_device_data, this->m_data, - sizeof(T*) * this->m_batch_count, hipMemcpyHostToDevice)); - } - } - } - return success; - } - - //! - //! @brief Free the ressources, as much as we can. - //! - void free_memory() - { - if(nullptr != this->m_data) - { - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(nullptr != this->m_data[batch_index]) - { - this->device_vector_teardown(this->m_data[batch_index]); - this->m_data[batch_index] = nullptr; - } - } - - free(this->m_data); - this->m_data = nullptr; - } - - if(nullptr != this->m_device_data) - { - auto tmp_device_data = this->m_device_data; - this->m_device_data = nullptr; - CHECK_HIP_ERROR((hipFree)(tmp_device_data)); - } - } +private: + std::unique_ptr hPtrArr_; + std::unique_ptr dPtrArr_; + rocblas_int n_; + rocblas_int inc_; + rocblas_int batch_count_; }; diff --git a/clients/rocblascommon/device_memory.hpp b/clients/rocblascommon/device_memory.hpp new file mode 100644 index 000000000..50cdc70ba --- /dev/null +++ b/clients/rocblascommon/device_memory.hpp @@ -0,0 +1,27 @@ +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * ************************************************************************ */ + +#pragma once + +#include + +#include + +#include "common_host_helpers.hpp" + +struct device_deleter +{ + void operator()(void* p) const + { + // Throwing an error when hipFree fails will likely result in throwing + // from a destructor, which should be avoided. However, we don't really + // have many options. Worst comes to worst, throwing will result in + // std::terminate being called, which is perhaps not such a bad thing + // in the test and bench clients where this is used. + THROW_IF_HIP_ERROR(hipFree(p)); + } +}; + +template +using unique_device_ptr = std::unique_ptr; diff --git a/clients/rocblascommon/device_strided_batch_vector.hpp b/clients/rocblascommon/device_strided_batch_vector.hpp index cc3bbb332..96dbc0b14 100644 --- a/clients/rocblascommon/device_strided_batch_vector.hpp +++ b/clients/rocblascommon/device_strided_batch_vector.hpp @@ -1,243 +1,140 @@ /* ************************************************************************ - * Copyright (c) 2018-2020 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once -// -// Local declaration of the host strided batch vector. -// +#include +#include +#include +#include +#include + +#include +#include + +#include "common_host_helpers.hpp" +#include "device_memory.hpp" + template class host_strided_batch_vector; -//! -//! @brief Implementation of a strided batched vector on device. -//! template -class device_strided_batch_vector : public d_vector +class device_strided_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief The storage type to use. - //! - typedef enum class estorage - { - block, - interleave, - } storage; - - //! - //! @brief Disallow copying. - //! - device_strided_batch_vector(const device_strided_batch_vector&) = delete; - - //! - //! @brief Disallow assigning. - //! - device_strided_batch_vector& operator=(const device_strided_batch_vector&) = delete; - - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride The stride. - //! @param batch_count The batch count. - //! @param stg The storage format to use. - //! - explicit device_strided_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage stg = storage::block) - : d_vector(calculate_nmemb(n, inc, stride, batch_count, stg)) - , m_storage(stg) - , m_n(n) - , m_inc(inc) - , m_stride(stride) - , m_batch_count(batch_count) + device_strided_batch_vector(rocblas_int n, + rocblas_int inc, + rocblas_stride stride, + rocblas_int batch_count) + : n_(n) + , inc_(inc) + , stride_(stride) + , batch_count_(batch_count) { - bool valid_parameters = true; - - switch(this->m_storage) - { - case storage::block: - { - if(std::abs(this->m_stride) < this->m_n * std::abs(this->m_inc)) - { - valid_parameters = false; - } - break; - } - case storage::interleave: - { - if(std::abs(this->m_inc) < std::abs(this->m_stride) * this->m_batch_count) - { - valid_parameters = false; - } - break; - } - } - - if(valid_parameters) - { - this->m_data = this->device_vector_setup(); - } - } + assert(n > 0); + assert(stride != 0); + assert(batch_count > 0); + assert(size_t(n) * std::abs(inc) <= std::abs(stride)); - //! - //! @brief Destructor. - //! - ~device_strided_batch_vector() - { - if(nullptr != this->m_data) - { - this->device_vector_teardown(this->m_data); - this->m_data = nullptr; - } - } - - //! - //! @brief Returns the data pointer. - //! - T* data() - { - return this->m_data; + const size_t sz = size(); + assert(sz > 0); + T* data; + THROW_IF_HIP_ERROR(hipMalloc(&data, sizeof(T) * sz)); + data_ = std::unique_ptr(data); } - //! - //! @brief Returns the data pointer. - //! - const T* data() const + // The number of elements in each vector. + rocblas_int n() const noexcept { - return this->m_data; + return n_; } - //! - //! @brief Returns the length. - //! - rocblas_int n() const + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - return this->m_n; + return inc_; } - //! - //! @brief Returns the increment. - //! - rocblas_int inc() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_inc; + return batch_count_; } - //! - //! @brief Returns the batch count. - //! - rocblas_int batch_count() const + // The total number elements in all vectors in the batch. + rocblas_stride size() const { - return this->m_batch_count; + return size_t(std::abs(stride_)) * batch_count_; } - //! - //! @brief Returns the stride value. - //! - rocblas_stride stride() const + // The number of elements from the start of one vector to the start of the next. + rocblas_stride stride() const noexcept { - return this->m_stride; + return stride_; } - //! - //! @brief Returns pointer. - //! @param batch_index The batch index. - //! @return A mutable pointer to the batch_index'th vector. - //! + // Returns a vector from the batch. T* operator[](rocblas_int batch_index) { - return (this->m_stride >= 0) - ? this->m_data + batch_index * this->m_stride - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); + + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Returns non-mutable pointer. - //! @param batch_index The batch index. - //! @return A non-mutable mutable pointer to the batch_index'th vector. - //! + // Returns a vector from the batch. const T* operator[](rocblas_int batch_index) const { - return (this->m_stride >= 0) - ? this->m_data + batch_index * this->m_stride - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; - } + assert(batch_index >= 0); + assert(batch_index < batch_count_); - //! - //! @brief Cast operator. - //! @remark Returns the pointer of the first vector. - //! - operator T*() - { - return (*this)[0]; + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); + + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Non-mutable cast operator. - //! @remark Returns the non-mutable pointer of the first vector. - //! - operator const T*() const + // Returns a pointer to the underlying array. + T* data() noexcept { - return (*this)[0]; + return data_.get(); } - //! - //! @brief Tell whether ressources allocation failed. - //! - explicit operator bool() const + // Returns a pointer to the underlying array. + const T* data() const noexcept { - return nullptr != this->m_data; + return data_.get(); } - //! - //! @brief Transfer data from a strided batched vector on device. - //! @param that That strided batched vector on device. - //! @return The hip error. - //! hipError_t transfer_from(const host_strided_batch_vector& that) { - return hipMemcpy(this->data(), that.data(), sizeof(T) * this->nmemb(), hipMemcpyHostToDevice); + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(stride_ == that.stride()); + assert(batch_count_ == that.batch_count()); + + return hipMemcpy(this->data(), that.data(), sizeof(T) * size(), hipMemcpyHostToDevice); } - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! hipError_t memcheck() const { - if(*this) - return hipSuccess; - else - return hipErrorOutOfMemory; + return hipSuccess; } private: - storage m_storage{storage::block}; - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_stride m_stride{}; - rocblas_int m_batch_count{}; - T* m_data{}; - - static size_t calculate_nmemb(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage st) - { - switch(st) - { - case storage::block: return size_t(std::abs(stride)) * batch_count; - case storage::interleave: return size_t(n) * std::abs(inc); - } - return 0; - } + std::unique_ptr data_; + rocblas_int n_; + rocblas_int inc_; + rocblas_stride stride_; + rocblas_int batch_count_; }; diff --git a/clients/rocblascommon/host_batch_vector.hpp b/clients/rocblascommon/host_batch_vector.hpp index 5c21e7832..bfaf59873 100644 --- a/clients/rocblascommon/host_batch_vector.hpp +++ b/clients/rocblascommon/host_batch_vector.hpp @@ -1,265 +1,128 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once +#include +#include +#include #include -#include -#include "rocblas_init.hpp" +#include +#include -// -// Local declaration of the device batch vector. -// template class device_batch_vector; -//! -//! @brief Implementation of the batch vector on host. -//! template class host_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief Delete copy constructor. - //! - host_batch_vector(const host_batch_vector& that) = delete; - - //! - //! @brief Delete copy assignement. - //! - host_batch_vector& operator=(const host_batch_vector& that) = delete; - - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param batch_count The batch count. - //! - explicit host_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) - : m_n(n) - , m_inc(inc) - , m_batch_count(batch_count) + host_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) + : data_(std::make_unique(batch_count)) + , n_(n) + , inc_(inc) + , batch_count_(batch_count) { - if(false == this->try_initialize_memory()) + assert(n > 0); + assert(batch_count > 0); + + const size_t size = vsize(); + for(rocblas_int i = 0; i < batch_count; ++i) { - this->free_memory(); + data_[i] = std::make_unique(size); } } - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride (UNUSED) The stride. - //! @param batch_count The batch count. - //! - explicit host_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count) + host_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count) : host_batch_vector(n, inc, batch_count) { + assert(stride == 1); } - //! - //! @brief Destructor. - //! - ~host_batch_vector() + // The number of elements in each vector. + rocblas_int n() const noexcept { - this->free_memory(); + return n_; } - //! - //! @brief Returns the length of the vector. - //! - rocblas_int n() const + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - return this->m_n; + return inc_; } - //! - //! @brief Returns the increment of the vector. - //! - rocblas_int inc() const + // The size of each vector. This is a derived property of the number of elements in the vector + // and the spacing between them. + size_t vsize() const { - return this->m_inc; + return size_t(n_) * std::abs(inc_); } - //! - //! @brief Returns the batch count. - //! - rocblas_int batch_count() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_batch_count; + return batch_count_; } - //! - //! @brief Returns the stride value. - //! - rocblas_stride stride() const - { - return 0; - } - - //! - //! @brief Random access to the vectors. - //! @param batch_index the batch index. - //! @return The mutable pointer. - //! + // Returns a vector from the batch. T* operator[](rocblas_int batch_index) { - return this->m_data[batch_index]; + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return data_[batch_index].get(); } - //! - //! @brief Constant random access to the vectors. - //! @param batch_index the batch index. - //! @return The non-mutable pointer. - //! + // Returns a vector from the batch. const T* operator[](rocblas_int batch_index) const { - return this->m_data[batch_index]; - } - - // clang-format off - //! - //! @brief Cast to a double pointer. - //! - operator T**() - { - return this->m_data; - } - // clang-format on - - //! - //! @brief Constant cast to a double pointer. - //! - operator const T* const *() - { - return this->m_data; - } - - //! - //! @brief Copy from a host batched vector. - //! @param that the vector the data is copied from. - //! @return true if the copy is done successfully, false otherwise. - //! - bool copy_from(const host_batch_vector& that) - { - if((this->batch_count() == that.batch_count()) && (this->n() == that.n()) - && (this->inc() == that.inc())) - { - size_t num_bytes = this->n() * std::abs(this->inc()) * sizeof(T); - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - memcpy((*this)[batch_index], that[batch_index], num_bytes); - } - return true; - } - else - { - return false; - } + assert(batch_index >= 0); + assert(batch_index < batch_count_); + return data_[batch_index].get(); } - //! - //! @brief Transfer from a device batched vector. - //! @param that the vector the data is copied from. - //! @return the hip error. - //! + // Copy from a device_batch_vector into host memory. hipError_t transfer_from(const device_batch_vector& that) { - hipError_t hip_err; - size_t num_bytes = size_t(this->m_n) * std::abs(this->m_inc) * sizeof(T); - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(hipSuccess - != (hip_err = hipMemcpy((*this)[batch_index], that[batch_index], num_bytes, - hipMemcpyDeviceToHost))) - { - return hip_err; - } - } - return hipSuccess; - } + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(batch_count_ == that.batch_count()); - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! - hipError_t memcheck() const - { - return (nullptr != this->m_data) ? hipSuccess : hipErrorOutOfMemory; + hipError_t err = hipSuccess; + host_batch_vector& self = *this; + size_t num_bytes = vsize() * sizeof(T); + for(size_t b = 0; err == hipSuccess && b < batch_count_; ++b) + err = hipMemcpy(self[b], that[b], num_bytes, hipMemcpyDeviceToHost); + return err; } private: - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_int m_batch_count{}; - T** m_data{}; - - bool try_initialize_memory() - { - bool success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); - if(success) - { - size_t nmemb = size_t(this->m_n) * std::abs(this->m_inc); - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - success = (nullptr != (this->m_data[batch_index] = (T*)calloc(nmemb, sizeof(T)))); - if(false == success) - { - break; - } - } - } - return success; - } + using PtrArrT = std::unique_ptr; - void free_memory() - { - if(nullptr != this->m_data) - { - for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) - { - if(nullptr != this->m_data[batch_index]) - { - free(this->m_data[batch_index]); - this->m_data[batch_index] = nullptr; - } - } - - free(this->m_data); - this->m_data = nullptr; - } - } +private: + std::unique_ptr data_; + rocblas_int n_; + rocblas_int inc_; + rocblas_int batch_count_; }; -//! -//! @brief Overload output operator. -//! @param os The ostream. -//! @param that That host batch vector. -//! template -std::ostream& operator<<(std::ostream& os, const host_batch_vector& that) +std::ostream& operator<<(std::ostream& os, const host_batch_vector& hbv) { - auto n = that.n(); - auto inc = std::abs(that.inc()); - auto batch_count = that.batch_count(); + rocblas_int n = hbv.n(); + rocblas_int inc = std::abs(hbv.inc()); + rocblas_int batch_count = hbv.batch_count(); - for(rocblas_int batch_index = 0; batch_index < batch_count; ++batch_index) + for(rocblas_int b = 0; b < batch_count; ++b) { - auto batch_data = that[batch_index]; - os << "[" << batch_index << "] = { " << batch_data[0]; - for(rocblas_int i = 1; i < n; ++i) + T* hv = hbv[b]; + os << "[" << b << "] = { "; + for(rocblas_int i = 0; i < n; ++i) { - os << ", " << batch_data[i * inc]; + os << hv[i * inc]; + if(i + 1 < n) + os << ", "; } os << " }" << std::endl; } diff --git a/clients/rocblascommon/host_strided_batch_vector.hpp b/clients/rocblascommon/host_strided_batch_vector.hpp index faf14484e..16686c827 100644 --- a/clients/rocblascommon/host_strided_batch_vector.hpp +++ b/clients/rocblascommon/host_strided_batch_vector.hpp @@ -1,291 +1,152 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once +#include +#include +#include #include -// -// Local declaration of the device strided batch vector. -// +#include +#include + template class device_strided_batch_vector; -//! -//! @brief Implementation of a host strided batched vector. -//! template class host_strided_batch_vector { public: - using value_type = T; - -public: - //! - //! @brief The storage type to use. - //! - typedef enum class estorage + host_strided_batch_vector(rocblas_int n, + rocblas_int inc, + rocblas_stride stride, + rocblas_int batch_count) + : n_(n) + , inc_(inc) + , stride_(stride) + , batch_count_(batch_count) { - block, - interleave - } storage; - - //! - //! @brief Disallow copying. - //! - host_strided_batch_vector(const host_strided_batch_vector&) = delete; - - //! - //! @brief Disallow assigning. - //! - host_strided_batch_vector& operator=(const host_strided_batch_vector&) = delete; + assert(n > 0); + assert(stride != 0); + assert(batch_count > 0); + assert(size_t(n) * std::abs(inc) <= std::abs(stride)); - //! - //! @brief Constructor. - //! @param n The length of the vector. - //! @param inc The increment. - //! @param stride The stride. - //! @param batch_count The batch count. - //! @param stg The storage format to use. - //! - explicit host_strided_batch_vector(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage stg = storage::block) - : m_storage(stg) - , m_n(n) - , m_inc(inc) - , m_stride(stride) - , m_batch_count(batch_count) - , m_nmemb(calculate_nmemb(n, inc, stride, batch_count, stg)) - { - bool valid_parameters = this->m_nmemb > 0; - if(valid_parameters) - { - switch(this->m_storage) - { - case storage::block: - { - if(std::abs(this->m_stride) < this->m_n * std::abs(this->m_inc)) - { - valid_parameters = false; - } - break; - } - case storage::interleave: - { - if(std::abs(this->m_inc) < std::abs(this->m_stride) * this->m_batch_count) - { - valid_parameters = false; - } - break; - } - } - - if(valid_parameters) - { - this->m_data = new T[this->m_nmemb]; - } - } + const size_t sz = size(); + assert(sz > 0); + data_ = std::make_unique(sz); } - //! - //! @brief Destructor. - //! - ~host_strided_batch_vector() + // The number of elements in each vector. + rocblas_int n() const noexcept { - if(nullptr != this->m_data) - { - delete[] this->m_data; - this->m_data = nullptr; - } + return n_; } - //! - //! @brief Returns the data pointer. - //! - T* data() + // The increment between elements in each vector. + rocblas_int inc() const noexcept { - return this->m_data; + return inc_; } - //! - //! @brief Returns the data pointer. - //! - const T* data() const + // The number of vectors in the batch. + rocblas_int batch_count() const noexcept { - return this->m_data; + return batch_count_; } - //! - //! @brief Returns the length. - //! - rocblas_int n() const + // The total number elements in all vectors in the batch. + rocblas_stride size() const { - return this->m_n; + return size_t(std::abs(stride_)) * batch_count_; } - //! - //! @brief Returns the increment. - //! - rocblas_int inc() const + // The number of elements from the start of one vector to the start of the next. + rocblas_stride stride() const noexcept { - return this->m_inc; + return stride_; } - //! - //! @brief Returns the batch count. - //! - rocblas_int batch_count() const + // Returns a vector from the batch. + T* operator[](rocblas_int batch_index) { - return this->m_batch_count; - } + assert(batch_index >= 0); + assert(batch_index < batch_count_); - //! - //! @brief Returns the stride. - //! - rocblas_stride stride() const - { - return this->m_stride; - } + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); - //! - //! @brief Returns pointer. - //! @param batch_index The batch index. - //! @return A mutable pointer to the batch_index'th vector. - //! - T* operator[](rocblas_int batch_index) - { - return (this->m_stride >= 0) - ? this->m_data + this->m_stride * batch_index - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Returns non-mutable pointer. - //! @param batch_index The batch index. - //! @return A non-mutable mutable pointer to the batch_index'th vector. - //! + // Returns a vector from the batch. const T* operator[](rocblas_int batch_index) const { - return (this->m_stride >= 0) - ? this->m_data + this->m_stride * batch_index - : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; - } + assert(batch_index >= 0); + assert(batch_index < batch_count_); - //! - //! @brief Cast operator. - //! @remark Returns the pointer of the first vector. - //! - operator T*() - { - return (*this)[0]; - } + rocblas_stride index + = stride_ >= 0 ? stride_ * batch_index : stride_ * (batch_index - batch_count_ + 1); - //! - //! @brief Non-mutable cast operator. - //! @remark Returns the non-mutable pointer of the first vector. - //! - operator const T*() const - { - return (*this)[0]; + assert(index >= 0); + assert(index < size()); + + return &data_[index]; } - //! - //! @brief Tell whether ressources allocation failed. - //! - explicit operator bool() const + // Returns a pointer to the underlying array. + T* data() noexcept { - return nullptr != this->m_data; + return data_.get(); } - //! - //! @brief Copy data from a strided batched vector on host. - //! @param that That strided batched vector on host. - //! @return true if successful, false otherwise. - //! - bool copy_from(const host_strided_batch_vector& that) + // Returns a pointer to the underlying array. + const T* data() const noexcept { - if(that.n() == this->m_n && that.inc() == this->m_inc && that.stride() == this->m_stride - && that.batch_count() == this->m_batch_count) - { - memcpy(this->data(), that.data(), sizeof(T) * this->m_nmemb); - return true; - } - else - { - return false; - } + return data_.get(); } - //! - //! @brief Transfer data from a strided batched vector on device. - //! @param that That strided batched vector on device. - //! @return The hip error. - //! template hipError_t transfer_from(const device_strided_batch_vector& that) { - return hipMemcpy(this->m_data, that.data(), sizeof(T) * this->m_nmemb, hipMemcpyDeviceToHost); - } + assert(n_ == that.n()); + assert(inc_ == that.inc()); + assert(stride_ == that.stride()); + assert(batch_count_ == that.batch_count()); - //! - //! @brief Check if memory exists. - //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. - //! - hipError_t memcheck() const - { - return ((bool)*this) ? hipSuccess : hipErrorOutOfMemory; + return hipMemcpy(data_.get(), that.data(), sizeof(T) * size(), hipMemcpyDeviceToHost); } private: - storage m_storage{storage::block}; - rocblas_int m_n{}; - rocblas_int m_inc{}; - rocblas_stride m_stride{}; - rocblas_int m_batch_count{}; - size_t m_nmemb{}; - T* m_data{}; - - static size_t calculate_nmemb(rocblas_int n, - rocblas_int inc, - rocblas_stride stride, - rocblas_int batch_count, - storage st) - { - switch(st) - { - case storage::block: return size_t(std::abs(stride)) * batch_count; - case storage::interleave: return size_t(n) * std::abs(inc); - } - return 0; - } + std::unique_ptr data_; + rocblas_int n_; + rocblas_int inc_; + rocblas_stride stride_; + rocblas_int batch_count_; }; -//! -//! @brief Overload output operator. -//! @param os The ostream. -//! @param that That host strided batch vector. -//! template -std::ostream& operator<<(std::ostream& os, const host_strided_batch_vector& that) +std::ostream& operator<<(std::ostream& os, const host_strided_batch_vector& hsbv) { - auto n = that.n(); - auto inc = std::abs(that.inc()); - auto batch_count = that.batch_count(); + rocblas_int n = hsbv.n(); + rocblas_int inc = std::abs(hsbv.inc()); + rocblas_int batch_count = hsbv.batch_count(); - for(rocblas_int batch_index = 0; batch_index < batch_count; ++batch_index) + for(rocblas_int b = 0; b < batch_count; ++b) { - auto batch_data = that[batch_index]; - os << "[" << batch_index << "] = { " << batch_data[0]; - for(rocblas_int i = 1; i < n; ++i) + T* hv = hsbv[b]; + os << "[" << b << "] = { "; + for(rocblas_int i = 0; i < n; ++i) { - os << ", " << batch_data[i * inc]; + os << hv[i * inc]; + if(i + 1 < n) + os << ", "; } os << " }" << std::endl; } - return os; } diff --git a/clients/rocblascommon/rocblas_vector.hpp b/clients/rocblascommon/rocblas_vector.hpp index f684c5db2..e7ed79ce7 100644 --- a/clients/rocblascommon/rocblas_vector.hpp +++ b/clients/rocblascommon/rocblas_vector.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2023 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -10,6 +10,8 @@ #include "host_batch_vector.hpp" #include "host_strided_batch_vector.hpp" +#include "rocblas_random.hpp" + //! //! @brief Random number with type deductions. //!