Skip to content

Commit

Permalink
Use libcu++ limits/trait in tests/benchmarks
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Schellenberger Costa <[email protected]>
  • Loading branch information
bernhardmgruber and miscco committed Feb 17, 2025
1 parent 498095d commit ab531ba
Show file tree
Hide file tree
Showing 41 changed files with 234 additions and 291 deletions.
29 changes: 4 additions & 25 deletions c2h/generators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,17 @@
#include <thrust/scan.h>
#include <thrust/tabulate.h>

#include <cuda/std/type_traits>
#include <cuda/type_traits>

#include <cstdint>

#include <c2h/bfloat16.cuh>
#include <c2h/custom_type.h>
#include <c2h/device_policy.h>
#include <c2h/extended_types.h>
#include <c2h/fill_striped.h>
#include <c2h/generators.h>
#include <c2h/half.cuh>
#include <c2h/vector.h>

#if C2H_HAS_CURAND
Expand Down Expand Up @@ -118,30 +120,7 @@ private:
c2h::device_vector<float> m_distribution;
};

// TODO(bgruber): modelled after cub::Traits. We should generalize this somewhere into libcu++.
template <typename T>
struct is_floating_point : ::cuda::std::is_floating_point<T>
{};
#if _CCCL_HAS_NVFP16()
template <>
struct is_floating_point<__half> : ::cuda::std::true_type
{};
#endif // _CCCL_HAS_NVFP16()
#if _CCCL_HAS_NVBF16()
template <>
struct is_floating_point<__nv_bfloat16> : ::cuda::std::true_type
{};
#endif // _CCCL_HAS_NVBF16()
#if _CCCL_HAS_NVFP8()
template <>
struct is_floating_point<__nv_fp8_e4m3> : ::cuda::std::true_type
{};
template <>
struct is_floating_point<__nv_fp8_e5m2> : ::cuda::std::true_type
{};
#endif // _CCCL_HAS_NVFP8()

template <typename T, bool = is_floating_point<T>::value>
template <typename T, bool = ::cuda::is_floating_point_v<T>>
struct random_to_item_t
{
float m_min;
Expand Down
13 changes: 7 additions & 6 deletions c2h/include/c2h/catch2_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include <cuda/std/bit>
#include <cuda/std/cmath>
#include <cuda/std/limits>
#include <cuda/std/type_traits>
#include <cuda/std/utility>

Expand Down Expand Up @@ -283,10 +284,10 @@ inline std::size_t adjust_seed_count(std::size_t requested)
}
} // namespace detail

#define C2H_SEED(N) \
c2h::seed_t \
{ \
GENERATE_COPY(take( \
detail::adjust_seed_count(N), \
random(std::numeric_limits<unsigned long long int>::min(), std::numeric_limits<unsigned long long int>::max()))) \
#define C2H_SEED(N) \
c2h::seed_t \
{ \
GENERATE_COPY(take(detail::adjust_seed_count(N), \
random(::cuda::std::numeric_limits<unsigned long long int>::min(), \
::cuda::std::numeric_limits<unsigned long long int>::max()))) \
}
28 changes: 14 additions & 14 deletions c2h/include/c2h/custom_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

#pragma once

#include <limits>
#include <cuda/std/limits>

#include <memory>
#include <ostream>

Expand Down Expand Up @@ -178,34 +179,33 @@ class accumulateable_t

} // namespace c2h

namespace std
{
_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <template <typename> class... Policies>
class numeric_limits<c2h::custom_type_t<Policies...>>
class __numeric_limits_impl<c2h::custom_type_t<Policies...>, __numeric_limits_type::__other>
{
public:
static c2h::custom_type_t<Policies...> max()
static __host__ __device__ c2h::custom_type_t<Policies...> max()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::max();
val.val = std::numeric_limits<std::size_t>::max();
val.key = numeric_limits<std::size_t>::max();
val.val = numeric_limits<std::size_t>::max();
return val;
}

static c2h::custom_type_t<Policies...> min()
static __host__ __device__ c2h::custom_type_t<Policies...> min()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::min();
val.val = std::numeric_limits<std::size_t>::min();
val.key = numeric_limits<std::size_t>::min();
val.val = numeric_limits<std::size_t>::min();
return val;
}

static c2h::custom_type_t<Policies...> lowest()
static __host__ __device__ c2h::custom_type_t<Policies...> lowest()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::lowest();
val.val = std::numeric_limits<std::size_t>::lowest();
val.key = numeric_limits<std::size_t>::lowest();
val.val = numeric_limits<std::size_t>::lowest();
return val;
}
};
} // namespace std
_LIBCUDACXX_END_NAMESPACE_STD
45 changes: 5 additions & 40 deletions c2h/include/c2h/generators.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

#include <thrust/detail/config/device_system.h>

#include <limits>
#include <cuda/std/limits>

#include <c2h/custom_type.h>
#include <c2h/vector.h>
Expand All @@ -52,41 +52,6 @@ _CCCL_DIAG_PUSH
# include <cuda_fp8.h>
_CCCL_DIAG_POP
# endif // _CCCL_HAS_NVFP8()

# if _CCCL_HAS_NVFP8()
namespace std
{
template <>
class numeric_limits<__nv_fp8_e4m3>
{
public:
static __nv_fp8_e4m3 max()
{
return cub::Traits<__nv_fp8_e4m3>::Max();
}

static __nv_fp8_e4m3 lowest()
{
return cub::Traits<__nv_fp8_e4m3>::Lowest();
}
};

template <>
class numeric_limits<__nv_fp8_e5m2>
{
public:
static __nv_fp8_e5m2 max()
{
return cub::Traits<__nv_fp8_e5m2>::Max();
}

static __nv_fp8_e5m2 lowest()
{
return cub::Traits<__nv_fp8_e5m2>::Lowest();
}
};
} // namespace std
# endif // _CCCL_HAS_NVFP8()
#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA

namespace c2h
Expand Down Expand Up @@ -157,8 +122,8 @@ void init_key_segments(const c2h::device_vector<OffsetT>& segment_offsets, KeyT*
template <template <typename> class... Ps>
void gen(seed_t seed,
c2h::device_vector<c2h::custom_type_t<Ps...>>& data,
c2h::custom_type_t<Ps...> min = std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
c2h::custom_type_t<Ps...> max = std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
c2h::custom_type_t<Ps...> min = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
c2h::custom_type_t<Ps...> max = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
{
detail::gen(seed,
reinterpret_cast<char*>(thrust::raw_pointer_cast(data.data())),
Expand All @@ -171,8 +136,8 @@ void gen(seed_t seed,
template <typename T>
void gen(seed_t seed,
c2h::device_vector<T>& data,
T min = std::numeric_limits<T>::lowest(),
T max = std::numeric_limits<T>::max());
T min = ::cuda::std::numeric_limits<T>::lowest(),
T max = ::cuda::std::numeric_limits<T>::max());

template <typename T>
void gen(modulo_t mod, c2h::device_vector<T>& data);
Expand Down
167 changes: 62 additions & 105 deletions c2h/include/c2h/test_util_vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

#include <thrust/detail/config/device_system.h>

#include <cuda/std/limits>

#include <iostream>

/******************************************************************************
Expand Down Expand Up @@ -287,114 +289,62 @@ C2H_VEC_OVERLOAD(ulonglong, unsigned long long)
C2H_VEC_OVERLOAD(float, float)
C2H_VEC_OVERLOAD(double, double)

/*
* The following section defines macros to overload cub::NumericTraits<T>::{Max,Lowest}() for vector
* types.
*/
// Specialize cub::NumericTraits and cuda::std::numeric_limits for vector types.

/**
* Vector1 overloads
*/
# define C2H_VEC_1_TRAITS_OVERLOAD(T, BaseT) \
CUB_NAMESPACE_BEGIN \
template <> \
struct NumericTraits<T> \
{ \
static __host__ __device__ T Max() \
{ \
T retval = {NumericTraits<BaseT>::Max()}; \
return retval; \
} \
static __host__ __device__ T Lowest() \
{ \
T retval = {NumericTraits<BaseT>::Lowest()}; \
return retval; \
} \
}; \
CUB_NAMESPACE_END
# define REPEAT_TO_LIST_1(a) a
# define REPEAT_TO_LIST_2(a) a, a
# define REPEAT_TO_LIST_3(a) a, a, a
# define REPEAT_TO_LIST_4(a) a, a, a, a
# define REPEAT_TO_LIST(N, a) _CCCL_PP_CAT(REPEAT_TO_LIST_, N)(a)

/**
* Vector2 overloads
*/
# define C2H_VEC_2_TRAITS_OVERLOAD(T, BaseT) \
CUB_NAMESPACE_BEGIN \
template <> \
struct NumericTraits<T> \
{ \
static __host__ __device__ T Max() \
{ \
T retval = {NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max()}; \
return retval; \
} \
static __host__ __device__ T Lowest() \
{ \
T retval = {NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest()}; \
return retval; \
} \
}; \
CUB_NAMESPACE_END
# define C2H_VEC_TRAITS_OVERLOAD_IMPL(T, BaseT, N) \
CUB_NAMESPACE_BEGIN \
template <> \
struct NumericTraits<T> \
{ \
static __host__ __device__ T Max() \
{ \
T retval = {REPEAT_TO_LIST(N, NumericTraits<BaseT>::Max())}; \
return retval; \
} \
static __host__ __device__ T Lowest() \
{ \
T retval = {REPEAT_TO_LIST(N, NumericTraits<BaseT>::Lowest())}; \
return retval; \
} \
}; \
CUB_NAMESPACE_END \
\
_LIBCUDACXX_BEGIN_NAMESPACE_STD \
template <> \
class numeric_limits<T> \
{ \
public: \
static constexpr bool is_specialized = true; \
static __host__ __device__ T max() \
{ \
T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::max())}; \
return retval; \
} \
static __host__ __device__ T min() \
{ \
T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::min())}; \
return retval; \
} \
static __host__ __device__ T lowest() \
{ \
T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::lowest())}; \
return retval; \
} \
}; \
_LIBCUDACXX_END_NAMESPACE_STD

/**
* Vector3 overloads
*/
# define C2H_VEC_3_TRAITS_OVERLOAD(T, BaseT) \
CUB_NAMESPACE_BEGIN \
template <> \
struct NumericTraits<T> \
{ \
static __host__ __device__ T Max() \
{ \
T retval = {NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max()}; \
return retval; \
} \
static __host__ __device__ T Lowest() \
{ \
T retval = {NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest()}; \
return retval; \
} \
}; \
CUB_NAMESPACE_END
# define C2H_VEC_TRAITS_OVERLOAD(COMPONENT_T, BaseT) \
C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##1, BaseT, 1) \
C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##2, BaseT, 2) \
C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##3, BaseT, 3) \
C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##4, BaseT, 4)

/**
* Vector4 overloads
*/
# define C2H_VEC_4_TRAITS_OVERLOAD(T, BaseT) \
CUB_NAMESPACE_BEGIN \
template <> \
struct NumericTraits<T> \
{ \
static __host__ __device__ T Max() \
{ \
T retval = {NumericTraits<BaseT>::Max(), \
NumericTraits<BaseT>::Max(), \
NumericTraits<BaseT>::Max(), \
NumericTraits<BaseT>::Max()}; \
return retval; \
} \
static __host__ __device__ T Lowest() \
{ \
T retval = {NumericTraits<BaseT>::Lowest(), \
NumericTraits<BaseT>::Lowest(), \
NumericTraits<BaseT>::Lowest(), \
NumericTraits<BaseT>::Lowest()}; \
return retval; \
} \
}; \
CUB_NAMESPACE_END

/**
* All vector overloads
*/
# define C2H_VEC_TRAITS_OVERLOAD(COMPONENT_T, BaseT) \
C2H_VEC_1_TRAITS_OVERLOAD(COMPONENT_T##1, BaseT) \
C2H_VEC_2_TRAITS_OVERLOAD(COMPONENT_T##2, BaseT) \
C2H_VEC_3_TRAITS_OVERLOAD(COMPONENT_T##3, BaseT) \
C2H_VEC_4_TRAITS_OVERLOAD(COMPONENT_T##4, BaseT)

/**
* Define for types
*/
_CCCL_SUPPRESS_DEPRECATED_PUSH
C2H_VEC_TRAITS_OVERLOAD(char, signed char)
C2H_VEC_TRAITS_OVERLOAD(short, short)
C2H_VEC_TRAITS_OVERLOAD(int, int)
Expand All @@ -407,6 +357,13 @@ C2H_VEC_TRAITS_OVERLOAD(ulong, unsigned long)
C2H_VEC_TRAITS_OVERLOAD(ulonglong, unsigned long long)
C2H_VEC_TRAITS_OVERLOAD(float, float)
C2H_VEC_TRAITS_OVERLOAD(double, double)
_CCCL_SUPPRESS_DEPRECATED_POP

# undef C2H_VEC_TRAITS_OVERLOAD
# undef C2H_VEC_TRAITS_OVERLOAD_IMPL
# undef REPEAT_TO_LIST_1
# undef REPEAT_TO_LIST_2
# undef REPEAT_TO_LIST_3
# undef REPEAT_TO_LIST_4
# undef REPEAT_TO_LIST

#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
Loading

0 comments on commit ab531ba

Please sign in to comment.