Use libcu++ limits/trait in tests/benchmarks

Co-authored-by: Michael Schellenberger Costa <[email protected]>
NVIDIA · Feb 17, 2025 · ab531ba · ab531ba
1 parent 498095d
commit ab531ba
Show file tree

Hide file tree

Showing 41 changed files with 234 additions and 291 deletions.
diff --git a/c2h/generators.cu b/c2h/generators.cu
@@ -40,15 +40,17 @@
 #include <thrust/scan.h>
 #include <thrust/tabulate.h>
 
-#include <cuda/std/type_traits>
+#include <cuda/type_traits>
 
 #include <cstdint>
 
+#include <c2h/bfloat16.cuh>
 #include <c2h/custom_type.h>
 #include <c2h/device_policy.h>
 #include <c2h/extended_types.h>
 #include <c2h/fill_striped.h>
 #include <c2h/generators.h>
+#include <c2h/half.cuh>
 #include <c2h/vector.h>
 
 #if C2H_HAS_CURAND
@@ -118,30 +120,7 @@ private:
   c2h::device_vector<float> m_distribution;
 };
 
-// TODO(bgruber): modelled after cub::Traits. We should generalize this somewhere into libcu++.
-template <typename T>
-struct is_floating_point : ::cuda::std::is_floating_point<T>
-{};
-#if _CCCL_HAS_NVFP16()
-template <>
-struct is_floating_point<__half> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVFP16()
-#if _CCCL_HAS_NVBF16()
-template <>
-struct is_floating_point<__nv_bfloat16> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVBF16()
-#if _CCCL_HAS_NVFP8()
-template <>
-struct is_floating_point<__nv_fp8_e4m3> : ::cuda::std::true_type
-{};
-template <>
-struct is_floating_point<__nv_fp8_e5m2> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVFP8()
-
-template <typename T, bool = is_floating_point<T>::value>
+template <typename T, bool = ::cuda::is_floating_point_v<T>>
 struct random_to_item_t
 {
   float m_min;

diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h
@@ -31,6 +31,7 @@
 
 #include <cuda/std/bit>
 #include <cuda/std/cmath>
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 #include <cuda/std/utility>
 
@@ -283,10 +284,10 @@ inline std::size_t adjust_seed_count(std::size_t requested)
 }
 } // namespace detail
 
-#define C2H_SEED(N)                                                                                                    \
-  c2h::seed_t                                                                                                          \
-  {                                                                                                                    \
-    GENERATE_COPY(take(                                                                                                \
-      detail::adjust_seed_count(N),                                                                                    \
-      random(std::numeric_limits<unsigned long long int>::min(), std::numeric_limits<unsigned long long int>::max()))) \
+#define C2H_SEED(N)                                                                         \
+  c2h::seed_t                                                                               \
+  {                                                                                         \
+    GENERATE_COPY(take(detail::adjust_seed_count(N),                                        \
+                       random(::cuda::std::numeric_limits<unsigned long long int>::min(),   \
+                              ::cuda::std::numeric_limits<unsigned long long int>::max()))) \
   }
diff --git a/c2h/include/c2h/custom_type.h b/c2h/include/c2h/custom_type.h
@@ -27,7 +27,8 @@
 
 #pragma once
 
-#include <limits>
+#include <cuda/std/limits>
+
 #include <memory>
 #include <ostream>
 
@@ -178,34 +179,33 @@ class accumulateable_t
 
 } // namespace c2h
 
-namespace std
-{
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
 template <template <typename> class... Policies>
-class numeric_limits<c2h::custom_type_t<Policies...>>
+class __numeric_limits_impl<c2h::custom_type_t<Policies...>, __numeric_limits_type::__other>
 {
 public:
-  static c2h::custom_type_t<Policies...> max()
+  static __host__ __device__ c2h::custom_type_t<Policies...> max()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::max();
-    val.val = std::numeric_limits<std::size_t>::max();
+    val.key = numeric_limits<std::size_t>::max();
+    val.val = numeric_limits<std::size_t>::max();
     return val;
   }
 
-  static c2h::custom_type_t<Policies...> min()
+  static __host__ __device__ c2h::custom_type_t<Policies...> min()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::min();
-    val.val = std::numeric_limits<std::size_t>::min();
+    val.key = numeric_limits<std::size_t>::min();
+    val.val = numeric_limits<std::size_t>::min();
     return val;
   }
 
-  static c2h::custom_type_t<Policies...> lowest()
+  static __host__ __device__ c2h::custom_type_t<Policies...> lowest()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::lowest();
-    val.val = std::numeric_limits<std::size_t>::lowest();
+    val.key = numeric_limits<std::size_t>::lowest();
+    val.val = numeric_limits<std::size_t>::lowest();
     return val;
   }
 };
-} // namespace std
+_LIBCUDACXX_END_NAMESPACE_STD
diff --git a/c2h/include/c2h/generators.h b/c2h/include/c2h/generators.h
@@ -29,7 +29,7 @@
 
 #include <thrust/detail/config/device_system.h>
 
-#include <limits>
+#include <cuda/std/limits>
 
 #include <c2h/custom_type.h>
 #include <c2h/vector.h>
@@ -52,41 +52,6 @@ _CCCL_DIAG_PUSH
 #    include <cuda_fp8.h>
 _CCCL_DIAG_POP
 #  endif // _CCCL_HAS_NVFP8()
-
-#  if _CCCL_HAS_NVFP8()
-namespace std
-{
-template <>
-class numeric_limits<__nv_fp8_e4m3>
-{
-public:
-  static __nv_fp8_e4m3 max()
-  {
-    return cub::Traits<__nv_fp8_e4m3>::Max();
-  }
-
-  static __nv_fp8_e4m3 lowest()
-  {
-    return cub::Traits<__nv_fp8_e4m3>::Lowest();
-  }
-};
-
-template <>
-class numeric_limits<__nv_fp8_e5m2>
-{
-public:
-  static __nv_fp8_e5m2 max()
-  {
-    return cub::Traits<__nv_fp8_e5m2>::Max();
-  }
-
-  static __nv_fp8_e5m2 lowest()
-  {
-    return cub::Traits<__nv_fp8_e5m2>::Lowest();
-  }
-};
-} // namespace std
-#  endif // _CCCL_HAS_NVFP8()
 #endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
 
 namespace c2h
@@ -157,8 +122,8 @@ void init_key_segments(const c2h::device_vector<OffsetT>& segment_offsets, KeyT*
 template <template <typename> class... Ps>
 void gen(seed_t seed,
          c2h::device_vector<c2h::custom_type_t<Ps...>>& data,
-         c2h::custom_type_t<Ps...> min = std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
-         c2h::custom_type_t<Ps...> max = std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
+         c2h::custom_type_t<Ps...> min = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
+         c2h::custom_type_t<Ps...> max = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
 {
   detail::gen(seed,
               reinterpret_cast<char*>(thrust::raw_pointer_cast(data.data())),
@@ -171,8 +136,8 @@ void gen(seed_t seed,
 template <typename T>
 void gen(seed_t seed,
          c2h::device_vector<T>& data,
-         T min = std::numeric_limits<T>::lowest(),
-         T max = std::numeric_limits<T>::max());
+         T min = ::cuda::std::numeric_limits<T>::lowest(),
+         T max = ::cuda::std::numeric_limits<T>::max());
 
 template <typename T>
 void gen(modulo_t mod, c2h::device_vector<T>& data);

diff --git a/c2h/include/c2h/test_util_vec.h b/c2h/include/c2h/test_util_vec.h
@@ -29,6 +29,8 @@
 
 #include <thrust/detail/config/device_system.h>
 
+#include <cuda/std/limits>
+
 #include <iostream>
 
 /******************************************************************************
@@ -287,114 +289,62 @@ C2H_VEC_OVERLOAD(ulonglong, unsigned long long)
 C2H_VEC_OVERLOAD(float, float)
 C2H_VEC_OVERLOAD(double, double)
 
-/*
- * The following section defines macros to overload cub::NumericTraits<T>::{Max,Lowest}() for vector
- * types.
- */
+// Specialize cub::NumericTraits and cuda::std::numeric_limits for vector types.
 
-/**
- * Vector1 overloads
- */
-#  define C2H_VEC_1_TRAITS_OVERLOAD(T, BaseT)        \
-    CUB_NAMESPACE_BEGIN                              \
-    template <>                                      \
-    struct NumericTraits<T>                          \
-    {                                                \
-      static __host__ __device__ T Max()             \
-      {                                              \
-        T retval = {NumericTraits<BaseT>::Max()};    \
-        return retval;                               \
-      }                                              \
-      static __host__ __device__ T Lowest()          \
-      {                                              \
-        T retval = {NumericTraits<BaseT>::Lowest()}; \
-        return retval;                               \
-      }                                              \
-    };                                               \
-    CUB_NAMESPACE_END
+#  define REPEAT_TO_LIST_1(a)  a
+#  define REPEAT_TO_LIST_2(a)  a, a
+#  define REPEAT_TO_LIST_3(a)  a, a, a
+#  define REPEAT_TO_LIST_4(a)  a, a, a, a
+#  define REPEAT_TO_LIST(N, a) _CCCL_PP_CAT(REPEAT_TO_LIST_, N)(a)
 
-/**
- * Vector2 overloads
- */
-#  define C2H_VEC_2_TRAITS_OVERLOAD(T, BaseT)                                        \
-    CUB_NAMESPACE_BEGIN                                                              \
-    template <>                                                                      \
-    struct NumericTraits<T>                                                          \
-    {                                                                                \
-      static __host__ __device__ T Max()                                             \
-      {                                                                              \
-        T retval = {NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max()};       \
-        return retval;                                                               \
-      }                                                                              \
-      static __host__ __device__ T Lowest()                                          \
-      {                                                                              \
-        T retval = {NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest()}; \
-        return retval;                                                               \
-      }                                                                              \
-    };                                                                               \
-    CUB_NAMESPACE_END
+#  define C2H_VEC_TRAITS_OVERLOAD_IMPL(T, BaseT, N)                                   \
+    CUB_NAMESPACE_BEGIN                                                               \
+    template <>                                                                       \
+    struct NumericTraits<T>                                                           \
+    {                                                                                 \
+      static __host__ __device__ T Max()                                              \
+      {                                                                               \
+        T retval = {REPEAT_TO_LIST(N, NumericTraits<BaseT>::Max())};                  \
+        return retval;                                                                \
+      }                                                                               \
+      static __host__ __device__ T Lowest()                                           \
+      {                                                                               \
+        T retval = {REPEAT_TO_LIST(N, NumericTraits<BaseT>::Lowest())};               \
+        return retval;                                                                \
+      }                                                                               \
+    };                                                                                \
+    CUB_NAMESPACE_END                                                                 \
+                                                                                      \
+    _LIBCUDACXX_BEGIN_NAMESPACE_STD                                                   \
+    template <>                                                                       \
+    class numeric_limits<T>                                                           \
+    {                                                                                 \
+    public:                                                                           \
+      static constexpr bool is_specialized = true;                                    \
+      static __host__ __device__ T max()                                              \
+      {                                                                               \
+        T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::max())};    \
+        return retval;                                                                \
+      }                                                                               \
+      static __host__ __device__ T min()                                              \
+      {                                                                               \
+        T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::min())};    \
+        return retval;                                                                \
+      }                                                                               \
+      static __host__ __device__ T lowest()                                           \
+      {                                                                               \
+        T retval = {REPEAT_TO_LIST(N, ::cuda::std::numeric_limits<BaseT>::lowest())}; \
+        return retval;                                                                \
+      }                                                                               \
+    };                                                                                \
+    _LIBCUDACXX_END_NAMESPACE_STD
 
-/**
- * Vector3 overloads
- */
-#  define C2H_VEC_3_TRAITS_OVERLOAD(T, BaseT)                                                                        \
-    CUB_NAMESPACE_BEGIN                                                                                              \
-    template <>                                                                                                      \
-    struct NumericTraits<T>                                                                                          \
-    {                                                                                                                \
-      static __host__ __device__ T Max()                                                                             \
-      {                                                                                                              \
-        T retval = {NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max(), NumericTraits<BaseT>::Max()};          \
-        return retval;                                                                                               \
-      }                                                                                                              \
-      static __host__ __device__ T Lowest()                                                                          \
-      {                                                                                                              \
-        T retval = {NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest(), NumericTraits<BaseT>::Lowest()}; \
-        return retval;                                                                                               \
-      }                                                                                                              \
-    };                                                                                                               \
-    CUB_NAMESPACE_END
+#  define C2H_VEC_TRAITS_OVERLOAD(COMPONENT_T, BaseT)      \
+    C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##1, BaseT, 1) \
+    C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##2, BaseT, 2) \
+    C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##3, BaseT, 3) \
+    C2H_VEC_TRAITS_OVERLOAD_IMPL(COMPONENT_T##4, BaseT, 4)
 
-/**
- * Vector4 overloads
- */
-#  define C2H_VEC_4_TRAITS_OVERLOAD(T, BaseT)        \
-    CUB_NAMESPACE_BEGIN                              \
-    template <>                                      \
-    struct NumericTraits<T>                          \
-    {                                                \
-      static __host__ __device__ T Max()             \
-      {                                              \
-        T retval = {NumericTraits<BaseT>::Max(),     \
-                    NumericTraits<BaseT>::Max(),     \
-                    NumericTraits<BaseT>::Max(),     \
-                    NumericTraits<BaseT>::Max()};    \
-        return retval;                               \
-      }                                              \
-      static __host__ __device__ T Lowest()          \
-      {                                              \
-        T retval = {NumericTraits<BaseT>::Lowest(),  \
-                    NumericTraits<BaseT>::Lowest(),  \
-                    NumericTraits<BaseT>::Lowest(),  \
-                    NumericTraits<BaseT>::Lowest()}; \
-        return retval;                               \
-      }                                              \
-    };                                               \
-    CUB_NAMESPACE_END
-
-/**
- * All vector overloads
- */
-#  define C2H_VEC_TRAITS_OVERLOAD(COMPONENT_T, BaseT) \
-    C2H_VEC_1_TRAITS_OVERLOAD(COMPONENT_T##1, BaseT)  \
-    C2H_VEC_2_TRAITS_OVERLOAD(COMPONENT_T##2, BaseT)  \
-    C2H_VEC_3_TRAITS_OVERLOAD(COMPONENT_T##3, BaseT)  \
-    C2H_VEC_4_TRAITS_OVERLOAD(COMPONENT_T##4, BaseT)
-
-/**
- * Define for types
- */
-_CCCL_SUPPRESS_DEPRECATED_PUSH
 C2H_VEC_TRAITS_OVERLOAD(char, signed char)
 C2H_VEC_TRAITS_OVERLOAD(short, short)
 C2H_VEC_TRAITS_OVERLOAD(int, int)
@@ -407,6 +357,13 @@ C2H_VEC_TRAITS_OVERLOAD(ulong, unsigned long)
 C2H_VEC_TRAITS_OVERLOAD(ulonglong, unsigned long long)
 C2H_VEC_TRAITS_OVERLOAD(float, float)
 C2H_VEC_TRAITS_OVERLOAD(double, double)
-_CCCL_SUPPRESS_DEPRECATED_POP
+
+#  undef C2H_VEC_TRAITS_OVERLOAD
+#  undef C2H_VEC_TRAITS_OVERLOAD_IMPL
+#  undef REPEAT_TO_LIST_1
+#  undef REPEAT_TO_LIST_2
+#  undef REPEAT_TO_LIST_3
+#  undef REPEAT_TO_LIST_4
+#  undef REPEAT_TO_LIST
 
 #endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA