NVIDIA · bernhardmgruber · Feb 17, 2025 · Feb 15, 2025 · Feb 17, 2025 · Feb 17, 2025
@@ -40,15 +40,17 @@
 #include <thrust/scan.h>
 #include <thrust/tabulate.h>
 
-#include <cuda/std/type_traits>
+#include <cuda/type_traits>
 
 #include <cstdint>
 
+#include <c2h/bfloat16.cuh>
 #include <c2h/custom_type.h>
 #include <c2h/device_policy.h>
 #include <c2h/extended_types.h>
 #include <c2h/fill_striped.h>
 #include <c2h/generators.h>
+#include <c2h/half.cuh>
 #include <c2h/vector.h>
 
 #if C2H_HAS_CURAND
@@ -118,30 +120,7 @@ private:
   c2h::device_vector<float> m_distribution;
 };
 
-// TODO(bgruber): modelled after cub::Traits. We should generalize this somewhere into libcu++.
-template <typename T>
-struct is_floating_point : ::cuda::std::is_floating_point<T>
-{};
-#if _CCCL_HAS_NVFP16()
-template <>
-struct is_floating_point<__half> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVFP16()
-#if _CCCL_HAS_NVBF16()
-template <>
-struct is_floating_point<__nv_bfloat16> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVBF16()
-#if _CCCL_HAS_NVFP8()
-template <>
-struct is_floating_point<__nv_fp8_e4m3> : ::cuda::std::true_type
-{};
-template <>
-struct is_floating_point<__nv_fp8_e5m2> : ::cuda::std::true_type
-{};
-#endif // _CCCL_HAS_NVFP8()
-
-template <typename T, bool = is_floating_point<T>::value>
+template <typename T, bool = ::cuda::is_floating_point_v<T>>
 struct random_to_item_t
 {
   float m_min;

@@ -244,9 +244,11 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<bfloat16_t> = tru
 #endif // _CCCL_NO_VARIABLE_TEMPLATES
 
 template <>
-class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
+class numeric_limits<bfloat16_t>
 {
 public:
+  static constexpr bool is_specialized = true;
+
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t max()
   {
     return bfloat16_t(numeric_limits<__nv_bfloat16>::max());

@@ -31,6 +31,7 @@
 
 #include <cuda/std/bit>
 #include <cuda/std/cmath>
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 #include <cuda/std/utility>
 
@@ -283,10 +284,10 @@ inline std::size_t adjust_seed_count(std::size_t requested)
 }
 } // namespace detail
 
-#define C2H_SEED(N)                                                                                                    \
-  c2h::seed_t                                                                                                          \
-  {                                                                                                                    \
-    GENERATE_COPY(take(                                                                                                \
-      detail::adjust_seed_count(N),                                                                                    \
-      random(std::numeric_limits<unsigned long long int>::min(), std::numeric_limits<unsigned long long int>::max()))) \
+#define C2H_SEED(N)                                                                         \
+  c2h::seed_t                                                                               \
+  {                                                                                         \
+    GENERATE_COPY(take(detail::adjust_seed_count(N),                                        \
+                       random(::cuda::std::numeric_limits<unsigned long long int>::min(),   \
+                              ::cuda::std::numeric_limits<unsigned long long int>::max()))) \
   }
@@ -27,7 +27,8 @@
 
 #pragma once
 
-#include <limits>
+#include <cuda/std/limits>
+
 #include <memory>
 #include <ostream>
 
@@ -178,34 +179,35 @@ class accumulateable_t
 
 } // namespace c2h
 
-namespace std
-{
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
 template <template <typename> class... Policies>
 class numeric_limits<c2h::custom_type_t<Policies...>>
 {
 public:
-  static c2h::custom_type_t<Policies...> max()
+  static constexpr bool is_specialized = true;
+
+  static __host__ __device__ c2h::custom_type_t<Policies...> max()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::max();
-    val.val = std::numeric_limits<std::size_t>::max();
+    val.key = numeric_limits<std::size_t>::max();
+    val.val = numeric_limits<std::size_t>::max();
     return val;
   }
 
-  static c2h::custom_type_t<Policies...> min()
+  static __host__ __device__ c2h::custom_type_t<Policies...> min()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::min();
-    val.val = std::numeric_limits<std::size_t>::min();
+    val.key = numeric_limits<std::size_t>::min();
+    val.val = numeric_limits<std::size_t>::min();
     return val;
   }
 
-  static c2h::custom_type_t<Policies...> lowest()
+  static __host__ __device__ c2h::custom_type_t<Policies...> lowest()
   {
     c2h::custom_type_t<Policies...> val;
-    val.key = std::numeric_limits<std::size_t>::lowest();
-    val.val = std::numeric_limits<std::size_t>::lowest();
+    val.key = numeric_limits<std::size_t>::lowest();
+    val.val = numeric_limits<std::size_t>::lowest();
     return val;
   }
 };
-} // namespace std
+_LIBCUDACXX_END_NAMESPACE_STD
@@ -29,7 +29,7 @@
 
 #include <thrust/detail/config/device_system.h>
 
-#include <limits>
+#include <cuda/std/limits>
 
 #include <c2h/custom_type.h>
 #include <c2h/vector.h>
@@ -52,41 +52,6 @@ _CCCL_DIAG_PUSH
 #    include <cuda_fp8.h>
 _CCCL_DIAG_POP
 #  endif // _CCCL_HAS_NVFP8()
-
-#  if _CCCL_HAS_NVFP8()
-namespace std
-{
-template <>
-class numeric_limits<__nv_fp8_e4m3>
-{
-public:
-  static __nv_fp8_e4m3 max()
-  {
-    return cub::Traits<__nv_fp8_e4m3>::Max();
-  }
-
-  static __nv_fp8_e4m3 lowest()
-  {
-    return cub::Traits<__nv_fp8_e4m3>::Lowest();
-  }
-};
-
-template <>
-class numeric_limits<__nv_fp8_e5m2>
-{
-public:
-  static __nv_fp8_e5m2 max()
-  {
-    return cub::Traits<__nv_fp8_e5m2>::Max();
-  }
-
-  static __nv_fp8_e5m2 lowest()
-  {
-    return cub::Traits<__nv_fp8_e5m2>::Lowest();
-  }
-};
-} // namespace std
-#  endif // _CCCL_HAS_NVFP8()
 #endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
 
 namespace c2h
@@ -157,8 +122,8 @@ void init_key_segments(const c2h::device_vector<OffsetT>& segment_offsets, KeyT*
 template <template <typename> class... Ps>
 void gen(seed_t seed,
          c2h::device_vector<c2h::custom_type_t<Ps...>>& data,
-         c2h::custom_type_t<Ps...> min = std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
-         c2h::custom_type_t<Ps...> max = std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
+         c2h::custom_type_t<Ps...> min = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
+         c2h::custom_type_t<Ps...> max = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
 {
   detail::gen(seed,
               reinterpret_cast<char*>(thrust::raw_pointer_cast(data.data())),
@@ -171,8 +136,8 @@ void gen(seed_t seed,
 template <typename T>
 void gen(seed_t seed,
          c2h::device_vector<T>& data,
-         T min = std::numeric_limits<T>::lowest(),
-         T max = std::numeric_limits<T>::max());
+         T min = ::cuda::std::numeric_limits<T>::lowest(),
+         T max = ::cuda::std::numeric_limits<T>::max());
 
 template <typename T>
 void gen(modulo_t mod, c2h::device_vector<T>& data);

@@ -339,9 +339,11 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<half_t> = true;
 #endif // _CCCL_NO_VARIABLE_TEMPLATES
 
 template <>
-class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
+class numeric_limits<half_t>
 {
 public:
+  static constexpr bool is_specialized = true;
+
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t max()
   {
     return half_t(numeric_limits<__half>::max());