Skip to content

Commit

Permalink
[rocPRIM]Mergeback 6.4 hotfixes (#664)
Browse files Browse the repository at this point in the history
* Add atomic_add overload for double (#652)

The rocprim::histogram_even algorithm uses rocprim::atomic_add. There are atomic_add
overloads for all of the non-half-type primitive data types we support, with the
exception of double.

Because of this, if you called rocprim::histogram_even and used double as the counter
type, you'd get a compilation error.

This changes just adds an atomic_add overload that accepts double, and tacks on a
few unit test cases that exercise histogram_even with float and double counters.

* Make temp_storage functions __host__ and __device__ (#658)

* Enable CMake HIP language (#655)

* Restart on HIP language support

* linking changes

* rearrange

---------

Co-authored-by: Lauren Wrubleski <[email protected]>

* Fix for rocprim::batch_copy when using iterators (#662)

* Fix for using iterators in batch_copy

* Add indirect_iterator test to batch_memcpy

* Add specific iterator test based on reproducer, with iterators in iterators.

* Fix warp dpp reduction (#660)

Co-authored-by: Robin Voetter <[email protected]>

---------

Co-authored-by: Wayne Franz <[email protected]>
Co-authored-by: Nick Breed <[email protected]>
Co-authored-by: Lauren Wrubleski <[email protected]>
Co-authored-by: Beatriz Navidad Vilches <[email protected]>
Co-authored-by: Robin Voetter <[email protected]>
  • Loading branch information
6 people authored Dec 6, 2024
1 parent 1646bbe commit f1d714d
Show file tree
Hide file tree
Showing 16 changed files with 399 additions and 139 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec
* Fixed an issue where `rocprim::partial_sort_copy` would yield a compile error if the input iterator is const.
* Fixed incorrect 128-bit signed and unsigned integers type traits.
* Fixed compilation issue when `rocprim::radix_key_codec<...>` is specialized with a 128-bit integer.
* Fixed the warp-level reduction `rocprim::warp_reduce.reduce` DPP implementation to avoid undefined intermediate values during the reduction.

### Upcoming changes
* Using the initialisation constructor of `rocprim::reverse_iterator` will throw a deprecation warning. It will be marked as explicit in the next major release.
Expand Down
109 changes: 69 additions & 40 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@ set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended
# rocPRIM project
project(rocprim LANGUAGES CXX)

# Set CXX flags
if (NOT DEFINED CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Set HIP flags
set(CMAKE_HIP_STANDARD 14)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
set(CMAKE_HIP_EXTENSIONS OFF)

# Set CXX standard
if (CMAKE_CXX_STANDARD EQUAL 14)
message(WARNING "C++14 will be deprecated in the next major release")
elseif(NOT CMAKE_CXX_STANDARD EQUAL 17)
message(FATAL_ERROR "Only C++14 and C++17 are supported")
endif()

if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(ROCPRIM_PROJECT_IS_TOP_LEVEL TRUE)
else()
Expand All @@ -42,17 +61,36 @@ else()
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
endif()

include(CheckLanguage)
include(CMakeDependentOption)

# Build options
option(BUILD_TEST "Build tests (requires googletest)" OFF)
option(BUILD_BENCHMARK "Build benchmarks" OFF)
option(BUILD_NAIVE_BENCHMARK "Build naive benchmarks" OFF)
option(BUILD_EXAMPLE "Build examples" OFF)
option(BUILD_DOCS "Build documentation (requires sphinx)" OFF)
# Disables building tests, benchmarks, examples
option(ONLY_INSTALL "Only install" OFF)
cmake_dependent_option(BUILD_TEST "Build tests (requires googletest)" OFF "NOT ONLY_INSTALL" OFF)
cmake_dependent_option(BUILD_BENCHMARK "Build benchmarks" OFF "NOT ONLY_INSTALL" OFF)
cmake_dependent_option(BUILD_EXAMPLE "Build examples" OFF "NOT ONLY_INSTALL" OFF)
option(BUILD_NAIVE_BENCHMARK "Build naive benchmarks" OFF)
cmake_dependent_option(BUILD_DOCS "Build documentation (requires sphinx)" OFF "NOT ONLY_INSTALL" OFF)
option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF)
option(ROCPRIM_INSTALL "Enable installation of rocPRIM (projects embedding rocPRIM may want to turn this OFF)" ON)

check_language(HIP)
cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF)

if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(ROCPRIM_PROJECT_IS_TOP_LEVEL TRUE)
else()
set(ROCPRIM_PROJECT_IS_TOP_LEVEL FALSE)
endif()

#Adding CMAKE_PREFIX_PATH
if(WIN32)
set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation")
else()
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
endif()

# CMake modules
list(APPEND CMAKE_MODULE_PATH
${CMAKE_CURRENT_SOURCE_DIR}/cmake
Expand All @@ -68,19 +106,6 @@ endif()

set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath")

# Set CXX flags
if (NOT DEFINED CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

if (CMAKE_CXX_STANDARD EQUAL 14)
message(WARNING "C++14 will be deprecated in the next major release")
elseif(NOT CMAKE_CXX_STANDARD EQUAL 17)
message(FATAL_ERROR "Only C++14 and C++17 are supported")
endif()

if(DEFINED BUILD_SHARED_LIBS)
set(PKG_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
else()
Expand All @@ -91,26 +116,30 @@ set(BUILD_SHARED_LIBS OFF) # don't build client dependencies as shared
# Get dependencies (required here to get rocm-cmake)
include(cmake/Dependencies.cmake)
# Use target ID syntax if supported for GPU_TARGETS
if (NOT DEFINED AMDGPU_TARGETS)
set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for")
if(USE_HIPCXX)
enable_language(HIP)
else()
set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for")
endif()
set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all")

if(GPU_TARGETS STREQUAL "all")
if(BUILD_ADDRESS_SANITIZER)
# ASAN builds require xnack
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+"
)
if (NOT DEFINED AMDGPU_TARGETS)
set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for")
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
)
set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for")
endif()
set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all")

if(GPU_TARGETS STREQUAL "all")
if(BUILD_ADDRESS_SANITIZER)
# ASAN builds require xnack
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+"
)
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
)
endif()

set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
endif()

set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
endif()

# TODO: Fix VerifyCompiler for HIP on Windows
Expand Down Expand Up @@ -147,12 +176,12 @@ print_configuration_summary()
# rocPRIM library
add_subdirectory(rocprim)

if(ROCPRIM_PROJECT_IS_TOP_LEVEL AND NOT ONLY_INSTALL AND (BUILD_TEST OR BUILD_BENCHMARK))
if(ROCPRIM_PROJECT_IS_TOP_LEVEL AND (BUILD_TEST OR BUILD_BENCHMARK))
rocm_package_setup_component(clients)
endif()

# Tests
if(BUILD_TEST AND NOT ONLY_INSTALL)
if(BUILD_TEST)
if (ROCPRIM_PROJECT_IS_TOP_LEVEL)
rocm_package_setup_client_component(tests)
endif()
Expand All @@ -161,20 +190,20 @@ if(BUILD_TEST AND NOT ONLY_INSTALL)
endif()

# Benchmarks
if(BUILD_BENCHMARK AND NOT ONLY_INSTALL)
if(BUILD_BENCHMARK)
if (ROCPRIM_PROJECT_IS_TOP_LEVEL)
rocm_package_setup_client_component(benchmarks)
endif()
add_subdirectory(benchmark)
endif()

# Examples
if(BUILD_EXAMPLE AND NOT ONLY_INSTALL)
if(BUILD_EXAMPLE)
add_subdirectory(example)
endif()

# Docs
if(BUILD_DOCS AND NOT ONLY_INSTALL)
if(BUILD_DOCS)
add_subdirectory(docs)
endif()

Expand Down
7 changes: 6 additions & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ endif()
function(add_rocprim_benchmark BENCHMARK_SOURCE)
get_filename_component(BENCHMARK_TARGET ${BENCHMARK_SOURCE} NAME_WE)

if(USE_HIPCXX)
set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE HIP)
endif()

if(BENCHMARK_CONFIG_TUNING)
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${BENCHMARK_TARGET}.parallel.cpp.in")
message(STATUS "found ${BENCHMARK_TARGET}.parallel.cpp.in file, compiling in parallel.")
Expand Down Expand Up @@ -77,9 +81,10 @@ function(add_rocprim_benchmark BENCHMARK_SOURCE)
rocprim
benchmark::benchmark
)

target_link_libraries(${BENCHMARK_TARGET}
PRIVATE
rocprim_hip
$<IF:$<LINK_LANGUAGE:HIP>,hip::host,hip::device>
)

target_compile_options(${BENCHMARK_TARGET}
Expand Down
21 changes: 16 additions & 5 deletions cmake/Summary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,29 @@ function(print_configuration_summary)
message(STATUS "******** Summary ********")
message(STATUS "General:")
message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP)
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}")
if(USE_HIPCXX)
message(STATUS " HIP compiler : ${CMAKE_HIP_COMPILER}")
message(STATUS " HIP compiler version : ${CMAKE_HIP_COMPILER_VERSION}")
string(STRIP "${CMAKE_HIP_FLAGS}" CMAKE_HIP_FLAGS_STRIP)
message(STATUS " HIP flags : ${CMAKE_HIP_FLAGS_STRIP}")
else()
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP)
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}")
endif()
get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(GENERATOR_IS_MULTI_CONFIG)
message(STATUS " Build types : ${CMAKE_CONFIGURATION_TYPES}")
else()
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
endif()
message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}")
message(STATUS " Device targets : ${GPU_TARGETS}")
if(USE_HIPCXX)
message(STATUS " Device targets : ${CMAKE_HIP_ARCHITECTURES}")
else()
message(STATUS " Device targets : ${GPU_TARGETS}")
endif()
message(STATUS "")
message(STATUS " ONLY_INSTALL : ${ONLY_INSTALL}")
message(STATUS " BUILD_TEST : ${BUILD_TEST}")
Expand Down
12 changes: 7 additions & 5 deletions cmake/VerifyCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip ${ROCM_PATH}/llvm /opt/rocm/llvm /opt/rocm /opt/rocm/hip)
find_package(hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm)

if(HIP_COMPILER STREQUAL "clang")
if(NOT (HIP_CXX_COMPILER MATCHES ".*hipcc" OR HIP_CXX_COMPILER MATCHES ".*clang\\+\\+"))
message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.")
if(NOT USE_HIPCXX)
if(HIP_COMPILER STREQUAL "clang")
if(NOT (HIP_CXX_COMPILER MATCHES ".*hipcc" OR HIP_CXX_COMPILER MATCHES ".*clang\\+\\+"))
message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.")
endif()
else()
message(FATAL_ERROR "HIP_COMPILER must be 'clang' (AMD ROCm platform)")
endif()
else()
message(FATAL_ERROR "HIP_COMPILER must be 'clang' (AMD ROCm platform)")
endif()
10 changes: 8 additions & 2 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@

function(add_rocprim_example EXAMPLE_SOURCE)
get_filename_component(EXAMPLE_TARGET ${EXAMPLE_SOURCE} NAME_WE)

if(USE_HIPCXX)
set_source_files_properties(${EXAMPLE_SOURCE} PROPERTIES LANGUAGES HIP)
endif()

add_executable(${EXAMPLE_TARGET} ${EXAMPLE_SOURCE})

target_link_libraries(${EXAMPLE_TARGET}
PRIVATE
rocprim_hip
rocprim
)
target_link_libraries(${EXAMPLE_TARGET}
PRIVATE
rocprim_hip
$<IF:$<LINK_LANGUAGE:HIP>,hip::host,hip::device>
)
set_target_properties(${EXAMPLE_TARGET}
PROPERTIES
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,15 @@ class block_reduce_warp_reduce
}
::rocprim::syncthreads();

if(flat_tid < warps_no_)
if(warp_id == 0)
{
// Use warp partial to calculate the final reduce results for every thread
auto warp_partial = storage_.warp_partials[lane_id];
auto warp_partial = storage_.warp_partials[lane_id % warps_no_];

warp_reduce<!warps_no_is_pow_of_two_, warp_reduce_output_type>(
warp_partial, output, warps_no_, reduce_op
);
warp_reduce<!warps_no_is_pow_of_two_, warp_reduce_output_type>(warp_partial,
output,
warps_no_,
reduce_op);
}
}
}
Expand Down
Loading

0 comments on commit f1d714d

Please sign in to comment.