You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
int main() {
// Define the size of the array
const int N = 1024;
// Initialize host data
float h_data[N];
for (int i = 0; i < N; ++i) {
h_data[i] = static_cast<float>(i);
}
// Allocate device memory and copy data from host to device
thrust::device_vector<float> d_data(h_data, h_data + N);
// Define the value to add
float add_value = 10.0f;
// Perform the addition on the GPU using Thrust's transform function
thrust::transform(thrust::device(d_data.begin()), thrust::device(d_data.end()),
thrust::constant_iterator<float>(add_value),
d_data.begin(), thrust::plus<float>());
// Copy data back to host for verification
thrust::copy(thrust::device(d_data.begin()), thrust::device(d_data.end()),
h_data);
// Verify the result on the host
bool success = true;
for (int i = 0; i < N && success; ++i) {
if (h_data[i] != static_cast<float>(i + add_value)) {
success = false;
}
}
if (success) {
std::cout << "RoCThrust library works correctly!" << std::endl;
} else {
std::cout << "RoCThrust library failed the test." << std::endl;
}
return 0;
hipcc -fopenmp -offload-arch=gfx1031 -o rocthrust_example rocthrust_example.cpp
-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP
-D__HIP_PLATFORM_AMD__
-I/opt/rocm-6.2.2/include
-L/opt/rocm-6.2.2/lib
-lroctracer_client
-lrocprofiler_client
-lrocprofiler_agent
-lhip_hcc
-Wno-deprecated-declarations
clang++: warning: joined argument treated as '-o ffload-arch=gfx1031'; did you mean '--offload-arch=gfx1031'? [-Wunknown-argument]
rocthrust_example.cpp:25:24: error: no template named 'constant_iterator' in namespace 'thrust'; did you mean 'rocprim::constant_iterator'?
25 | thrust::constant_iterator(add_value),
| ^~~~~~~~~~~~~~~~~~~~~~~~~
| rocprim::constant_iterator
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/rocprim/iterator/constant_iterator.hpp:51:7: note: 'rocprim::constant_iterator' declared here
51 | class constant_iterator
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:22:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.h:57:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/transform.h:721:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/transform.inl:156:61: error: no type named 'type' in 'thrust::iterator_system<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>>'
156 | typedef typename thrust::iterator_system::type System1;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
rocthrust_example.cpp:24:13: note: in instantiation of function template specialization 'thrust::transform<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>, rocprim::constant_iterator, thrust::detail::normal_iterator<thrust::device_ptr>, thrust::plus>' requested here
24 | thrust::transform(thrust::device(d_data.begin()), thrust::device(d_data.end()),
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:22:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.h:57:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/transform.h:721:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/transform.inl:164:28: error: no matching function for call to 'select_system'
164 | return thrust::transform(select_system(system1,system2,system3), first1, last1, first2, result, op);
| ^~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:110:6: note: candidate template ignored: could not match 'thrust::execution_policy' against 'System1' (aka 'int')
110 | &select_system(thrust::execution_policy &system1,
| ^
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:97:6: note: candidate function not viable: requires 2 arguments, but 3 were provided
97 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98 | thrust::execution_policy &system2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/hip/detail/cross_system.h:295:52: note: candidate function template not viable: requires 2 arguments, but 3 were provided
295 | hostdevice cross_system<Sys1, Sys2> select_system(
| ^
296 | execution_policy const& sys1, thrust::cpp::execution_policy const& sys2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/hip/detail/cross_system.h:307:3: note: candidate function template not viable: requires 2 arguments, but 3 were provided
307 | select_system(thrust::cpp::execution_policy const &sys1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
308 | execution_policy const & sys2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:124:6: note: candidate function not viable: requires 4 arguments, but 3 were provided
124 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
126 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | thrust::execution_policy &system4)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:168:27: note: candidate function not viable: requires 1 argument, but 3 were provided
168 | thrust::device_system_tag select_system(thrust::any_system_tag)
| ^ ~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:86:5: note: candidate function not viable: requires single argument 'system', but 3 arguments were provided
86 | select_system(thrust::execution_policy &system)
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:139:6: note: candidate function not viable: requires 5 arguments, but 3 were provided
139 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
141 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142 | thrust::execution_policy &system4,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143 | thrust::execution_policy &system5)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:155:6: note: candidate function not viable: requires 6 arguments, but 3 were provided
155 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
156 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
157 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
158 | thrust::execution_policy &system4,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
159 | thrust::execution_policy &system5,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
160 | thrust::execution_policy &system6)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:103:60: error: no type named 'type' in 'thrust::iterator_system<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>>'
103 | typedef typename thrust::iterator_system::type System1;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
rocthrust_example.cpp:29:13: note: in instantiation of function template specialization 'thrust::copy<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>, float *>' requested here
29 | thrust::copy(thrust::device(d_data.begin()), thrust::device(d_data.end()),
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:109:10: error: no matching function for call to 'two_system_copy'
109 | return thrust::detail::two_system_copy(system1, system2, first, last, result);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:63:18: note: candidate template ignored: could not match 'thrust::execution_policy' against 'System1' (aka 'int')
63 | OutputIterator two_system_copy(const thrust::execution_policy &system1,
| ^
5 errors generated when compiling for gfx1031.
failed to execute:/opt/rocm-6.2.2/lib/llvm/bin/clang++ --offload-arch=gfx1031 --offload-arch=gfx90c -O3 --driver-mode=g++ -O3 --hip-link -fopenmp -offload-arch=gfx1031 -o "rocthrust_example" -x hip rocthrust_example.cpp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP -D__HIP_PLATFORM_AMD__ -I/opt/rocm-6.2.2/include -L/opt/rocm-6.2.2/lib -lroctracer_client -lrocprofiler_client -lrocprofiler_agent -lhip_hcc -Wno-deprecated-declarations
int main() {
// Define the size of the array
const int N = 1024;
// Initialize host data
float h_data[N];
for (int i = 0; i < N; ++i) {
h_data[i] = static_cast<float>(i);
}
// Allocate device memory and copy data from host to device
thrust::device_vector<float> d_data(h_data, h_data + N);
// Define the value to add
float add_value = 10.0f;
// Perform the addition on the GPU using Thrust's transform function
thrust::transform(thrust::device(d_data.begin()), thrust::device(d_data.end()),
thrust::constant_iterator<float>(add_value),
d_data.begin(), thrust::plus<float>());
// Copy data back to host for verification
thrust::copy(thrust::device(d_data.begin()), thrust::device(d_data.end()),
h_data);
// Verify the result on the host
bool success = true;
for (int i = 0; i < N && success; ++i) {
if (h_data[i] != static_cast<float>(i + add_value)) {
success = false;
}
}
if (success) {
std::cout << "RoCThrust library works correctly!" << std::endl;
} else {
std::cout << "RoCThrust library failed the test." << std::endl;
}
return 0;
(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
rocminfo --support
ROCk module version 6.8.5 is loaded
HSA System Attributes
Runtime Version: 1.14
Runtime Ext Version: 1.6
System Timestamp Freq.: 1000.000000MHz
Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
Machine Model: LARGE
System Endianness: LITTLE
Mwaitx: DISABLED
DMAbuf Support: YES
==========
HSA Agents
Agent 1
Name: AMD Ryzen 9 5900HX with Radeon Graphics
Uuid: CPU-XX
Marketing Name: AMD Ryzen 9 5900HX with Radeon Graphics
Vendor Name: CPU
Feature: None specified
Profile: FULL_PROFILE
Float Round Mode: NEAR
Max Queue Number: 0(0x0)
Queue Min Size: 0(0x0)
Queue Max Size: 0(0x0)
Queue Type: MULTI
Node: 0
Device Type: CPU
Cache Info:
L1: 32768(0x8000) KB
Chip ID: 0(0x0)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 4680
BDFID: 0
Internal Node ID: 0
Compute Unit: 16
SIMDs per CU: 0
Shader Engines: 0
Shader Arrs. per Eng.: 0
WatchPts on Addr. Ranges:1
Memory Properties:
Features: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: FINE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
Pool 2
Segment: GLOBAL; FLAGS: KERNARG, FINE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
Pool 3
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
ISA Info:
Agent 2
Name: gfx1031
Uuid: GPU-XX
Marketing Name: AMD Radeon RX 6700M
Vendor Name: AMD
Feature: KERNEL_DISPATCH
Profile: BASE_PROFILE
Float Round Mode: NEAR
Max Queue Number: 128(0x80)
Queue Min Size: 64(0x40)
Queue Max Size: 131072(0x20000)
Queue Type: MULTI
Node: 1
Device Type: GPU
Cache Info:
L1: 16(0x10) KB
L2: 3072(0xc00) KB
L3: 98304(0x18000) KB
Chip ID: 29663(0x73df)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 2465
BDFID: 768
Internal Node ID: 1
Compute Unit: 36
SIMDs per CU: 2
Shader Engines: 2
Shader Arrs. per Eng.: 2
WatchPts on Addr. Ranges:4
Coherent Host Access: FALSE
Memory Properties:
Features: KERNEL_DISPATCH
Fast F16 Operation: TRUE
Wavefront Size: 32(0x20)
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Max Waves Per CU: 32(0x20)
Max Work-item Per CU: 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
Max fbarriers/Workgrp: 32
Packet Processor uCode:: 118
SDMA engine uCode:: 80
IOMMU Support:: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 10469376(0x9fc000) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 2
Segment: GLOBAL; FLAGS: EXTENDED FINE GRAINED
Size: 10469376(0x9fc000) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 3
Segment: GROUP
Size: 64(0x40) KB
Allocatable: FALSE
Alloc Granule: 0KB
Alloc Recommended Granule:0KB
Alloc Alignment: 0KB
Accessible by all: FALSE
ISA Info:
ISA 1
Name: amdgcn-amd-amdhsa--gfx1031
Machine Models: HSA_MACHINE_MODEL_LARGE
Profiles: HSA_PROFILE_BASE
Default Rounding Mode: NEAR
Default Rounding Mode: NEAR
Fast f16: TRUE
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
FBarrier Max Size: 32
Agent 3
Name: gfx90c
Uuid: GPU-XX
Marketing Name: AMD Radeon Graphics
Vendor Name: AMD
Feature: KERNEL_DISPATCH
Profile: BASE_PROFILE
Float Round Mode: NEAR
Max Queue Number: 128(0x80)
Queue Min Size: 64(0x40)
Queue Max Size: 131072(0x20000)
Queue Type: MULTI
Node: 2
Device Type: GPU
Cache Info:
L1: 16(0x10) KB
L2: 1024(0x400) KB
Chip ID: 5688(0x1638)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 2100
BDFID: 1792
Internal Node ID: 2
Compute Unit: 8
SIMDs per CU: 4
Shader Engines: 1
Shader Arrs. per Eng.: 1
WatchPts on Addr. Ranges:4
Coherent Host Access: FALSE
Memory Properties: APU
Features: KERNEL_DISPATCH
Fast F16 Operation: TRUE
Wavefront Size: 64(0x40)
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Max Waves Per CU: 40(0x28)
Max Work-item Per CU: 2560(0xa00)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
Max fbarriers/Workgrp: 32
Packet Processor uCode:: 472
SDMA engine uCode:: 40
IOMMU Support:: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 7868264(0x780f68) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 2
Segment: GLOBAL; FLAGS: EXTENDED FINE GRAINED
Size: 7868264(0x780f68) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 3
Segment: GROUP
Size: 64(0x40) KB
Allocatable: FALSE
Alloc Granule: 0KB
Alloc Recommended Granule:0KB
Alloc Alignment: 0KB
Accessible by all: FALSE
ISA Info:
ISA 1
Name: amdgcn-amd-amdhsa--gfx90c:xnack-
Machine Models: HSA_MACHINE_MODEL_LARGE
Profiles: HSA_PROFILE_BASE
Default Rounding Mode: NEAR
Default Rounding Mode: NEAR
Fast f16: TRUE
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
FBarrier Max Size: 32
*** Done ***
Additional Information
A simple example with compile instructions for writing programs that use rocThrust would save inexperienced users a lot of time.
The text was updated successfully, but these errors were encountered:
Problem Description
I have been trying to compile the following Thrust test program but I keep running into compile issues.
OS:
NAME="Ubuntu"
VERSION="22.04.5 LTS (Jammy Jellyfish)"
CPU:
model name : AMD Ryzen 9 5900HX with Radeon Graphics
GPU:
Name: AMD Ryzen 9 5900HX with Radeon Graphics
Marketing Name: AMD Ryzen 9 5900HX with Radeon Graphics
Name: gfx1031
Marketing Name: AMD Radeon RX 6700M
Name: amdgcn-amd-amdhsa--gfx1031
Name: gfx90c
Marketing Name: AMD Radeon Graphics
Name: amdgcn-amd-amdhsa--gfx90c:xnack-
The Program "rocthrust_example.cpp":
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <iostream>
#define CUSTOM_DEVICE_SYSTEM 1
int main() {
// Define the size of the array
const int N = 1024;
}
Compile syntax:
hipcc -fopenmp -offload-arch=gfx1031 -o rocthrust_example rocthrust_example.cpp
-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP
-D__HIP_PLATFORM_AMD__
-I/opt/rocm-6.2.2/include
-L/opt/rocm-6.2.2/lib
-lroctracer_client
-lrocprofiler_client
-lrocprofiler_agent
-lhip_hcc
-Wno-deprecated-declarations
Compile Errors:
hipcc -fopenmp -offload-arch=gfx1031 -o rocthrust_example rocthrust_example.cpp
-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP
-D__HIP_PLATFORM_AMD__
-I/opt/rocm-6.2.2/include
-L/opt/rocm-6.2.2/lib
-lroctracer_client
-lrocprofiler_client
-lrocprofiler_agent
-lhip_hcc
-Wno-deprecated-declarations
clang++: warning: joined argument treated as '-o ffload-arch=gfx1031'; did you mean '--offload-arch=gfx1031'? [-Wunknown-argument]
rocthrust_example.cpp:25:24: error: no template named 'constant_iterator' in namespace 'thrust'; did you mean 'rocprim::constant_iterator'?
25 | thrust::constant_iterator(add_value),
| ^~~~~~~~~~~~~~~~~~~~~~~~~
| rocprim::constant_iterator
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/rocprim/iterator/constant_iterator.hpp:51:7: note: 'rocprim::constant_iterator' declared here
51 | class constant_iterator
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:22:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.h:57:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/transform.h:721:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/transform.inl:156:61: error: no type named 'type' in 'thrust::iterator_system<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>>'
156 | typedef typename thrust::iterator_system::type System1;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
rocthrust_example.cpp:24:13: note: in instantiation of function template specialization 'thrust::transform<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>, rocprim::constant_iterator, thrust::detail::normal_iterator<thrust::device_ptr>, thrust::plus>' requested here
24 | thrust::transform(thrust::device(d_data.begin()), thrust::device(d_data.end()),
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:22:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.h:57:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/copy.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/transform.h:721:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/transform.inl:164:28: error: no matching function for call to 'select_system'
164 | return thrust::transform(select_system(system1,system2,system3), first1, last1, first2, result, op);
| ^~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:110:6: note: candidate template ignored: could not match 'thrust::execution_policy' against 'System1' (aka 'int')
110 | &select_system(thrust::execution_policy &system1,
| ^
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:97:6: note: candidate function not viable: requires 2 arguments, but 3 were provided
97 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98 | thrust::execution_policy &system2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/hip/detail/cross_system.h:295:52: note: candidate function template not viable: requires 2 arguments, but 3 were provided
295 | host device cross_system<Sys1, Sys2> select_system(
| ^
296 | execution_policy const& sys1, thrust::cpp::execution_policy const& sys2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/hip/detail/cross_system.h:307:3: note: candidate function template not viable: requires 2 arguments, but 3 were provided
307 | select_system(thrust::cpp::execution_policy const &sys1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
308 | execution_policy const & sys2)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:124:6: note: candidate function not viable: requires 4 arguments, but 3 were provided
124 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
126 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127 | thrust::execution_policy &system4)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:168:27: note: candidate function not viable: requires 1 argument, but 3 were provided
168 | thrust::device_system_tag select_system(thrust::any_system_tag)
| ^ ~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:86:5: note: candidate function not viable: requires single argument 'system', but 3 arguments were provided
86 | select_system(thrust::execution_policy &system)
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:139:6: note: candidate function not viable: requires 5 arguments, but 3 were provided
139 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
140 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
141 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142 | thrust::execution_policy &system4,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143 | thrust::execution_policy &system5)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/system/detail/generic/select_system.inl:155:6: note: candidate function not viable: requires 6 arguments, but 3 were provided
155 | &select_system(thrust::execution_policy &system1,
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
156 | thrust::execution_policy &system2,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
157 | thrust::execution_policy &system3,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
158 | thrust::execution_policy &system4,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
159 | thrust::execution_policy &system5,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
160 | thrust::execution_policy &system6)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:103:60: error: no type named 'type' in 'thrust::iterator_system<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>>'
103 | typedef typename thrust::iterator_system::type System1;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
rocthrust_example.cpp:29:13: note: in instantiation of function template specialization 'thrust::copy<thrust::detail::execute_with_allocator<thrust::detail::normal_iterator<thrust::device_ptr>, thrust::hip_rocprim::execute_on_stream_base>, float *>' requested here
29 | thrust::copy(thrust::device(d_data.begin()), thrust::device(d_data.end()),
| ^
In file included from rocthrust_example.cpp:1:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/device_vector.h:26:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/vector_base.h:30:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.h:235:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/contiguous_storage.inl:24:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.h:45:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/allocator/copy_construct_range.inl:23:
In file included from /opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.h:90:
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:109:10: error: no matching function for call to 'two_system_copy'
109 | return thrust::detail::two_system_copy(system1, system2, first, last, result);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.2.2/lib/llvm/bin/../../../include/thrust/detail/copy.inl:63:18: note: candidate template ignored: could not match 'thrust::execution_policy' against 'System1' (aka 'int')
63 | OutputIterator two_system_copy(const thrust::execution_policy &system1,
| ^
5 errors generated when compiling for gfx1031.
failed to execute:/opt/rocm-6.2.2/lib/llvm/bin/clang++ --offload-arch=gfx1031 --offload-arch=gfx90c -O3 --driver-mode=g++ -O3 --hip-link -fopenmp -offload-arch=gfx1031 -o "rocthrust_example" -x hip rocthrust_example.cpp -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP -D__HIP_PLATFORM_AMD__ -I/opt/rocm-6.2.2/include -L/opt/rocm-6.2.2/lib -lroctracer_client -lrocprofiler_client -lrocprofiler_agent -lhip_hcc -Wno-deprecated-declarations
Operating System
Ubuntu 22.04
CPU
AMD Ryzen 9 5900HX with Radeon Graphics
GPU
AMD Radeon RX 6700M
ROCm Version
ROCm 6.2.2
ROCm Component
rocThrust
Steps to Reproduce
Compile the program "rocthrust_example.cpp":
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <iostream>
#define CUSTOM_DEVICE_SYSTEM 1
int main() {
// Define the size of the array
const int N = 1024;
}
Compile syntax:
hipcc -fopenmp -offload-arch=gfx1031 -o rocthrust_example rocthrust_example.cpp
-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP
-D__HIP_PLATFORM_AMD__
-I/opt/rocm-6.2.2/include
-L/opt/rocm-6.2.2/lib
-lroctracer_client
-lrocprofiler_client
-lrocprofiler_agent
-lhip_hcc
-Wno-deprecated-declarations
(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
rocminfo --support
ROCk module version 6.8.5 is loaded
HSA System Attributes
Runtime Version: 1.14
Runtime Ext Version: 1.6
System Timestamp Freq.: 1000.000000MHz
Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
Machine Model: LARGE
System Endianness: LITTLE
Mwaitx: DISABLED
DMAbuf Support: YES
==========
HSA Agents
Agent 1
Name: AMD Ryzen 9 5900HX with Radeon Graphics
Uuid: CPU-XX
Marketing Name: AMD Ryzen 9 5900HX with Radeon Graphics
Vendor Name: CPU
Feature: None specified
Profile: FULL_PROFILE
Float Round Mode: NEAR
Max Queue Number: 0(0x0)
Queue Min Size: 0(0x0)
Queue Max Size: 0(0x0)
Queue Type: MULTI
Node: 0
Device Type: CPU
Cache Info:
L1: 32768(0x8000) KB
Chip ID: 0(0x0)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 4680
BDFID: 0
Internal Node ID: 0
Compute Unit: 16
SIMDs per CU: 0
Shader Engines: 0
Shader Arrs. per Eng.: 0
WatchPts on Addr. Ranges:1
Memory Properties:
Features: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: FINE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
Pool 2
Segment: GLOBAL; FLAGS: KERNARG, FINE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
Pool 3
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 15736528(0xf01ed0) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:4KB
Alloc Alignment: 4KB
Accessible by all: TRUE
ISA Info:
Agent 2
Name: gfx1031
Uuid: GPU-XX
Marketing Name: AMD Radeon RX 6700M
Vendor Name: AMD
Feature: KERNEL_DISPATCH
Profile: BASE_PROFILE
Float Round Mode: NEAR
Max Queue Number: 128(0x80)
Queue Min Size: 64(0x40)
Queue Max Size: 131072(0x20000)
Queue Type: MULTI
Node: 1
Device Type: GPU
Cache Info:
L1: 16(0x10) KB
L2: 3072(0xc00) KB
L3: 98304(0x18000) KB
Chip ID: 29663(0x73df)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 2465
BDFID: 768
Internal Node ID: 1
Compute Unit: 36
SIMDs per CU: 2
Shader Engines: 2
Shader Arrs. per Eng.: 2
WatchPts on Addr. Ranges:4
Coherent Host Access: FALSE
Memory Properties:
Features: KERNEL_DISPATCH
Fast F16 Operation: TRUE
Wavefront Size: 32(0x20)
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Max Waves Per CU: 32(0x20)
Max Work-item Per CU: 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
Max fbarriers/Workgrp: 32
Packet Processor uCode:: 118
SDMA engine uCode:: 80
IOMMU Support:: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 10469376(0x9fc000) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 2
Segment: GLOBAL; FLAGS: EXTENDED FINE GRAINED
Size: 10469376(0x9fc000) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 3
Segment: GROUP
Size: 64(0x40) KB
Allocatable: FALSE
Alloc Granule: 0KB
Alloc Recommended Granule:0KB
Alloc Alignment: 0KB
Accessible by all: FALSE
ISA Info:
ISA 1
Name: amdgcn-amd-amdhsa--gfx1031
Machine Models: HSA_MACHINE_MODEL_LARGE
Profiles: HSA_PROFILE_BASE
Default Rounding Mode: NEAR
Default Rounding Mode: NEAR
Fast f16: TRUE
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
FBarrier Max Size: 32
Agent 3
Name: gfx90c
Uuid: GPU-XX
Marketing Name: AMD Radeon Graphics
Vendor Name: AMD
Feature: KERNEL_DISPATCH
Profile: BASE_PROFILE
Float Round Mode: NEAR
Max Queue Number: 128(0x80)
Queue Min Size: 64(0x40)
Queue Max Size: 131072(0x20000)
Queue Type: MULTI
Node: 2
Device Type: GPU
Cache Info:
L1: 16(0x10) KB
L2: 1024(0x400) KB
Chip ID: 5688(0x1638)
ASIC Revision: 0(0x0)
Cacheline Size: 64(0x40)
Max Clock Freq. (MHz): 2100
BDFID: 1792
Internal Node ID: 2
Compute Unit: 8
SIMDs per CU: 4
Shader Engines: 1
Shader Arrs. per Eng.: 1
WatchPts on Addr. Ranges:4
Coherent Host Access: FALSE
Memory Properties: APU
Features: KERNEL_DISPATCH
Fast F16 Operation: TRUE
Wavefront Size: 64(0x40)
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Max Waves Per CU: 40(0x28)
Max Work-item Per CU: 2560(0xa00)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
Max fbarriers/Workgrp: 32
Packet Processor uCode:: 472
SDMA engine uCode:: 40
IOMMU Support:: None
Pool Info:
Pool 1
Segment: GLOBAL; FLAGS: COARSE GRAINED
Size: 7868264(0x780f68) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 2
Segment: GLOBAL; FLAGS: EXTENDED FINE GRAINED
Size: 7868264(0x780f68) KB
Allocatable: TRUE
Alloc Granule: 4KB
Alloc Recommended Granule:2048KB
Alloc Alignment: 4KB
Accessible by all: FALSE
Pool 3
Segment: GROUP
Size: 64(0x40) KB
Allocatable: FALSE
Alloc Granule: 0KB
Alloc Recommended Granule:0KB
Alloc Alignment: 0KB
Accessible by all: FALSE
ISA Info:
ISA 1
Name: amdgcn-amd-amdhsa--gfx90c:xnack-
Machine Models: HSA_MACHINE_MODEL_LARGE
Profiles: HSA_PROFILE_BASE
Default Rounding Mode: NEAR
Default Rounding Mode: NEAR
Fast f16: TRUE
Workgroup Max Size: 1024(0x400)
Workgroup Max Size per Dimension:
x 1024(0x400)
y 1024(0x400)
z 1024(0x400)
Grid Max Size: 4294967295(0xffffffff)
Grid Max Size per Dimension:
x 4294967295(0xffffffff)
y 4294967295(0xffffffff)
z 4294967295(0xffffffff)
FBarrier Max Size: 32
*** Done ***
Additional Information
A simple example with compile instructions to simplify writing programs using rocThrust would save inexperienced users a lot of time.
The text was updated successfully, but these errors were encountered: