diff --git a/CMakeLists.txt b/CMakeLists.txt index f201c4b3..a1a80f59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ add_subdirectory(ocml) add_subdirectory(ockl) add_subdirectory(opencl) add_subdirectory(hip) +add_subdirectory(asanrtl) if(BUILD_HC_LIB) add_subdirectory(hc) diff --git a/asanrtl/CMakeLists.txt b/asanrtl/CMakeLists.txt new file mode 100644 index 00000000..35c7013e --- /dev/null +++ b/asanrtl/CMakeLists.txt @@ -0,0 +1,17 @@ +##===-------------------------------------------------------------------------- +## ROCm Device Libraries +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +##===-------------------------------------------------------------------------- + +file(GLOB sources + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cl +) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../irif/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../oclc/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) + +opencl_bc_lib(NAME asanrtl SOURCES ${sources}) diff --git a/asanrtl/src/stubs.cl b/asanrtl/src/stubs.cl new file mode 100644 index 00000000..bce90b25 --- /dev/null +++ b/asanrtl/src/stubs.cl @@ -0,0 +1,96 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/ + +typedef ulong uptr; + +/* Placeholder definitions of the AddressSanitizer runtime entry points. + * The check, report and registration hooks below are intentionally empty. + */ + +void __asan_report_load_n(uptr addr, uptr size) {} + +void __asan_loadN(uptr addr, uptr size) {} + +void __asan_report_load1(uptr addr) {} + +void __asan_load1(uptr addr) {} + +void __asan_report_load2(uptr addr) {} + +void __asan_load2(uptr addr) {} + +void __asan_report_load4(uptr addr) {} + +void __asan_load4(uptr addr) {} + +void __asan_report_load8(uptr addr) {} + +void __asan_load8(uptr addr) {} + +void __asan_report_load16(uptr addr) {} + +void __asan_load16(uptr addr) {} + +void __asan_report_store_n(uptr addr, uptr size) {} + +void __asan_storeN(uptr addr, uptr size) {} + +void __asan_report_store1(uptr addr) {} + +void __asan_store1(uptr addr) {} + +void __asan_report_store2(uptr addr) {} + +void __asan_store2(uptr addr) {} + +void __asan_report_store4(uptr addr) {} + +void __asan_store4(uptr addr) {} + +void __asan_report_store8(uptr addr) {} + +void __asan_store8(uptr addr) {} + +void __asan_report_store16(uptr addr) {} + +void __asan_store16(uptr addr) {} + +/* Instrumented code calls these in place of memmove, memcpy and memset, so + * they must still carry out the operation even though nothing is checked. + */ +void* __asan_memmove(void* to, void* from, uptr size) { return __builtin_memmove(to, from, size); } + +void* __asan_memcpy(void* to, void* from, uptr size) { return __builtin_memcpy(to, from, size); } + +void* __asan_memset(void* s, int c, uptr n) { return __builtin_memset(s, c, n); } + +void __asan_handle_no_return(void) {} + +void __sanitizer_ptr_cmp(uptr a, uptr b) {} + +void __sanitizer_ptr_sub(uptr a, uptr b) {} + +void __asan_before_dynamic_init(uptr addr) {} + +void __asan_after_dynamic_init(void) {} + +void __asan_register_globals(void *start, uptr n) {} + +void __asan_unregister_globals(void *start, uptr n) {} + +void __asan_register_image_globals(uptr flag) {} + +void __asan_unregister_image_globals(uptr flag) {} + +void __asan_register_elf_globals(uptr flag, uptr start, uptr stop) {} + +void __asan_unregister_elf_globals(uptr flag, uptr start, uptr stop) {} + +void __asan_init(void) {} + +void __asan_version_mismatch_check_v8(void) {} + diff --git 
a/hc/src/hc_atomic.ll b/hc/src/hc_atomic.ll index 01d4fd89..1dbf7f0d 100644 --- a/hc/src/hc_atomic.ll +++ b/hc/src/hc_atomic.ll @@ -1,6 +1,6 @@ ; ModuleID = 'hc_atomic.bc' -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" ; Function Attrs: alwaysinline nounwind diff --git a/ockl/inc/ockl.h b/ockl/inc/ockl.h index 9f44d94e..27ecdfd3 100644 --- a/ockl/inc/ockl.h +++ b/ockl/inc/ockl.h @@ -450,6 +450,8 @@ extern __attribute__((const)) __global void * OCKL_MANGLE_T(to,global)(void *); extern __attribute__((const)) __local void * OCKL_MANGLE_T(to,local)(void *); extern __attribute__((const)) __private void * OCKL_MANGLE_T(to,private)(void *); +extern void OCKL_MANGLE_T(rtcwait,u32)(uint); + #pragma OPENCL EXTENSION cl_khr_fp16 : disable #endif // OCKL_H diff --git a/ockl/src/services.cl b/ockl/src/services.cl index 9d94716e..ced64478 100644 --- a/ockl/src/services.cl +++ b/ockl/src/services.cl @@ -7,10 +7,13 @@ #include "ockl.h" +// This must match the enumeration defined by the runtime in +// ROCclr/device/devhcmessages.hpp typedef enum { - SERVICE_DEFAULT, - SERVICE_FUNCTION_CALL, - SERVICE_PRINTF, + SERVICE_RESERVED = 0, + SERVICE_FUNCTION_CALL = 1, + SERVICE_PRINTF = 2, + SERVICE_FPRINTF = SERVICE_PRINTF, } service_id_t; extern long2 @@ -231,27 +234,51 @@ message_append_args(uint service_id, ulong msg_desc, uint num_args, ulong arg0, arg4, arg5, arg6); } -/*===--- PRINTF ------------------------------------------------------------*/ +/*===--- FPRINTF -----------------------------------------------------------*/ + +typedef enum { + FPRINTF_CTRL_STDOUT = 0, + FPRINTF_CTRL_STDERR = 1 +} 
fprintf_ctrl_t; + +static inline ulong +begin_fprintf(fprintf_ctrl_t flags) +{ + // The two standard output streams stderr and stdout are indicated + // using the lowest bits in the control qword. For now, all other + // bits are required to be zero. + const ulong msg_desc = msg_set_begin_flag(0); + ulong control = (ulong)flags; -/** \brief Begin a new printf message. - * \param version Must be zero. + long2 retval = + message_append_args(SERVICE_FPRINTF, msg_desc, + /* num_args = */ 1, control, 0, 0, 0, 0, 0, 0); + return retval.x; +} + +/** \brief Begin a new fprintf message for stdout. - * \return Message descriptor for a new printf invocation. + * \return Message descriptor for a new fprintf invocation. */ ulong -__ockl_printf_begin(ulong version) +__ockl_fprintf_stdout_begin() { - const ulong msg_desc = msg_set_begin_flag(0); + return begin_fprintf(FPRINTF_CTRL_STDOUT); +} - long2 retval = message_append_args(SERVICE_PRINTF, msg_desc, 1, version, 0, - 0, 0, 0, 0, 0); - return retval.x; +/** \brief Begin a new fprintf message for stderr. + * \return Message descriptor for a new fprintf invocation. + */ +ulong +__ockl_fprintf_stderr_begin() +{ + return begin_fprintf(FPRINTF_CTRL_STDERR); } -/** \brief Append up to seven arguments to the printf message. - * \param msg_desc Message descriptor for the current printf. - * \param num_args Number of arguments to be appended (maximum seven). +/** \brief Append up to seven arguments to the fprintf message. + * \param msg_desc Message descriptor for the current fprintf. + * \param num_args Number of arguments to be appended (maximum seven). * \param value0... The argument values to be appended. - * \param is_last If non-zero, this causes the printf to be completed. + * \param is_last If non-zero, this causes the fprintf to be completed. * \return Value depends on #is_last. * * Only the first #num_args arguments are appended to the @@ -259,36 +286,36 @@ __ockl_printf_begin(ulong version) - * undefined if #num_args is greater then seven. + * undefined if #num_args is greater than seven. 
* - * If #is_last is zero, the function returns a message desciptor that - * must be used by a subsequent call to any __ockl_printf* + * If #is_last is zero, the function returns a message descriptor that + * must be used by a subsequent call to any __ockl_fprintf* * function. If #is_last is non-zero, the function causes the current - * printf to be completed on the host-side, and returns the value - * returned by that printf. + * fprintf to be completed on the host-side, and returns the value + * returned by that fprintf. */ ulong -__ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, - ulong value1, ulong value2, ulong value3, - ulong value4, ulong value5, ulong value6, - uint is_last) +__ockl_fprintf_append_args(ulong msg_desc, uint num_args, ulong value0, + ulong value1, ulong value2, ulong value3, + ulong value4, ulong value5, ulong value6, + uint is_last) { if (is_last) { msg_desc = msg_set_end_flag(msg_desc); } long2 retval = - message_append_args(SERVICE_PRINTF, msg_desc, num_args, value0, value1, + message_append_args(SERVICE_FPRINTF, msg_desc, num_args, value0, value1, value2, value3, value4, value5, value6); return retval.x; } -/** \brief Append a null-terminated string to the printf message. - * \param msg_desc Message descriptor for the current printf. +/** \brief Append a null-terminated string to the fprintf message. + * \param msg_desc Message descriptor for the current fprintf. * \param data Pointer to the string. * \param length Number of bytes, including the null terminator. - * \param is_last If non-zero, this causes the printf to be completed. + * \param is_last If non-zero, this causes the fprintf to be completed. * \return Value depends on #is_last. * * The function appends a single null-terminated string to a current - * printf message, including the final null character. The host-side + * fprintf message, including the final null character. The host-side * can use the bytes as a null-terminated string in place, without * having to first copy the string and then append the null * terminator. 
@@ -303,14 +330,14 @@ __ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, * transmission, the string is null-padded to a multiple of eight. * - * If #is_last is zero, the function returns a message desciptor that - * must be used by a subsequent call to any __ockl_printf* + * If #is_last is zero, the function returns a message descriptor that + * must be used by a subsequent call to any __ockl_fprintf* * function. If #is_last is non-zero, the function causes the current - * printf to be completed on the host-side, and returns the value - * returned by that printf. + * fprintf to be completed on the host-side, and returns the value + * returned by that fprintf. */ ulong -__ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, - uint is_last) +__ockl_fprintf_append_string_n(ulong msg_desc, const char *data, ulong length, + uint is_last) { long2 retval = {0, 0}; @@ -319,12 +346,39 @@ __ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, } if (!data) { - retval = message_append_args(SERVICE_PRINTF, msg_desc, 1, 0, 0, 0, 0, 0, + retval = message_append_args(SERVICE_FPRINTF, msg_desc, 1, 0, 0, 0, 0, 0, 0, 0); return retval.x; } - retval = message_append_bytes(SERVICE_PRINTF, msg_desc, (const uchar *)data, + retval = message_append_bytes(SERVICE_FPRINTF, msg_desc, (const uchar *)data, length); return retval.x; } + +/*===--- PRINTF ------------------------------------------------------------*/ +/* DEPRECATED. Wrappers that should be removed eventually. 
*/ + +ulong +__ockl_printf_begin(ulong ignored /* used to be version */) +{ + return __ockl_fprintf_stdout_begin(); +} + +ulong +__ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, + ulong value1, ulong value2, ulong value3, + ulong value4, ulong value5, ulong value6, + uint is_last) +{ + return __ockl_fprintf_append_args(msg_desc, num_args, value0, value1, + value2, value3, value4, value5, value6, + is_last); +} + +ulong +__ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, + uint is_last) +{ + return __ockl_fprintf_append_string_n(msg_desc, data, length, is_last); +} diff --git a/ockl/src/wait.cl b/ockl/src/wait.cl new file mode 100644 index 00000000..b2495992 --- /dev/null +++ b/ockl/src/wait.cl @@ -0,0 +1,64 @@ + +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "irif.h" +#include "ockl.h" +#include "oclc.h" + +/** \brief Return once the realtime counter has advanced by at least #ticks. + * \param ticks Number of counter ticks to wait for. The value is obtained + * through __builtin_amdgcn_readfirstlane. + * + * The counter is re-read after every s_sleep, and the sleep request gets + * shorter as the deadline approaches. The three longest sleep requests + * are only used when __oclc_ISA_version is at least 9000. + */ +__attribute__((target("s-memrealtime"))) void +OCKL_MANGLE_T(rtcwait,u32)(uint ticks) +{ + ulong now = __builtin_amdgcn_s_memrealtime(); + ulong end = now + __builtin_amdgcn_readfirstlane(ticks); + + if (__oclc_ISA_version >= 9000) { + while (end > now + 1625) { + __builtin_amdgcn_s_sleep(127); + now = __builtin_amdgcn_s_memrealtime(); + } + + while (end > now + 806) { + __builtin_amdgcn_s_sleep(63); + now = __builtin_amdgcn_s_memrealtime(); + } + + while (end > now + 396) { + __builtin_amdgcn_s_sleep(31); + now = __builtin_amdgcn_s_memrealtime(); + } + } + + while (end > now + 192) { + __builtin_amdgcn_s_sleep(15); + now = __builtin_amdgcn_s_memrealtime(); + } + + while (end > now + 89) { + __builtin_amdgcn_s_sleep(7); + now = __builtin_amdgcn_s_memrealtime(); + } + + while (end > now + 38) { + __builtin_amdgcn_s_sleep(3); + now = 
__builtin_amdgcn_s_memrealtime(); + } + + while (end > now) { + __builtin_amdgcn_s_sleep(1); + now = __builtin_amdgcn_s_memrealtime(); + } +} + diff --git a/ockl/src/wgscratch.ll b/ockl/src/wgscratch.ll index 76015fda..a9a3ee18 100644 --- a/ockl/src/wgscratch.ll +++ b/ockl/src/wgscratch.ll @@ -1,4 +1,4 @@ -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" ; 1024 work-items means up to 32 work groups diff --git a/ocml/src/tanhF.cl b/ocml/src/tanhF.cl index ed559fa3..ec3d9961 100644 --- a/ocml/src/tanhF.cl +++ b/ocml/src/tanhF.cl @@ -7,22 +7,41 @@ #include "mathF.h" +#if defined EXTRA_ACCURACY #define FLOAT_SPECIALIZATION #include "ep.h" extern CONSTATTR float2 MATH_PRIVATE(epexpep)(float2 x); +#endif CONSTATTR float MATH_MANGLE(tanh)(float x) { float y = BUILTIN_ABS_F32(x); + +#if defined EXTRA_ACCURACY float2 e = MATH_PRIVATE(epexpep)(con(y, 0.0f)); float2 ei = rcp(e); float2 t = fdiv(fsub(e, ei), fadd(e, ei)); float z = t.hi; - z = y > 8.6875f ? 1.0f : z; - z = y < 0x1.0p-12f ? y : z; + z = y > 9.0f ? 1.0f : z; + z = y < 0x1.0p-13f ? y : z; +#else + float z; + if (y < 0.625f) { + /* Odd polynomial in y: z = y + y^3 * p(y^2). */ + float y2 = y*y; + float p = MATH_MAD(y2, MATH_MAD(y2, MATH_MAD(y2, MATH_MAD(y2, + -0x1.758e7ap-8f, 0x1.521192p-6f), -0x1.b8389cp-5f), + 0x1.110704p-3f), -0x1.555532p-2f); + z = MATH_MAD(y2, y*p, y); + } else { + /* tanh(y) = 1 - 2 / (exp(2y) + 1) */ + float t = MATH_MANGLE(exp)(2.0f * y); + z = 1.0f - MATH_FAST_DIV(2.0f, t + 1.0f); + } +#endif return BUILTIN_COPYSIGN_F32(z, x); }