From 43d4ccb25559ad476cbafca8fbb95dba0d6e03ba Mon Sep 17 00:00:00 2001 From: Aakanksha Patil Date: Mon, 9 Nov 2020 17:08:27 -0500 Subject: [PATCH 1/8] Add new versions (gfx1033) Change-Id: I578e3736fe3c56bbc423f5298fc3b7ae7c1f5c22 --- oclc/src/isa_version_1033.cl | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 oclc/src/isa_version_1033.cl diff --git a/oclc/src/isa_version_1033.cl b/oclc/src/isa_version_1033.cl new file mode 100644 index 0000000..9a07a2c --- /dev/null +++ b/oclc/src/isa_version_1033.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 10303; + From 4cf575f94a6ccecbe00019eff055e50470af565b Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 24 Nov 2020 04:30:35 +0000 Subject: [PATCH 2/8] [AMDGPU] Add missing targets - gfx600 - gfx601 - gfx602 - gfx703 - gfx704 - gfx705 - gfx805 - gfx909 - gfx90c Change-Id: If4d2b3aee3a10a28db33387afa323b5fd1ba29ec --- oclc/src/isa_version_600.cl | 11 +++++++++++ oclc/src/isa_version_601.cl | 11 +++++++++++ oclc/src/isa_version_602.cl | 11 +++++++++++ oclc/src/isa_version_703.cl | 11 +++++++++++ oclc/src/isa_version_704.cl | 11 +++++++++++ oclc/src/isa_version_705.cl | 11 +++++++++++ oclc/src/isa_version_805.cl | 11 +++++++++++ oclc/src/isa_version_909.cl | 10 ++++++++++ oclc/src/isa_version_90c.cl | 10 ++++++++++ 9 files changed, 97 insertions(+) create mode 100644 oclc/src/isa_version_600.cl create mode 100644 oclc/src/isa_version_601.cl create mode 100644 oclc/src/isa_version_602.cl create mode 100644 oclc/src/isa_version_703.cl create mode 100644 oclc/src/isa_version_704.cl create mode 100644 oclc/src/isa_version_705.cl create mode 100644 
oclc/src/isa_version_805.cl create mode 100644 oclc/src/isa_version_909.cl create mode 100644 oclc/src/isa_version_90c.cl diff --git a/oclc/src/isa_version_600.cl b/oclc/src/isa_version_600.cl new file mode 100644 index 0000000..f22f1aa --- /dev/null +++ b/oclc/src/isa_version_600.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 6000; + diff --git a/oclc/src/isa_version_601.cl b/oclc/src/isa_version_601.cl new file mode 100644 index 0000000..6e3f623 --- /dev/null +++ b/oclc/src/isa_version_601.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 6001; + diff --git a/oclc/src/isa_version_602.cl b/oclc/src/isa_version_602.cl new file mode 100644 index 0000000..c657477 --- /dev/null +++ b/oclc/src/isa_version_602.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 6002; + diff --git a/oclc/src/isa_version_703.cl b/oclc/src/isa_version_703.cl new file mode 100644 index 0000000..a4b4d78 --- /dev/null +++ b/oclc/src/isa_version_703.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 7003; + diff --git a/oclc/src/isa_version_704.cl b/oclc/src/isa_version_704.cl new file mode 100644 index 0000000..fd437e6 --- /dev/null +++ b/oclc/src/isa_version_704.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 7004; + diff --git a/oclc/src/isa_version_705.cl b/oclc/src/isa_version_705.cl new file mode 100644 index 0000000..78b9ef7 --- /dev/null +++ b/oclc/src/isa_version_705.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 7005; + diff --git a/oclc/src/isa_version_805.cl b/oclc/src/isa_version_805.cl new file mode 100644 index 0000000..18e8084 --- /dev/null +++ b/oclc/src/isa_version_805.cl @@ -0,0 +1,11 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 8005; + diff --git a/oclc/src/isa_version_909.cl b/oclc/src/isa_version_909.cl new file mode 100644 index 0000000..4503d37 --- /dev/null +++ b/oclc/src/isa_version_909.cl @@ -0,0 +1,10 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 9009; diff --git a/oclc/src/isa_version_90c.cl b/oclc/src/isa_version_90c.cl new file mode 100644 index 0000000..935753a --- /dev/null +++ b/oclc/src/isa_version_90c.cl @@ -0,0 +1,10 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/ + +#include "oclc.h" + +const __constant int __oclc_ISA_version = 9012; From d108d508d67887613ae2734ac9a36697f8afab75 Mon Sep 17 00:00:00 2001 From: Manoj S K Date: Fri, 11 Dec 2020 09:57:27 -0800 Subject: [PATCH 3/8] Changes for package naming Details are part of jira: SWDEV-257566 Signed-off-by: Manoj S K Change-Id: Ia954fa94ed5e48391d7d3ce3e4f9fcf7c6b785ad --- CMakeLists.txt | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ecbdc6a..f201c4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,10 +96,39 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators." ) + ## ROCM version updates as per naming convention + set ( ROCM_VERSION_FOR_PACKAGE "99999" ) + if( DEFINED ENV{ROCM_LIBPATCH_VERSION} ) + set ( ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION} ) + endif() ## Debian package values - set ( CPACK_DEBIAN_PACKAGE_MAINTAINER "AMD" ) + set ( CPACK_DEBIAN_PACKAGE_MAINTAINER "ROCm Compiler Support " ) set ( CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm" CACHE PATH "CPACK packaging path prefix default" ) - set ( CPACK_DEBIAN_PACKAGE_DEPENDS "" ) + set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" ) + if( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} ) + set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} ) + endif() + ## RPM package variables + set ( CPACK_RPM_PACKAGE_RELEASE "local" ) + if( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} ) + set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} ) + endif() + ## get distro for RPM package using dist + message("device-libs CPACK_RPM_PACKAGE_RELEASE now is ${CPACK_RPM_PACKAGE_RELEASE}") + + execute_process( COMMAND rpm --eval %{?dist} + RESULT_VARIABLE _result_var + OUTPUT_VARIABLE _output_var + OUTPUT_STRIP_TRAILING_WHITESPACE ) + if( _result_var EQUAL "0" AND 
NOT _output_var STREQUAL "" ) + string (APPEND CPACK_RPM_PACKAGE_RELEASE ${_output_var}) + endif() + # set package name as per standard + set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}" ) + + set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" ) + set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" ) + set ( CPACK_DEBIAN_PACKAGE_DEPENDS "" ) include( CPack ) endif() From e778e213fc0e8aba156eb66ae84b63bee3b10d7b Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Thu, 17 Dec 2020 11:04:15 +0530 Subject: [PATCH 4/8] Replace printf with a limited version of fprintf For now, only two built-in streams are supported: stdout and stderr. The stream is specified by choosing the appropriate begin function: - ockl_fprintf_stdout_begin - ockl_fprintf_stderr_begin The first ulong field in the fprintf message is the "control qword". The identity of the standard stream is encoded as a bit in this qword. Future enhancements will extend the message to specify a 64-bit FILE* argument after the control qword. 
Change-Id: Idc4f976b0fecbdb3f44b1d65c6820ba95bdfc139 --- ockl/src/services.cl | 122 +++++++++++++++++++++++++++++++------------ 1 file changed, 88 insertions(+), 34 deletions(-) diff --git a/ockl/src/services.cl b/ockl/src/services.cl index 9d94716..ced6447 100644 --- a/ockl/src/services.cl +++ b/ockl/src/services.cl @@ -7,10 +7,13 @@ #include "ockl.h" +// This must match the enumeration defined by the runtime in +// ROCclr/device/devhcmessages.hpp typedef enum { - SERVICE_DEFAULT, - SERVICE_FUNCTION_CALL, - SERVICE_PRINTF, + SERVICE_RESERVED = 0, + SERVICE_FUNCTION_CALL = 1, + SERVICE_PRINTF = 2, + SERVICE_FPRINTF = SERVICE_PRINTF, } service_id_t; extern long2 @@ -231,27 +234,51 @@ message_append_args(uint service_id, ulong msg_desc, uint num_args, ulong arg0, arg4, arg5, arg6); } -/*===--- PRINTF ------------------------------------------------------------*/ +/*===--- FPRINTF -----------------------------------------------------------*/ + +typedef enum { + FPRINTF_CTRL_STDOUT = 0, + FPRINTF_CTRL_STDERR = 1 +} fprintf_ctrl_t; + +static inline ulong +begin_fprintf(fprintf_ctrl_t flags) +{ + // The two standard output streams stderr and stdout are indicated + // using the lowest bits in the control qword. For now, all other + // bits are required to be zero. + const ulong msg_desc = msg_set_begin_flag(0); + ulong control = (ulong)flags; -/** \brief Begin a new printf message. - * \param version Must be zero. + long2 retval = + message_append_args(SERVICE_FPRINTF, msg_desc, + /* num_args = */ 1, control, 0, 0, 0, 0, 0, 0); + return retval.x; +} + +/** \brief Begin a new fprintf message for stdout. * \return Message descriptor for a new printf invocation. 
*/ ulong -__ockl_printf_begin(ulong version) +__ockl_fprintf_stdout_begin() { - const ulong msg_desc = msg_set_begin_flag(0); + return begin_fprintf(FPRINTF_CTRL_STDOUT); +} - long2 retval = message_append_args(SERVICE_PRINTF, msg_desc, 1, version, 0, - 0, 0, 0, 0, 0); - return retval.x; +/** \brief Begin a new fprintf message for stderr. + * \return Message descriptor for a new printf invocation. + */ +ulong +__ockl_fprintf_stderr_begin() +{ + return begin_fprintf(FPRINTF_CTRL_STDERR); } -/** \brief Append up to seven arguments to the printf message. - * \param msg_desc Message descriptor for the current printf. - * \param num_args Number of arguments to be appended (maximum seven). +/** \brief Append up to seven arguments to the fprintf message. + * \param msg_desc Message descriptor for the current fprintf. + * \param num_args Number of arguments to be appended (maximum seven). * \param value0... The argument values to be appended. - * \param is_last If non-zero, this causes the printf to be completed. + * \param is_last If non-zero, this causes the fprintf to be completed. * \return Value depends on #is_last. * * Only the first #num_args arguments are appended to the @@ -259,36 +286,36 @@ __ockl_printf_begin(ulong version) * undefined if #num_args is greater then seven. * * If #is_last is zero, the function returns a message desciptor that - * must be used by a subsequent call to any __ockl_printf* + * must be used by a subsequent call to any __ockl_fprintf* * function. If #is_last is non-zero, the function causes the current - * printf to be completed on the host-side, and returns the value - * returned by that printf. + * fprintf to be completed on the host-side, and returns the value + * returned by that fprintf. 
*/ ulong -__ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, - ulong value1, ulong value2, ulong value3, - ulong value4, ulong value5, ulong value6, - uint is_last) +__ockl_fprintf_append_args(ulong msg_desc, uint num_args, ulong value0, + ulong value1, ulong value2, ulong value3, + ulong value4, ulong value5, ulong value6, + uint is_last) { if (is_last) { msg_desc = msg_set_end_flag(msg_desc); } long2 retval = - message_append_args(SERVICE_PRINTF, msg_desc, num_args, value0, value1, + message_append_args(SERVICE_FPRINTF, msg_desc, num_args, value0, value1, value2, value3, value4, value5, value6); return retval.x; } -/** \brief Append a null-terminated string to the printf message. - * \param msg_desc Message descriptor for the current printf. +/** \brief Append a null-terminated string to the fprintf message. + * \param msg_desc Message descriptor for the current fprintf. * \param data Pointer to the string. * \param length Number of bytes, including the null terminator. - * \param is_last If non-zero, this causes the printf to be completed. + * \param is_last If non-zero, this causes the fprintf to be completed. * \return Value depends on #is_last. * * The function appends a single null-terminated string to a current - * printf message, including the final null character. The host-side + * fprintf message, including the final null character. The host-side * can use the bytes as a null-terminated string in place, without * having to first copy the string and then append the null * terminator. @@ -303,14 +330,14 @@ __ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, * transmission, the string is null-padded to a multiple of eight. * * If #is_last is zero, the function returns a message desciptor that - * must be used by a subsequent call to any __ockl_printf* + * must be used by a subsequent call to any __ockl_fprintf* * function. 
If #is_last is non-zero, the function causes the current - * printf to be completed on the host-side, and returns the value - * returned by that printf. + * fprintf to be completed on the host-side, and returns the value + * returned by that fprintf. */ ulong -__ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, - uint is_last) +__ockl_fprintf_append_string_n(ulong msg_desc, const char *data, ulong length, + uint is_last) { long2 retval = {0, 0}; @@ -319,12 +346,39 @@ __ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, } if (!data) { - retval = message_append_args(SERVICE_PRINTF, msg_desc, 1, 0, 0, 0, 0, 0, + retval = message_append_args(SERVICE_FPRINTF, msg_desc, 1, 0, 0, 0, 0, 0, 0, 0); return retval.x; } - retval = message_append_bytes(SERVICE_PRINTF, msg_desc, (const uchar *)data, + retval = message_append_bytes(SERVICE_FPRINTF, msg_desc, (const uchar *)data, length); return retval.x; } + +/*===--- PRINTF ------------------------------------------------------------*/ +/* DEPRECATED. Wrappers that should be removed eventually. 
*/ + +ulong +__ockl_printf_begin(ulong ignored /* used to be version */) +{ + return __ockl_fprintf_stdout_begin(); +} + +ulong +__ockl_printf_append_args(ulong msg_desc, uint num_args, ulong value0, + ulong value1, ulong value2, ulong value3, + ulong value4, ulong value5, ulong value6, + uint is_last) +{ + return __ockl_fprintf_append_args(msg_desc, num_args, value0, value1, + value2, value3, value4, value5, value6, + is_last); +} + +ulong +__ockl_printf_append_string_n(ulong msg_desc, const char *data, ulong length, + uint is_last) +{ + return __ockl_fprintf_append_string_n(msg_desc, data, length, is_last); +} From ad4e74ccb16dbb3c6871bd97c3c7667002d54edd Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Wed, 10 Feb 2021 12:28:05 -0800 Subject: [PATCH 5/8] Add stub implementation of new library Change-Id: I498b0e954ba341c9ab749f2d65975a5e8e364239 --- CMakeLists.txt | 1 + asanrtl/CMakeLists.txt | 17 ++++++++ asanrtl/src/stubs.cl | 89 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 asanrtl/CMakeLists.txt create mode 100644 asanrtl/src/stubs.cl diff --git a/CMakeLists.txt b/CMakeLists.txt index f201c4b..a1a80f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ add_subdirectory(ocml) add_subdirectory(ockl) add_subdirectory(opencl) add_subdirectory(hip) +add_subdirectory(asanrtl) if(BUILD_HC_LIB) add_subdirectory(hc) diff --git a/asanrtl/CMakeLists.txt b/asanrtl/CMakeLists.txt new file mode 100644 index 0000000..35c7013 --- /dev/null +++ b/asanrtl/CMakeLists.txt @@ -0,0 +1,17 @@ +##===-------------------------------------------------------------------------- +## ROCm Device Libraries +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. 
+##===-------------------------------------------------------------------------- + +file(GLOB sources + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cl +) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../irif/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../oclc/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) + +opencl_bc_lib(NAME asanrtl SOURCES ${sources}) diff --git a/asanrtl/src/stubs.cl b/asanrtl/src/stubs.cl new file mode 100644 index 0000000..bce90b2 --- /dev/null +++ b/asanrtl/src/stubs.cl @@ -0,0 +1,89 @@ +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + *===------------------------------------------------------------------------*/ + +typedef ulong uptr; + +void __asan_report_load_n(uptr addr, uptr size) {} + +void __asan_loadN(uptr addr, uptr size) {} + +void __asan_report_load1(uptr addr) {} + +void __asan_load1(uptr addr) {} + +void __asan_report_load2(uptr addr) {} + +void __asan_load2(uptr addr) {} + +void __asan_report_load4(uptr addr) {} + +void __asan_load4(uptr addr) {} + +void __asan_report_load8(uptr addr) {} + +void __asan_load8(uptr addr) {} + +void __asan_report_load16(uptr addr) {} + +void __asan_load16(uptr addr) {} + +void __asan_report_store_n(uptr addr, uptr size) {} + +void __asan_storeN(uptr addr, uptr size) {} + +void __asan_report_store1(uptr addr) {} + +void __asan_store1(uptr addr) {} + +void __asan_report_store2(uptr addr) {} + +void __asan_store2(uptr addr) {} + +void __asan_report_store4(uptr addr) {} + +void __asan_store4(uptr addr) {} + +void __asan_report_store8(uptr addr) {} + +void __asan_store8(uptr addr) {} + +void __asan_report_store16(uptr addr) {} + +void __asan_store16(uptr addr) {} + +void* __asan_memmove(void* to, void* from, uptr size) { return to; } + +void* 
__asan_memcpy(void* to, void* from, uptr size) { return to; } + +void* __asan_memset(void* s, int c, uptr n) { return s; } + +void __asan_handle_no_return(void) {} + +void __sanitizer_ptr_cmp(uptr a, uptr b) {} + +void __sanitizer_ptr_sub(uptr a, uptr b) {} + +void __asan_before_dynamic_init(uptr addr) {} + +void __asan_after_dynamic_init(void) {} + +void __asan_register_globals(void *start, uptr n) {} + +void __asan_unregister_globals(void *start, uptr n) {} + +void __asan_register_image_globals(uptr flag) {} + +void __asan_unregister_image_globals(uptr flag) {} + +void __asan_register_elf_globals(uptr flag, uptr start, uptr stop) {} + +void __asan_unregister_elf_globals(uptr flag, uptr start, uptr stop) {} + +void __asan_init(void) {} + +void __asan_version_mismatch_check_v8(void) {} + From a61829e29bf1788556200fc1b1228296515fd93a Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Thu, 11 Feb 2021 08:41:31 -0800 Subject: [PATCH 6/8] Reduce tanh accuracy to improve performance Change-Id: Idc0e1ddad8e29b8c2de75463d0da31386fc740f1 --- ocml/src/tanhF.cl | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ocml/src/tanhF.cl b/ocml/src/tanhF.cl index ed559fa..ec3d996 100644 --- a/ocml/src/tanhF.cl +++ b/ocml/src/tanhF.cl @@ -7,22 +7,39 @@ #include "mathF.h" +#if defined EXTRA_ACCURACY #define FLOAT_SPECIALIZATION #include "ep.h" extern CONSTATTR float2 MATH_PRIVATE(epexpep)(float2 x); +#endif CONSTATTR float MATH_MANGLE(tanh)(float x) { float y = BUILTIN_ABS_F32(x); + +#if defined EXTRA_ACCURACY float2 e = MATH_PRIVATE(epexpep)(con(y, 0.0f)); float2 ei = rcp(e); float2 t = fdiv(fsub(e, ei), fadd(e, ei)); float z = t.hi; - z = y > 8.6875f ? 1.0f : z; - z = y < 0x1.0p-12f ? y : z; + z = y > 9.0f ? 1.0f : z; + z = y < 0x1.0p-13f ? 
y : z; +#else + float z; + if (y < 0.625f) { + float y2 = y*y; + float p = MATH_MAD(y2, MATH_MAD(y2, MATH_MAD(y2, MATH_MAD(y2, + -0x1.758e7ap-8f, 0x1.521192p-6f), -0x1.b8389cp-5f), + 0x1.110704p-3f), -0x1.555532p-2f); + z = MATH_MAD(y2, y*p, y); + } else { + float t = MATH_MANGLE(exp)(2.0f * y); + z = 1.0f - MATH_FAST_DIV(2.0f, t + 1.0f); + } +#endif return BUILTIN_COPYSIGN_F32(z, x); } From 9678ef758569e1a5ae5a9a5c1617e5ec1cecf3d2 Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Fri, 12 Feb 2021 15:03:45 -0800 Subject: [PATCH 7/8] Quiet data layout warnings Change-Id: I4cfb48f40114a08ecfcbcee5325b54ad1806e7db --- hc/src/hc_atomic.ll | 2 +- ockl/src/wgscratch.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hc/src/hc_atomic.ll b/hc/src/hc_atomic.ll index 01d4fd8..1dbf7f0 100644 --- a/hc/src/hc_atomic.ll +++ b/hc/src/hc_atomic.ll @@ -1,6 +1,6 @@ ; ModuleID = 'hc_atomic.bc' -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" ; Function Attrs: alwaysinline nounwind diff --git a/ockl/src/wgscratch.ll b/ockl/src/wgscratch.ll index 76015fd..a9a3ee1 100644 --- a/ockl/src/wgscratch.ll +++ b/ockl/src/wgscratch.ll @@ -1,4 +1,4 @@ -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" ; 1024 work-items means up to 
32 work groups From d17fc7b5dbabd7be60083adf8baf0f1293100f1f Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Mon, 15 Feb 2021 10:13:48 -0800 Subject: [PATCH 8/8] Add wait function Change-Id: I96f306126e3496eb2300abc579e07c0e45cbde79 --- ockl/inc/ockl.h | 2 ++ ockl/src/wait.cl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 ockl/src/wait.cl diff --git a/ockl/inc/ockl.h b/ockl/inc/ockl.h index 9f44d94..27ecdfd 100644 --- a/ockl/inc/ockl.h +++ b/ockl/inc/ockl.h @@ -450,6 +450,8 @@ extern __attribute__((const)) __global void * OCKL_MANGLE_T(to,global)(void *); extern __attribute__((const)) __local void * OCKL_MANGLE_T(to,local)(void *); extern __attribute__((const)) __private void * OCKL_MANGLE_T(to,private)(void *); +extern void OCKL_MANGLE_T(rtcwait,u32)(uint); + #pragma OPENCL EXTENSION cl_khr_fp16 : disable #endif // OCKL_H diff --git a/ockl/src/wait.cl b/ockl/src/wait.cl new file mode 100644 index 0000000..b249599 --- /dev/null +++ b/ockl/src/wait.cl @@ -0,0 +1,56 @@ + +/*===-------------------------------------------------------------------------- + * ROCm Device Libraries + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ *===------------------------------------------------------------------------*/
+
+#include "irif.h"
+#include "ockl.h"
+#include "oclc.h"
+/* Sleep until the s_memrealtime counter has advanced by at least `ticks`. */
+__attribute__((target("s-memrealtime"))) void
+OCKL_MANGLE_T(rtcwait,u32)(uint ticks) /* ticks: wait length in counter units */
+{
+    ulong now = __builtin_amdgcn_s_memrealtime();
+    ulong end = now + __builtin_amdgcn_readfirstlane(ticks); /* deadline */
+    /* Coarsest stages: sleep arguments 127/63/31 are only used for ISA >= 9000. */
+    if (__oclc_ISA_version >= 9000) {
+        while (end > now + 1625) { /* more than 1625 ticks still remain */
+            __builtin_amdgcn_s_sleep(127);
+            now = __builtin_amdgcn_s_memrealtime(); /* re-sample after sleeping */
+        }
+        /* NOTE(review): thresholds look tuned to the ticks one s_sleep(N) costs - confirm. */
+        while (end > now + 806) {
+            __builtin_amdgcn_s_sleep(63);
+            now = __builtin_amdgcn_s_memrealtime();
+        }
+
+        while (end > now + 396) {
+            __builtin_amdgcn_s_sleep(31);
+            now = __builtin_amdgcn_s_memrealtime();
+        }
+    }
+    /* Finer stages below run for every ISA version, each with a shorter sleep. */
+    while (end > now + 192) {
+        __builtin_amdgcn_s_sleep(15);
+        now = __builtin_amdgcn_s_memrealtime();
+    }
+
+    while (end > now + 89) {
+        __builtin_amdgcn_s_sleep(7);
+        now = __builtin_amdgcn_s_memrealtime();
+    }
+
+    while (end > now + 38) {
+        __builtin_amdgcn_s_sleep(3);
+        now = __builtin_amdgcn_s_memrealtime();
+    }
+    /* Final stage: shortest sleep, repeated until the counter reaches the deadline. */
+    while (end > now) {
+        __builtin_amdgcn_s_sleep(1);
+        now = __builtin_amdgcn_s_memrealtime();
+    }
+}
+