diff --git a/ockl/src/gaaf.cl b/ockl/src/gaaf.cl index e5b00b7c..cb5ed822 100644 --- a/ockl/src/gaaf.cl +++ b/ockl/src/gaaf.cl @@ -18,12 +18,12 @@ extern void __llvm_amdgcn_global_atomic_fadd_p1f32_f32(__global float *, float) __asm("llvm.amdgcn.global.atomic.fadd.p1f32.f32"); void -__ockl_global_atomic_add_f32(__global float *p, float v) +__ockl_atomic_add_noret_f32(float *p, float v) { - if (__oclc_ISA_version == 9008) { - __llvm_amdgcn_global_atomic_fadd_p1f32_f32(p, v); + if (__oclc_ISA_version == 9008 && !__ockl_is_local_addr(p) && !__ockl_is_private_addr(p)) { + __llvm_amdgcn_global_atomic_fadd_p1f32_f32((__global float *)p, v); } else { - __global atomic_uint *t = (__global atomic_uint *)p; + atomic_uint *t = (atomic_uint *)p; uint e = AL(t, memory_order_relaxed, memory_scope_device); while (!AC(t, &e, AS_UINT(v + AS_FLOAT(e)), memory_order_relaxed, memory_order_relaxed, memory_scope_device)) ; diff --git a/ockl/src/mtime.cl b/ockl/src/mtime.cl index 2b77cdb9..543aaa30 100644 --- a/ockl/src/mtime.cl +++ b/ockl/src/mtime.cl @@ -7,7 +7,7 @@ #include "ockl.h" -ulong +__attribute__((target("s-memtime-inst"))) ulong OCKL_MANGLE_U64(memtime)(void) { return __builtin_amdgcn_s_memtime();