Skip to content

Commit

Permalink
SWDEV-246241 Let clang atomic builtins fetch add/sub support floating…
Browse files Browse the repository at this point in the history
… point types

Differential Revision: https://reviews.llvm.org/D71726

Change-Id: I72f4be2f67893676d0709ff563817aaf65e686a4
  • Loading branch information
yxsamliu authored and searlmc1 committed Aug 12, 2020
1 parent 6c1c186 commit b98349b
Show file tree
Hide file tree
Showing 16 changed files with 251 additions and 41 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -7880,6 +7880,9 @@ def err_atomic_op_needs_non_const_pointer : Error<
def err_atomic_op_needs_trivial_copy : Error<
"address argument to atomic operation must be a pointer to a "
"trivially-copyable type (%0 invalid)">;
def err_atomic_op_needs_atomic_int_ptr_or_fp : Error<
"address argument to atomic operation must be a pointer to %select{|atomic }0"
"integer, pointer or supported floating point type (%1 invalid)">;
def err_atomic_op_needs_atomic_int_or_ptr : Error<
"address argument to atomic operation must be a pointer to %select{|atomic }0"
"integer or pointer (%1 invalid)">;
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1414,6 +1414,12 @@ class TargetInfo : public virtual TransferrableTargetInfo,
/// Whether target allows debuginfo types for decl only variables.
virtual bool allowDebugInfoForExternalVar() const { return false; }

/// Whether floating point atomic fetch add/sub is supported.
virtual bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const {
return false;
}

protected:
/// Copy type and layout related info.
void copyAuxTarget(const TargetInfo *Aux);
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Basic/Targets/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,18 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
int getEHDataRegisterNumber(unsigned RegNo) const override;

bool hasInt128Type() const override;


bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
case llvm::APFloat::S_IEEEdouble:
return true;
default:
return false;
}
}
};

class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,17 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
}

void setAuxTarget(const TargetInfo *Aux) override;

bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
case llvm::APFloat::S_IEEEdouble:
return true;
default:
return false;
}
}
};

} // namespace targets
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Basic/Targets/ARM.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,17 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
int getEHDataRegisterNumber(unsigned RegNo) const override;

bool hasSjLjLowering() const override;


bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
return true;
default:
return false;
}
}
};

class LLVM_LIBRARY_VISIBILITY ARMleTargetInfo : public ARMTargetInfo {
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Basic/Targets/Hexagon.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,17 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo {
// We can write more stricter checks later.
return CPU.find('t') != std::string::npos;
}

bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
case llvm::APFloat::S_IEEEdouble:
return true;
default:
return false;
}
}
};
} // namespace targets
} // namespace clang
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Basic/Targets/Mips.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,17 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
unsigned getUnwindWordWidth() const override;

bool validateTarget(DiagnosticsEngine &Diags) const override;

bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
case llvm::APFloat::S_IEEEdouble:
return true;
default:
return false;
}
}
};
} // namespace targets
} // namespace clang
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,18 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
uint64_t getPointerAlignV(unsigned AddrSpace) const override {
return getPointerWidthV(AddrSpace);
}

bool
isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override {
switch (llvm::APFloat::SemanticsToEnum(FS)) {
case llvm::APFloat::S_IEEEsingle:
return MaxAtomicInlineWidth >= 32;
case llvm::APFloat::S_IEEEdouble:
return MaxAtomicInlineWidth >= 64;
default:
return false;
}
}
};

// X86-32 generic target
Expand Down
43 changes: 30 additions & 13 deletions clang/lib/CodeGen/CGAtomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,21 +594,25 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;

case AtomicExpr::AO__atomic_add_fetch:
PostOp = llvm::Instruction::Add;
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
: llvm::Instruction::Add;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
Op = llvm::AtomicRMWInst::Add;
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
: llvm::AtomicRMWInst::Add;
break;

case AtomicExpr::AO__atomic_sub_fetch:
PostOp = llvm::Instruction::Sub;
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
: llvm::Instruction::Sub;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
Op = llvm::AtomicRMWInst::Sub;
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
: llvm::AtomicRMWInst::Sub;
break;

case AtomicExpr::AO__atomic_min_fetch:
Expand Down Expand Up @@ -806,6 +810,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits;
bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0;
bool UseLibcall = Misaligned | Oversized;
bool ShouldCastToIntPtrTy = true;


if (UseLibcall) {
CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
Expand Down Expand Up @@ -875,11 +881,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
break;
}
LLVM_FALLTHROUGH;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
ShouldCastToIntPtrTy = !MemTy->isFloatingType();
LLVM_FALLTHROUGH;

case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_store:
Expand Down Expand Up @@ -920,15 +929,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
AtomicInfo Atomics(*this, AtomicVal);

Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1);
if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2);
if (Dest.isValid())
Dest = Atomics.emitCastToAtomicIntPointer(Dest);
else if (E->isCmpXChg())
if (ShouldCastToIntPtrTy) {
Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
if (Val1.isValid())
Val1 = Atomics.convertToAtomicIntPointer(Val1);
if (Val2.isValid())
Val2 = Atomics.convertToAtomicIntPointer(Val2);
}
if (Dest.isValid()) {
if (ShouldCastToIntPtrTy)
Dest = Atomics.emitCastToAtomicIntPointer(Dest);
} else if (E->isCmpXChg())
Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
else if (!RValTy->isVoidType())
Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca());
else if (!RValTy->isVoidType()) {
Dest = Atomics.CreateTempAlloca();
if (ShouldCastToIntPtrTy)
Dest = Atomics.emitCastToAtomicIntPointer(Dest);
}

// Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
if (UseLibcall) {
Expand Down
11 changes: 7 additions & 4 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4366,9 +4366,11 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
// For an arithmetic operation, the implied arithmetic must be well-formed.
if (Form == Arithmetic) {
// gcc does not enforce these rules for GNU atomics, but we do so for sanity.
if (IsAddSub && !ValType->isIntegerType()
&& !ValType->isPointerType()) {
Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_or_ptr)
if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType() &&
(!ValType->isFloatingType() ||
!Context.getTargetInfo().isFPAtomicFetchAddSubSupported(
Context.getFloatTypeSemantics(ValType)))) {
Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_ptr_or_fp)
<< IsC11 << Ptr->getType() << Ptr->getSourceRange();
return ExprError();
}
Expand Down Expand Up @@ -4495,7 +4497,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
// passed by address. For the rest, GNU uses by-address and C11 uses
// by-value.
assert(Form != Load);
if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()))
if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()) ||
(IsAddSub && ValType->isFloatingType()))
Ty = ValType;
else if (Form == Copy || Form == Xchg) {
if (IsPassedByAddress) {
Expand Down
44 changes: 44 additions & 0 deletions clang/test/CodeGen/fp-atomic-ops.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// RUN: %clang_cc1 %s -emit-llvm -DDOUBLE -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT,DOUBLE %s

// RUN: %clang_cc1 %s -emit-llvm -DDOUBLE -O0 -o - -triple=aarch64-linux-gnu \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT,DOUBLE %s

// RUN: %clang_cc1 %s -emit-llvm -O0 -o - -triple=armv8-apple-ios7.0 \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT %s

// RUN: %clang_cc1 %s -emit-llvm -DDOUBLE -O0 -o - -triple=hexagon \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT,DOUBLE %s

// RUN: %clang_cc1 %s -emit-llvm -DDOUBLE -O0 -o - -triple=mips64-mti-linux-gnu \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT,DOUBLE %s

// RUN: %clang_cc1 %s -emit-llvm -O0 -o - -triple=i686-linux-gnu \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT %s

// RUN: %clang_cc1 %s -emit-llvm -DDOUBLE -O0 -o - -triple=x86_64-linux-gnu \
// RUN: | opt -instnamer -S | FileCheck -check-prefixes=FLOAT,DOUBLE %s

typedef enum memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;

void test(float *f, float ff, double *d, double dd) {
// FLOAT: atomicrmw fadd float* {{.*}} monotonic
__atomic_fetch_add(f, ff, memory_order_relaxed);

// FLOAT: atomicrmw fsub float* {{.*}} monotonic
__atomic_fetch_sub(f, ff, memory_order_relaxed);

#ifdef DOUBLE
// DOUBLE: atomicrmw fadd double* {{.*}} monotonic
__atomic_fetch_add(d, dd, memory_order_relaxed);

// DOUBLE: atomicrmw fsub double* {{.*}} monotonic
__atomic_fetch_sub(d, dd, memory_order_relaxed);
#endif
}
27 changes: 27 additions & 0 deletions clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns | FileCheck %s

// REQUIRES: amdgpu-registered-target

#include "Inputs/cuda.h"
#include <stdatomic.h>

__device__ float ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
// CHECK: atomicrmw fadd float* {{.*}} monotonic
return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
}

__device__ double ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
// CHECK: atomicrmw fsub double* {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
}

// long double is the same as double for amdgcn.
__device__ long double ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
// CHECK: atomicrmw fsub double* {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
}
23 changes: 20 additions & 3 deletions clang/test/CodeGenOpenCL/atomic-ops.cl
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN: | opt -instnamer -S | FileCheck %s

// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa \
// RUN: -emit-llvm -o - | opt -instnamer -S | FileCheck %s

#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED

#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable

typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));

Expand Down Expand Up @@ -185,6 +190,18 @@ float ff3(atomic_float *d) {
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

float ff4(global atomic_float *d, float a) {
// CHECK-LABEL: @ff4
// CHECK: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

float ff5(global atomic_double *d, double a) {
// CHECK-LABEL: @ff5
// CHECK: atomicrmw fadd double addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
Expand Down
Loading

0 comments on commit b98349b

Please sign in to comment.