diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 97852523033ddeb..155ba8c6184fa3f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -451,7 +451,8 @@ def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts", "HasAtomicFaddInsts", "true", "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, " - "global_atomic_pk_add_f16 instructions" + "global_atomic_pk_add_f16 instructions", + [FeatureFlatGlobalInsts] >; def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 293202a1be6ebad..20b1f157a3b6309 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -175,7 +175,7 @@ class FLAT_Store_Pseudo { - let is_flat_global = 1 in { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { def "" : FLAT_Load_Pseudo, GlobalSaddrTable<0, opName>; def _SADDR : FLAT_Load_Pseudo, @@ -184,7 +184,7 @@ multiclass FLAT_Global_Load_Pseudo { - let is_flat_global = 1 in { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { def "" : FLAT_Store_Pseudo, GlobalSaddrTable<0, opName>; def _SADDR : FLAT_Store_Pseudo, @@ -369,10 +369,12 @@ multiclass FLAT_Global_Atomic_Pseudo< SDPatternOperator atomic_rtn = null_frag, SDPatternOperator atomic_no_rtn = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> : - FLAT_Global_Atomic_Pseudo_NO_RTN, - FLAT_Global_Atomic_Pseudo_RTN; - + RegisterClass data_rc = vdst_rc> { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { + defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_RTN; + } +} //===----------------------------------------------------------------------===// // Flat Instructions @@ -509,7 +511,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", } // 
End SubtargetPredicate = isGFX7GFX10 -let SubtargetPredicate = HasFlatGlobalInsts in { defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; @@ -619,7 +620,6 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>; } // End is_flat_global = 1 -} // End SubtargetPredicate = HasFlatGlobalInsts let SubtargetPredicate = HasFlatScratchInsts in { diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll index b91536eadec1269..315180dff5fac6d 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -27,3 +27,19 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst ret void } + +; Make sure this still selects (artificially) with an incorrect subtarget, as long as the feature is set. 
+; GCN-LABEL: {{^}}global_atomic_fadd_ret_f32_wrong_subtarget: +define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 { + %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst + store float %result, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_wrong_subtarget: +define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 { + %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll index eb59c691ef67942..693b09dd0c7b768 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll @@ -70,3 +70,14 @@ main_body: call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data) ret void } + +; Make sure this still selects (artificially) with an incorrect subtarget, +; as long as the feature is set. +; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget: +; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off +define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 { + call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data) + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }