Skip to content

Commit

Permalink
Cherry-pick to mainline for SWDEV-228246
Browse files Browse the repository at this point in the history
AMDGPU: Fix overriding global FP atomic feature predicates

Global TableGen let override blocks are pretty dangerous and override
any local special cases. In this case, the broader HasFlatGlobalInsts
was overriding the more specific predicate for
FeatureAtomicFaddInsts. Make sure HasFlatGlobalInsts is implied by
FeatureAtomicFaddInsts, and make sure the right predicate is used.

One issue with independently setting the subtarget features on
incompatible targets is all of the encoding families do not define all
opcodes. This will hit an assert on gfx10 for example, since we set
the encoding independently based on the generation and not based on a
feature.

Change-Id: Ib23d5fe2e230469d406d4c92617637a1864605d4
  • Loading branch information
arsenm authored and searlmc1 committed Jun 5, 2020
1 parent fd74445 commit 0383ad1
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 9 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@ def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
"HasAtomicFaddInsts",
"true",
"Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
"global_atomic_pk_add_f16 instructions"
"global_atomic_pk_add_f16 instructions",
[FeatureFlatGlobalInsts]
>;

def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
Expand Down
16 changes: 8 additions & 8 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
}

multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
Expand All @@ -184,7 +184,7 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}

multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
Expand Down Expand Up @@ -369,10 +369,12 @@ multiclass FLAT_Global_Atomic_Pseudo<
SDPatternOperator atomic_rtn = null_frag,
SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> :
FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;

RegisterClass data_rc = vdst_rc> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
}
}

//===----------------------------------------------------------------------===//
// Flat Instructions
Expand Down Expand Up @@ -509,7 +511,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",

} // End SubtargetPredicate = isGFX7GFX10

let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
Expand Down Expand Up @@ -619,7 +620,6 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
VReg_64, i64, atomic_dec_global_64>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts


let SubtargetPredicate = HasFlatScratchInsts in {
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,19 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(float addrspace(1)* %ptr
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
ret void
}

; Make sure this artificially selects with an incorrect subtarget, but the feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_ret_f32_wrong_subtarget:
define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 {
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
store float %result, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_wrong_subtarget:
define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 {
%result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
ret void
}

attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,14 @@ main_body:
call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
ret void
}

; Make sure this artificially selects with an incorrect subtarget, but
; the feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off
define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data)
ret void
}

attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }

0 comments on commit 0383ad1

Please sign in to comment.