Skip to content

Commit

Permalink
[AMDGPU] Don't form sext/abs/neg fp8 cvt (llvm#83843)
Browse files Browse the repository at this point in the history
gfx940 does not allow the abs/sext/neg source modifiers on the SDWA forms of v_cvt_f32_fp8/bf8 or their pk variants.

Fixes SWDEV-447468

Change-Id: I818c4e029b04728bbf0fe15c5fff96c3727a7e97
  • Loading branch information
Pierre-vh authored and yanyao-wang committed Apr 17, 2024
1 parent 95f9a87 commit 122eb3d
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 0 deletions.
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4479,6 +4479,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}

// The SDWA forms of the FP8/BF8 -> F32 conversions (scalar and packed) must
// not carry any src0 input modifier. Report a verifier error if abs, neg or
// sext is set on one of them.
if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
// NOTE(review): assumes these opcodes always define a src0_modifiers
// operand, so the lookup below never returns null -- confirm against the
// instruction definitions.
const MachineOperand *Src0ModsMO =
getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
unsigned Mods = Src0ModsMO->getImm();
// Any one of the three modifier bits is enough to reject the instruction.
if (Mods & SISrcMods::ABS || Mods & SISrcMods::NEG ||
Mods & SISrcMods::SEXT) {
ErrInfo = "sext, abs and neg are not allowed on this instruction";
return false;
}
}

uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
if (isVOPC(BasicOpcode)) {
if (!ST.hasSDWASdst() && DstIdx != -1) {
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,15 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
// Bail out before touching any operand for the SDWA FP8/BF8 -> F32
// conversions (scalar and packed).
// NOTE(review): returning false presumably tells the caller the source
// operand was not folded -- confirm against the call site.
switch (MI.getOpcode()) {
case AMDGPU::V_CVT_F32_FP8_sdwa:
case AMDGPU::V_CVT_F32_BF8_sdwa:
case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
// These instructions accept no input modifiers: no abs, no neg, no sext.
return false;
}

// Find operand in instruction that matches source operand and replace it with
// target operand. Set corresponding src_sel
bool IsPreserveSrc = false;
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,99 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
ret i32 %ret
}

define float @test_sext_cvt_f32_fp8(i16 %a) {
; GFX940-LABEL: test_sext_cvt_f32_fp8:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_f32_fp8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
ret float %ret
}

define float @test_sext_cvt_f32_bf8(i16 %a) {
; GFX940-LABEL: test_sext_cvt_f32_bf8:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_f32_bf8:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
ret float %ret
}

define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
ret <2 x float> %ret
}

define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)
ret <2 x float> %ret
}
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx940 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s

# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
# CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction ***
# CHECK: $vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec

---
name: test
liveins:
body: |
bb.0:
liveins: $vgpr0, $vgpr0_vgpr1
; sext/neg
$vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0 = V_CVT_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
; abs
$vgpr0 = V_CVT_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0 = V_CVT_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0_vgpr1 = V_CVT_PK_F32_FP8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
$vgpr0_vgpr1 = V_CVT_PK_F32_BF8_sdwa 2, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec
...

0 comments on commit 122eb3d

Please sign in to comment.