Skip to content

Commit

Permalink
[AMDGPU] Allow selection of BITOP3 for some 2 opcodes and B32 cases (l…
Browse files Browse the repository at this point in the history
…lvm#122267)

This came up in downstream static analysis - as a dead code.

Admittedly, it depends on what the intention was when checking for [`if
(NumOpcodes == 2 &&
IsB32)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3792C3-L3792C32)
and I took a guess that for certain cases the selection should take
place.

If that's incorrect, that whole if statement can be removed, as it is
after a check for: [`if (NumOpcodes <
4)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3788)
  • Loading branch information
jchlanda authored Jan 10, 2025
1 parent 4c0a0f7 commit 01a7d4e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 22 deletions.
13 changes: 6 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3782,13 +3782,7 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
if (NumOpcodes < 2 || Src.empty())
return false;

// For a uniform case threshold should be higher to account for moves between
// VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs
// and a readtfirstlane after.
if (NumOpcodes < 4)
return false;

bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
if (NumOpcodes == 2 && IsB32) {
// Avoid using BITOP3 for OR3, XOR3, AND_OR. This is not faster but makes
// asm more readable. This cannot be modeled with AddedComplexity because
Expand All @@ -3797,6 +3791,11 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
mi_match(MI, *MRI, m_GOr(m_GOr(m_Reg(), m_Reg()), m_Reg())) ||
mi_match(MI, *MRI, m_GOr(m_GAnd(m_Reg(), m_Reg()), m_Reg())))
return false;
} else if (NumOpcodes < 4) {
// For a uniform case threshold should be higher to account for moves
// between VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be
// in SGPRs and a readtfirstlane after.
return false;
}

unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
Expand Down
26 changes: 11 additions & 15 deletions llvm/test/CodeGen/AMDGPU/bitop3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: not_and_and_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
Expand Down Expand Up @@ -103,8 +102,7 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: and_and_not_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%notc = xor i32 %c, -1
Expand All @@ -122,8 +120,7 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: and_and_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
; GFX950-GISEL-NEXT: ; return to shader part epilog
%and1 = and i32 %a, %c
%and2 = and i32 %and1, %b
Expand All @@ -141,8 +138,7 @@ define amdgpu_ps float @test_12(i32 %a, i32 %b) {
;
; GFX950-GISEL-LABEL: test_12:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%and1 = and i32 %nota, %b
Expand Down Expand Up @@ -214,9 +210,11 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: test_12_src_overflow:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v2
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%notc = xor i32 %c, -1
Expand All @@ -242,11 +240,9 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: test_100_src_overflow:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1
; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3
; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4
; GFX950-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2
Expand Down

0 comments on commit 01a7d4e

Please sign in to comment.