forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- To fix the backend crash in SILoadStoreOptimizer [AMDGPU] Fix crash in SILoadStoreOptimizer. SILoadStoreOptimizer::checkAndPrepareMerge() expects the base and paired instructions to come in order and scans the MBB from the base to the paired instruction. The original order can change if there was a dependent instruction in between and the base instruction was moved. Fixed by bailing out of the optimization. In theory it might still be possible to perform the merge by swapping the instructions, but in practice it bails anyway because it finds a dependency on the same instruction that caused the base to move. Differential Revision: https://reviews.llvm.org/D77245 Change-Id: I7da50f9003878835b6545998bc1e6d2a2c978f77
- Loading branch information
1 parent
6a0ffc8
commit 3f5fe97
Showing
3 changed files
with
62 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s | ||
|
||
; External arrays of doubles in address space 3 (local/LDS memory on AMDGPU). | ||
; The kernel below loads from @L and stores into @Ldisp. | ||
@L = external local_unnamed_addr addrspace(3) global [9 x double], align 16 | ||
@Ldisp = external local_unnamed_addr addrspace(3) global [96 x double], align 16 | ||
|
||
; Stores are reordered while loads are merged. This case used to assert while | ||
; scanning for a paired instruction because the scan expected the paired | ||
; instruction to follow the base one. | ||
|
||
; GCN-LABEL: {{^}}out_of_order_merge: | ||
; GCN-DAG: ds_read2_b64 | ||
; GCN-DAG: ds_read2_b64 | ||
; GCN-DAG: ds_write_b64 | ||
; GCN-DAG: ds_write_b64 | ||
; GCN-DAG: ds_write_b64 | ||
; Reproducer kernel: three loads from @L interleaved with three stores. | ||
; The first two stores target @Ldisp[0] and @Ldisp[1]; the last one targets | ||
; an undef pointer. The exact instruction order is part of the test. | ||
define amdgpu_kernel void @out_of_order_merge() { | ||
entry: | ||
; Store destinations: elements 0 and 1 of @Ldisp. | ||
%gep1 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 0 | ||
%gep2 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 1 | ||
; Load two doubles starting at element 1 of @L, add the two lanes and store | ||
; the sum to %gep1. | ||
%tmp12 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 0, i32 1) to <2 x double> addrspace(3)*), align 8 | ||
%tmp14 = extractelement <2 x double> %tmp12, i32 0 | ||
%tmp15 = extractelement <2 x double> %tmp12, i32 1 | ||
%add50.i = fadd double %tmp14, %tmp15 | ||
store double %add50.i, double addrspace(3)* %gep1, align 8 | ||
; Scalar load addressed with a first GEP index of 1, i.e. one whole | ||
; [9 x double] past @L; the value is stored to %gep2. | ||
%tmp16 = load double, double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 1, i32 0), align 8 | ||
store double %tmp16, double addrspace(3)* %gep2, align 8 | ||
; Vector load addressed with GEP indices (2, 1); only lane 1 is used, and it | ||
; is stored through an undef pointer. | ||
%tmp17 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 2, i32 1) to <2 x double> addrspace(3)*), align 8 | ||
%tmp19 = extractelement <2 x double> %tmp17, i32 1 | ||
store double %tmp19, double addrspace(3)* undef, align 8 | ||
ret void | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt %s -o - | FileCheck -check-prefix=GCN %s | ||
|
||
# GCN-LABEL: name: out_of_order_merge | ||
# GCN: DS_READ2_B64_gfx9 | ||
# GCN: DS_WRITE_B64_gfx9 | ||
# GCN: DS_READ2_B64_gfx9 | ||
# GCN: DS_WRITE_B64_gfx9 | ||
# GCN: DS_WRITE_B64_gfx9 | ||
# MIR input for the si-load-store-opt pass. One basic block containing: | ||
#   - %4 = 0, the address operand of every DS access except the last write; | ||
#   - 8-byte loads at offsets 776 and 784 (adjacent), then 840; | ||
#   - a write of %17 (the offset-840 load) at offset 8 and a write of %6 | ||
#     (the offset-784 load) at offset 0; | ||
#   - an 8-byte load at offset 928, issued after those two writes; | ||
#   - a final write of %5 through an undef address register. | ||
# The GCN check lines expect two DS_READ2_B64 and three unmerged | ||
# DS_WRITE_B64 in the pass output. | ||
--- | ||
name: out_of_order_merge | ||
body: | | ||
bb.0: | ||
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | ||
%5:vreg_64 = DS_READ_B64_gfx9 %4, 776, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3) | ||
%6:vreg_64 = DS_READ_B64_gfx9 %4, 784, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3) | ||
%17:vreg_64 = DS_READ_B64_gfx9 %4, 840, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3) | ||
DS_WRITE_B64_gfx9 %4, %17, 8, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef` + 8, addrspace 3) | ||
DS_WRITE_B64_gfx9 %4, %6, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, align 16, addrspace 3) | ||
%24:vreg_64 = DS_READ_B64_gfx9 %4, 928, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3) | ||
DS_WRITE_B64_gfx9 undef %29:vgpr_32, %5, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, addrspace 3) | ||
S_ENDPGM 0 | ||
... |