//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

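// Return the alignment to assume for \p GV: its explicit alignment when one
// is known, otherwise the ABI alignment of its value type.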
Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-sized addrspace(3) variable without an initializer is
  // dynamic LDS.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}

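// Returns true if \p GV is an LDS variable that the LDS lowering passes are
// expected to rewrite; constants and variables with real initializers are
// deliberately left in place.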
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

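// Given a MemoryDef that MemorySSA reports as clobbering \p Ptr, decide
// whether it really updates memory. Fences and barriers enforce ordering but
// write nothing, and an atomic that provably does not alias \p Ptr cannot
// clobber it either.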
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

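  // Barrier intrinsics synchronize execution or scheduling but never store to
  // memory, so MemorySSA's conservative treatment of them as clobbers can be
  // ignored here.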
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; from MemorySSA's
  // point of view any atomic is a universal MemoryDef, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

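// Walk MemorySSA upwards from \p Load and report whether any MemoryDef that
// actually writes aliasing memory (see isReallyAClobber) can reach it within
// the function.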
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access: it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // phi case add all of its incoming defs to the work list and keep walking
  // up through the definitions of this memory location until the root. If
  // every def is exhausted and we reach the live-on-entry state, there is no
  // clobber. Along the scan ignore barriers and fences, which MemorySSA
  // treats as clobbers even though they do not actually write memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

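    // A MemoryPhi merges the memory states of several predecessors; every
    // incoming state must be clobber-free for the load to be unclobbered.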
    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << " -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm