//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

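// Return the alignment to use for GV: its known pointer alignment if set,
// otherwise the ABI alignment of its value type.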
Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

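// A dynamic LDS variable has its size supplied at kernel launch rather than
// at compile time; in IR it appears as an addrspace(3) global whose type has
// zero allocation size.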
bool isDynamicLDS(const GlobalVariable &GV) {
  // An external addrspace(3) variable with zero allocation size and no
  // initializer is dynamic LDS.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}

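// Return true if GV is an LDS variable that the LDS lowering passes need to
// handle: any dynamic LDS variable, or a non-constant addrspace(3) variable
// that has no initializer (or only an undef one).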
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

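// Given a MemoryDef that MemorySSA reports as a clobber of Ptr, decide
// whether it can actually modify the memory Ptr points to. Fences, AMDGPU
// barrier intrinsics, and atomics that do not alias Ptr are conservatively
// modeled as clobbers by MemorySSA but write nothing relevant.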
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; from MemorySSA's
  // point of view any atomic is a universal MemoryDef, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

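// Walk MemorySSA upwards from Load and return true if any definition in the
// function can really clobber the loaded location (per isReallyAClobber).
// MemoryPhis fan the walk out over all incoming definitions; reaching the
// live-on-entry state on every path means the load is not clobbered.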
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live-on-entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // Phi case add all incoming definitions to the WorkList and keep walking up
  // through the definitions of this memory location until the root. If all
  // defs are exhausted and we reach the live-on-entry state, there is no
  // clobber. Along the way ignore barriers and fences, which MemorySSA
  // considers clobbers even though they do not actually write anything to
  // memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "  -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "  -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm
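
// Illustrative usage sketch (an assumption, not taken from this file): a pass
// that already requires MemorySSA and AliasAnalysis could query the helpers
// above roughly as below; the surrounding pass boilerplate and the function F
// are assumed, and the legacy pass manager accessors are shown.
//
//   #include "llvm/IR/InstIterator.h"
//
//   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
//   AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
//   for (Instruction &I : instructions(F))
//     if (auto *LI = dyn_cast<LoadInst>(&I))
//       if (!AMDGPU::isClobberedInFunction(LI, &MSSA, &AA)) {
//         // The loaded location is not modified anywhere in F.
//       }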