1//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements two passes that enable HIP C++ Standard Parallelism
9// Support:
10//
11// 1. AcceleratorCodeSelection (required): Given that only algorithms are
12// accelerated, and that the accelerated implementation exists in the form of
13// a compute kernel, we assume that only the kernel, and all functions
14// reachable from it, constitute code that the user expects the accelerator
15// to execute. Thus, we identify the set of all functions reachable from
16// kernels, and then remove all unreachable ones. This last part is necessary
17// because it is possible for code that the user did not expect to execute on
18// an accelerator to contain constructs that cannot be handled by the target
19// BE, which cannot be provably demonstrated to be dead code in general, and
20// thus can lead to mis-compilation. The degenerate case of this is when a
21// Module contains no kernels (the parent TU had no algorithm invocations fit
22// for acceleration), which we handle by completely emptying said module.
23// **NOTE**: The above does not handle indirectly reachable functions i.e.
24// it is possible to obtain a case where the target of an indirect
25// call is otherwise unreachable and thus is removed; this
26// restriction is aligned with the current `-hipstdpar` limitations
27// and will be relaxed in the future.
28//
29// 2. AllocationInterposition (required only when on-demand paging is
30// unsupported): Some accelerators or operating systems might not support
31// transparent on-demand paging. Thus, they would only be able to access
32// memory that is allocated by an accelerator-aware mechanism. For such cases
33// the user can opt into enabling allocation / deallocation interposition,
34// whereby we replace calls to known allocation / deallocation functions with
35// calls to runtime implemented equivalents that forward the requests to
36// accelerator-aware interfaces. We also support freeing system allocated
37// memory that ends up in one of the runtime equivalents, since this can
38// happen if e.g. a library that was compiled without interposition returns
39// an allocation that can be validly passed to `free`.
40//===----------------------------------------------------------------------===//
41
42#include "llvm/Transforms/HipStdPar/HipStdPar.h"
43
44#include "llvm/ADT/SmallPtrSet.h"
45#include "llvm/ADT/SmallVector.h"
46#include "llvm/ADT/STLExtras.h"
47#include "llvm/Analysis/CallGraph.h"
48#include "llvm/Analysis/OptimizationRemarkEmitter.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugInfoMetadata.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/Module.h"
53#include "llvm/Transforms/Utils/ModuleUtils.h"
54
55#include <cassert>
56#include <string>
57#include <utility>
58
59using namespace llvm;
60
61template<typename T>
62static inline void eraseFromModule(T &ToErase) {
63 ToErase.replaceAllUsesWith(PoisonValue::get(T: ToErase.getType()));
64 ToErase.eraseFromParent();
65}
66
67static inline bool checkIfSupported(GlobalVariable &G) {
68 if (!G.isThreadLocal())
69 return true;
70
71 G.dropDroppableUses();
72
73 if (!G.isConstantUsed())
74 return true;
75
76 std::string W;
77 raw_string_ostream OS(W);
78
79 OS << "Accelerator does not support the thread_local variable "
80 << G.getName();
81
82 Instruction *I = nullptr;
83 SmallVector<User *> Tmp(G.user_begin(), G.user_end());
84 SmallPtrSet<User *, 5> Visited;
85 do {
86 auto U = std::move(Tmp.back());
87 Tmp.pop_back();
88
89 if (Visited.contains(Ptr: U))
90 continue;
91
92 if (isa<Instruction>(Val: U))
93 I = cast<Instruction>(Val: U);
94 else
95 Tmp.insert(I: Tmp.end(), From: U->user_begin(), To: U->user_end());
96
97 Visited.insert(Ptr: U);
98 } while (!I && !Tmp.empty());
99
100 assert(I && "thread_local global should have at least one non-constant use.");
101
102 G.getContext().diagnose(
103 DI: DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
104 I->getDebugLoc(), DS_Error));
105
106 return false;
107}
108
109static inline void clearModule(Module &M) { // TODO: simplify.
110 while (!M.functions().empty())
111 eraseFromModule(ToErase&: *M.begin());
112 while (!M.globals().empty())
113 eraseFromModule(ToErase&: *M.globals().begin());
114 while (!M.aliases().empty())
115 eraseFromModule(ToErase&: *M.aliases().begin());
116 while (!M.ifuncs().empty())
117 eraseFromModule(ToErase&: *M.ifuncs().begin());
118}
119
120static inline void maybeHandleGlobals(Module &M) {
121 unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
122 for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
123 if (!checkIfSupported(G))
124 return clearModule(M);
125
126 if (G.isThreadLocal())
127 continue;
128 if (G.isConstant())
129 continue;
130 if (G.getAddressSpace() != GlobAS)
131 continue;
132 if (G.getLinkage() != GlobalVariable::ExternalLinkage)
133 continue;
134
135 G.setLinkage(GlobalVariable::ExternalWeakLinkage);
136 G.setExternallyInitialized(true);
137 }
138}
139
140template<unsigned N>
141static inline void removeUnreachableFunctions(
142 const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
143 removeFromUsedLists(M, [&](Constant *C) {
144 if (auto F = dyn_cast<Function>(Val: C))
145 return !Reachable.contains(F);
146
147 return false;
148 });
149
150 SmallVector<std::reference_wrapper<Function>> ToRemove;
151 copy_if(M, std::back_inserter(x&: ToRemove), [&](auto &&F) {
152 return !F.isIntrinsic() && !Reachable.contains(&F);
153 });
154
155 for_each(Range&: ToRemove, F: eraseFromModule<Function>);
156}
157
158static inline bool isAcceleratorExecutionRoot(const Function *F) {
159 if (!F)
160 return false;
161
162 return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
163}
164
165static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
166 const auto Dx = F->getName().rfind(Str: "__hipstdpar_unsupported");
167
168 if (Dx == StringRef::npos)
169 return true;
170
171 const auto N = F->getName().substr(Start: 0, N: Dx);
172
173 std::string W;
174 raw_string_ostream OS(W);
175
176 if (N == "__ASM")
177 OS << "Accelerator does not support the ASM block:\n"
178 << cast<ConstantDataArray>(Val: CB->getArgOperand(i: 0))->getAsCString();
179 else
180 OS << "Accelerator does not support the " << N << " function.";
181
182 auto Caller = CB->getParent()->getParent();
183
184 Caller->getContext().diagnose(
185 DI: DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
186
187 return false;
188}
189
190PreservedAnalyses
191 HipStdParAcceleratorCodeSelectionPass::run(Module &M,
192 ModuleAnalysisManager &MAM) {
193 auto &CGA = MAM.getResult<CallGraphAnalysis>(IR&: M);
194
195 SmallPtrSet<const Function *, 32> Reachable;
196 for (auto &&CGN : CGA) {
197 if (!isAcceleratorExecutionRoot(F: CGN.first))
198 continue;
199
200 Reachable.insert(Ptr: CGN.first);
201
202 SmallVector<const Function *> Tmp({CGN.first});
203 do {
204 auto F = std::move(Tmp.back());
205 Tmp.pop_back();
206
207 for (auto &&N : *CGA[F]) {
208 if (!N.second)
209 continue;
210 if (!N.second->getFunction())
211 continue;
212 if (Reachable.contains(Ptr: N.second->getFunction()))
213 continue;
214
215 if (!checkIfSupported(F: N.second->getFunction(),
216 CB: dyn_cast<CallBase>(Val&: *N.first)))
217 return PreservedAnalyses::none();
218
219 Reachable.insert(Ptr: N.second->getFunction());
220 Tmp.push_back(Elt: N.second->getFunction());
221 }
222 } while (!std::empty(cont: Tmp));
223 }
224
225 if (std::empty(cont: Reachable))
226 clearModule(M);
227 else
228 removeUnreachableFunctions(Reachable, M);
229
230 maybeHandleGlobals(M);
231
232 return PreservedAnalyses::none();
233}
234
235static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
236 {"aligned_alloc", "__hipstdpar_aligned_alloc"},
237 {"calloc", "__hipstdpar_calloc"},
238 {"free", "__hipstdpar_free"},
239 {"malloc", "__hipstdpar_malloc"},
240 {"memalign", "__hipstdpar_aligned_alloc"},
241 {"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
242 {"realloc", "__hipstdpar_realloc"},
243 {"reallocarray", "__hipstdpar_realloc_array"},
244 {"_ZdaPv", "__hipstdpar_operator_delete"},
245 {"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
246 {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
247 {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
248 {"_ZdlPv", "__hipstdpar_operator_delete"},
249 {"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
250 {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
251 {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
252 {"_Znam", "__hipstdpar_operator_new"},
253 {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
254 {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
255 {"_ZnamSt11align_val_tRKSt9nothrow_t",
256 "__hipstdpar_operator_new_aligned_nothrow"},
257
258 {"_Znwm", "__hipstdpar_operator_new"},
259 {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
260 {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
261 {"_ZnwmSt11align_val_tRKSt9nothrow_t",
262 "__hipstdpar_operator_new_aligned_nothrow"},
263 {"__builtin_calloc", "__hipstdpar_calloc"},
264 {"__builtin_free", "__hipstdpar_free"},
265 {"__builtin_malloc", "__hipstdpar_malloc"},
266 {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
267 {"__builtin_operator_new", "__hipstdpar_operator_new"},
268 {"__builtin_realloc", "__hipstdpar_realloc"},
269 {"__libc_calloc", "__hipstdpar_calloc"},
270 {"__libc_free", "__hipstdpar_free"},
271 {"__libc_malloc", "__hipstdpar_malloc"},
272 {"__libc_memalign", "__hipstdpar_aligned_alloc"},
273 {"__libc_realloc", "__hipstdpar_realloc"}
274};
275
276PreservedAnalyses
277HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
278 SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(cont: ReplaceMap),
279 std::cend(cont: ReplaceMap));
280
281 for (auto &&F : M) {
282 if (!F.hasName())
283 continue;
284 if (!AllocReplacements.contains(Val: F.getName()))
285 continue;
286
287 if (auto R = M.getFunction(Name: AllocReplacements[F.getName()])) {
288 F.replaceAllUsesWith(V: R);
289 } else {
290 std::string W;
291 raw_string_ostream OS(W);
292
293 OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
294 << ". Tried to run the allocation interposition pass without the "
295 << "replacement functions available.";
296
297 F.getContext().diagnose(DI: DiagnosticInfoUnsupported(F, W,
298 F.getSubprogram(),
299 DS_Warning));
300 }
301 }
302
303 if (auto F = M.getFunction(Name: "__hipstdpar_hidden_free")) {
304 auto LibcFree = M.getOrInsertFunction(Name: "__libc_free", T: F->getFunctionType(),
305 AttributeList: F->getAttributes());
306 F->replaceAllUsesWith(V: LibcFree.getCallee());
307
308 eraseFromModule(ToErase&: *F);
309 }
310
311 return PreservedAnalyses::none();
312}
313

// Source: llvm/lib/Transforms/HipStdPar/HipStdPar.cpp