1 | //===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Place garbage collection safepoints at appropriate locations in the IR. This |
10 | // does not make relocation semantics or variable liveness explicit. That's |
11 | // done by RewriteStatepointsForGC. |
12 | // |
13 | // Terminology: |
14 | // - A call is said to be "parseable" if there is a stack map generated for the |
15 | // return PC of the call. A runtime can determine where values listed in the |
16 | // deopt arguments and (after RewriteStatepointsForGC) gc arguments are located |
17 | // on the stack when the code is suspended inside such a call. Every parse |
18 | // point is represented by a call wrapped in an gc.statepoint intrinsic. |
19 | // - A "poll" is an explicit check in the generated code to determine if the |
20 | // runtime needs the generated code to cooperate by calling a helper routine |
21 | // and thus suspending its execution at a known state. The call to the helper |
22 | // routine will be parseable. The (gc & runtime specific) logic of a poll is |
23 | // assumed to be provided in a function of the name "gc.safepoint_poll". |
24 | // |
25 | // We aim to insert polls such that running code can quickly be brought to a |
26 | // well defined state for inspection by the collector. In the current |
27 | // implementation, this is done via the insertion of poll sites at method entry |
28 | // and the backedge of most loops. We try to avoid inserting more polls than |
29 | // are necessary to ensure a finite period between poll sites. This is not |
30 | // because the poll itself is expensive in the generated code; it's not. Polls |
31 | // do tend to impact the optimizer itself in negative ways; we'd like to avoid |
32 | // perturbing the optimization of the method as much as we can. |
33 | // |
34 | // We also need to make most call sites parseable. The callee might execute a |
35 | // poll (or otherwise be inspected by the GC). If so, the entire stack |
36 | // (including the suspended frame of the current method) must be parseable. |
37 | // |
38 | // This pass will insert: |
39 | // - Call parse points ("call safepoints") for any call which may need to |
40 | // reach a safepoint during the execution of the callee function. |
41 | // - Backedge safepoint polls and entry safepoint polls to ensure that |
42 | // executing code reaches a safepoint poll in a finite amount of time. |
43 | // |
44 | // We do not currently support return statepoints, but adding them would not |
45 | // be hard. They are not required for correctness - entry safepoints are an |
46 | // alternative - but some GCs may prefer them. Patches welcome. |
47 | // |
48 | //===----------------------------------------------------------------------===// |
49 | |
50 | #include "llvm/Transforms/Scalar/PlaceSafepoints.h" |
51 | #include "llvm/InitializePasses.h" |
52 | #include "llvm/Pass.h" |
53 | |
54 | #include "llvm/ADT/SetVector.h" |
55 | #include "llvm/ADT/Statistic.h" |
56 | #include "llvm/Analysis/CFG.h" |
57 | #include "llvm/Analysis/LoopInfo.h" |
58 | #include "llvm/Analysis/ScalarEvolution.h" |
59 | #include "llvm/Analysis/TargetLibraryInfo.h" |
60 | #include "llvm/IR/Dominators.h" |
61 | #include "llvm/IR/IntrinsicInst.h" |
62 | #include "llvm/IR/LegacyPassManager.h" |
63 | #include "llvm/IR/Statepoint.h" |
64 | #include "llvm/Support/CommandLine.h" |
65 | #include "llvm/Support/Debug.h" |
66 | #include "llvm/Transforms/Scalar.h" |
67 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
68 | #include "llvm/Transforms/Utils/Cloning.h" |
69 | #include "llvm/Transforms/Utils/Local.h" |
70 | |
71 | using namespace llvm; |
72 | |
73 | #define DEBUG_TYPE "place-safepoints" |
74 | |
75 | STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted" ); |
76 | STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted" ); |
77 | |
78 | STATISTIC(CallInLoop, |
79 | "Number of loops without safepoints due to calls in loop" ); |
80 | STATISTIC(FiniteExecution, |
81 | "Number of loops without safepoints finite execution" ); |
82 | |
83 | // Ignore opportunities to avoid placing safepoints on backedges, useful for |
84 | // validation |
85 | static cl::opt<bool> AllBackedges("spp-all-backedges" , cl::Hidden, |
86 | cl::init(Val: false)); |
87 | |
88 | /// How narrow does the trip count of a loop have to be to have to be considered |
89 | /// "counted"? Counted loops do not get safepoints at backedges. |
90 | static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width" , |
91 | cl::Hidden, cl::init(Val: 32)); |
92 | |
93 | // If true, split the backedge of a loop when placing the safepoint, otherwise |
94 | // split the latch block itself. Both are useful to support for |
95 | // experimentation, but in practice, it looks like splitting the backedge |
96 | // optimizes better. |
97 | static cl::opt<bool> SplitBackedge("spp-split-backedge" , cl::Hidden, |
98 | cl::init(Val: false)); |
99 | |
100 | namespace { |
101 | /// An analysis pass whose purpose is to identify each of the backedges in |
102 | /// the function which require a safepoint poll to be inserted. |
103 | class PlaceBackedgeSafepointsLegacyPass : public FunctionPass { |
104 | public: |
105 | static char ID; |
106 | |
107 | /// The output of the pass - gives a list of each backedge (described by |
108 | /// pointing at the branch) which need a poll inserted. |
109 | std::vector<Instruction *> PollLocations; |
110 | |
111 | /// True unless we're running spp-no-calls in which case we need to disable |
112 | /// the call-dependent placement opts. |
113 | bool CallSafepointsEnabled; |
114 | |
115 | PlaceBackedgeSafepointsLegacyPass(bool CallSafepoints = false) |
116 | : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) { |
117 | initializePlaceBackedgeSafepointsLegacyPassPass( |
118 | *PassRegistry::getPassRegistry()); |
119 | } |
120 | |
121 | bool runOnLoop(Loop *); |
122 | |
123 | void runOnLoopAndSubLoops(Loop *L) { |
124 | // Visit all the subloops |
125 | for (Loop *I : *L) |
126 | runOnLoopAndSubLoops(L: I); |
127 | runOnLoop(L); |
128 | } |
129 | |
130 | bool runOnFunction(Function &F) override { |
131 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
132 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
133 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
134 | TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
135 | for (Loop *I : *LI) { |
136 | runOnLoopAndSubLoops(L: I); |
137 | } |
138 | return false; |
139 | } |
140 | |
141 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
142 | AU.addRequired<DominatorTreeWrapperPass>(); |
143 | AU.addRequired<ScalarEvolutionWrapperPass>(); |
144 | AU.addRequired<LoopInfoWrapperPass>(); |
145 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
146 | // We no longer modify the IR at all in this pass. Thus all |
147 | // analysis are preserved. |
148 | AU.setPreservesAll(); |
149 | } |
150 | |
151 | private: |
152 | ScalarEvolution *SE = nullptr; |
153 | DominatorTree *DT = nullptr; |
154 | LoopInfo *LI = nullptr; |
155 | TargetLibraryInfo *TLI = nullptr; |
156 | }; |
157 | } // namespace |
158 | |
159 | static cl::opt<bool> NoEntry("spp-no-entry" , cl::Hidden, cl::init(Val: false)); |
160 | static cl::opt<bool> NoCall("spp-no-call" , cl::Hidden, cl::init(Val: false)); |
161 | static cl::opt<bool> NoBackedge("spp-no-backedge" , cl::Hidden, cl::init(Val: false)); |
162 | |
163 | char PlaceBackedgeSafepointsLegacyPass::ID = 0; |
164 | |
165 | INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsLegacyPass, |
166 | "place-backedge-safepoints-impl" , |
167 | "Place Backedge Safepoints" , false, false) |
168 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
169 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
170 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
171 | INITIALIZE_PASS_END(PlaceBackedgeSafepointsLegacyPass, |
172 | "place-backedge-safepoints-impl" , |
173 | "Place Backedge Safepoints" , false, false) |
174 | |
175 | static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *, |
176 | BasicBlock *Pred, |
177 | DominatorTree &DT, |
178 | const TargetLibraryInfo &TLI); |
179 | |
180 | static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, |
181 | BasicBlock *Pred); |
182 | |
183 | static Instruction *findLocationForEntrySafepoint(Function &F, |
184 | DominatorTree &DT); |
185 | |
186 | static bool isGCSafepointPoll(Function &F); |
187 | static bool shouldRewriteFunction(Function &F); |
188 | static bool enableEntrySafepoints(Function &F); |
189 | static bool enableBackedgeSafepoints(Function &F); |
190 | static bool enableCallSafepoints(Function &F); |
191 | |
192 | static void |
193 | InsertSafepointPoll(BasicBlock::iterator InsertBefore, |
194 | std::vector<CallBase *> &ParsePointsNeeded /*rval*/, |
195 | const TargetLibraryInfo &TLI); |
196 | |
197 | bool PlaceBackedgeSafepointsLegacyPass::runOnLoop(Loop *L) { |
198 | // Loop through all loop latches (branches controlling backedges). We need |
199 | // to place a safepoint on every backedge (potentially). |
200 | // Note: In common usage, there will be only one edge due to LoopSimplify |
201 | // having run sometime earlier in the pipeline, but this code must be correct |
202 | // w.r.t. loops with multiple backedges. |
203 | BasicBlock * = L->getHeader(); |
204 | SmallVector<BasicBlock *, 16> LoopLatches; |
205 | L->getLoopLatches(LoopLatches); |
206 | for (BasicBlock *Pred : LoopLatches) { |
207 | assert(L->contains(Pred)); |
208 | |
209 | // Make a policy decision about whether this loop needs a safepoint or |
210 | // not. Note that this is about unburdening the optimizer in loops, not |
211 | // avoiding the runtime cost of the actual safepoint. |
212 | if (!AllBackedges) { |
213 | if (mustBeFiniteCountedLoop(L, SE, Pred)) { |
214 | LLVM_DEBUG(dbgs() << "skipping safepoint placement in finite loop\n" ); |
215 | FiniteExecution++; |
216 | continue; |
217 | } |
218 | if (CallSafepointsEnabled && |
219 | containsUnconditionalCallSafepoint(L, Header, Pred, DT&: *DT, TLI: *TLI)) { |
220 | // Note: This is only semantically legal since we won't do any further |
221 | // IPO or inlining before the actual call insertion.. If we hadn't, we |
222 | // might latter loose this call safepoint. |
223 | LLVM_DEBUG( |
224 | dbgs() |
225 | << "skipping safepoint placement due to unconditional call\n" ); |
226 | CallInLoop++; |
227 | continue; |
228 | } |
229 | } |
230 | |
231 | // TODO: We can create an inner loop which runs a finite number of |
232 | // iterations with an outer loop which contains a safepoint. This would |
233 | // not help runtime performance that much, but it might help our ability to |
234 | // optimize the inner loop. |
235 | |
236 | // Safepoint insertion would involve creating a new basic block (as the |
237 | // target of the current backedge) which does the safepoint (of all live |
238 | // variables) and branches to the true header |
239 | Instruction *Term = Pred->getTerminator(); |
240 | |
241 | LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term); |
242 | |
243 | PollLocations.push_back(x: Term); |
244 | } |
245 | |
246 | return false; |
247 | } |
248 | |
249 | bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) { |
250 | if (F.isDeclaration() || F.empty()) { |
251 | // This is a declaration, nothing to do. Must exit early to avoid crash in |
252 | // dom tree calculation |
253 | return false; |
254 | } |
255 | |
256 | if (isGCSafepointPoll(F)) { |
257 | // Given we're inlining this inside of safepoint poll insertion, this |
258 | // doesn't make any sense. Note that we do make any contained calls |
259 | // parseable after we inline a poll. |
260 | return false; |
261 | } |
262 | |
263 | if (!shouldRewriteFunction(F)) |
264 | return false; |
265 | |
266 | bool Modified = false; |
267 | |
268 | // In various bits below, we rely on the fact that uses are reachable from |
269 | // defs. When there are basic blocks unreachable from the entry, dominance |
270 | // and reachablity queries return non-sensical results. Thus, we preprocess |
271 | // the function to ensure these properties hold. |
272 | Modified |= removeUnreachableBlocks(F); |
273 | |
274 | // STEP 1 - Insert the safepoint polling locations. We do not need to |
275 | // actually insert parse points yet. That will be done for all polls and |
276 | // calls in a single pass. |
277 | |
278 | DominatorTree DT; |
279 | DT.recalculate(Func&: F); |
280 | |
281 | SmallVector<Instruction *, 16> PollsNeeded; |
282 | std::vector<CallBase *> ParsePointNeeded; |
283 | |
284 | if (enableBackedgeSafepoints(F)) { |
285 | // Construct a pass manager to run the LoopPass backedge logic. We |
286 | // need the pass manager to handle scheduling all the loop passes |
287 | // appropriately. Doing this by hand is painful and just not worth messing |
288 | // with for the moment. |
289 | legacy::FunctionPassManager FPM(F.getParent()); |
290 | bool CanAssumeCallSafepoints = enableCallSafepoints(F); |
291 | auto *PBS = new PlaceBackedgeSafepointsLegacyPass(CanAssumeCallSafepoints); |
292 | FPM.add(P: PBS); |
293 | FPM.run(F); |
294 | |
295 | // We preserve dominance information when inserting the poll, otherwise |
296 | // we'd have to recalculate this on every insert |
297 | DT.recalculate(Func&: F); |
298 | |
299 | auto &PollLocations = PBS->PollLocations; |
300 | |
301 | auto OrderByBBName = [](Instruction *a, Instruction *b) { |
302 | return a->getParent()->getName() < b->getParent()->getName(); |
303 | }; |
304 | // We need the order of list to be stable so that naming ends up stable |
305 | // when we split edges. This makes test cases much easier to write. |
306 | llvm::sort(C&: PollLocations, Comp: OrderByBBName); |
307 | |
308 | // We can sometimes end up with duplicate poll locations. This happens if |
309 | // a single loop is visited more than once. The fact this happens seems |
310 | // wrong, but it does happen for the split-backedge.ll test case. |
311 | PollLocations.erase(first: std::unique(first: PollLocations.begin(), last: PollLocations.end()), |
312 | last: PollLocations.end()); |
313 | |
314 | // Insert a poll at each point the analysis pass identified |
315 | // The poll location must be the terminator of a loop latch block. |
316 | for (Instruction *Term : PollLocations) { |
317 | // We are inserting a poll, the function is modified |
318 | Modified = true; |
319 | |
320 | if (SplitBackedge) { |
321 | // Split the backedge of the loop and insert the poll within that new |
322 | // basic block. This creates a loop with two latches per original |
323 | // latch (which is non-ideal), but this appears to be easier to |
324 | // optimize in practice than inserting the poll immediately before the |
325 | // latch test. |
326 | |
327 | // Since this is a latch, at least one of the successors must dominate |
328 | // it. Its possible that we have a) duplicate edges to the same header |
329 | // and b) edges to distinct loop headers. We need to insert pools on |
330 | // each. |
331 | SetVector<BasicBlock *> ; |
332 | for (unsigned i = 0; i < Term->getNumSuccessors(); i++) { |
333 | BasicBlock *Succ = Term->getSuccessor(Idx: i); |
334 | if (DT.dominates(A: Succ, B: Term->getParent())) { |
335 | Headers.insert(X: Succ); |
336 | } |
337 | } |
338 | assert(!Headers.empty() && "poll location is not a loop latch?" ); |
339 | |
340 | // The split loop structure here is so that we only need to recalculate |
341 | // the dominator tree once. Alternatively, we could just keep it up to |
342 | // date and use a more natural merged loop. |
343 | SetVector<BasicBlock *> SplitBackedges; |
344 | for (BasicBlock * : Headers) { |
345 | BasicBlock *NewBB = SplitEdge(From: Term->getParent(), To: Header, DT: &DT); |
346 | PollsNeeded.push_back(Elt: NewBB->getTerminator()); |
347 | NumBackedgeSafepoints++; |
348 | } |
349 | } else { |
350 | // Split the latch block itself, right before the terminator. |
351 | PollsNeeded.push_back(Elt: Term); |
352 | NumBackedgeSafepoints++; |
353 | } |
354 | } |
355 | } |
356 | |
357 | if (enableEntrySafepoints(F)) { |
358 | if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) { |
359 | PollsNeeded.push_back(Elt: Location); |
360 | Modified = true; |
361 | NumEntrySafepoints++; |
362 | } |
363 | // TODO: else we should assert that there was, in fact, a policy choice to |
364 | // not insert a entry safepoint poll. |
365 | } |
366 | |
367 | // Now that we've identified all the needed safepoint poll locations, insert |
368 | // safepoint polls themselves. |
369 | for (Instruction *PollLocation : PollsNeeded) { |
370 | std::vector<CallBase *> RuntimeCalls; |
371 | InsertSafepointPoll(InsertBefore: PollLocation->getIterator(), ParsePointsNeeded&: RuntimeCalls, TLI); |
372 | llvm::append_range(C&: ParsePointNeeded, R&: RuntimeCalls); |
373 | } |
374 | |
375 | return Modified; |
376 | } |
377 | |
378 | PreservedAnalyses PlaceSafepointsPass::run(Function &F, |
379 | FunctionAnalysisManager &AM) { |
380 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
381 | |
382 | if (!runImpl(F, TLI)) |
383 | return PreservedAnalyses::all(); |
384 | |
385 | // TODO: can we preserve more? |
386 | return PreservedAnalyses::none(); |
387 | } |
388 | |
389 | static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) { |
390 | if (callsGCLeafFunction(Call, TLI)) |
391 | return false; |
392 | if (auto *CI = dyn_cast<CallInst>(Val: Call)) { |
393 | if (CI->isInlineAsm()) |
394 | return false; |
395 | } |
396 | |
397 | return !(isa<GCStatepointInst>(Val: Call) || isa<GCRelocateInst>(Val: Call) || |
398 | isa<GCResultInst>(Val: Call)); |
399 | } |
400 | |
401 | /// Returns true if this loop is known to contain a call safepoint which |
402 | /// must unconditionally execute on any iteration of the loop which returns |
403 | /// to the loop header via an edge from Pred. Returns a conservative correct |
404 | /// answer; i.e. false is always valid. |
405 | static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *, |
406 | BasicBlock *Pred, |
407 | DominatorTree &DT, |
408 | const TargetLibraryInfo &TLI) { |
409 | // In general, we're looking for any cut of the graph which ensures |
410 | // there's a call safepoint along every edge between Header and Pred. |
411 | // For the moment, we look only for the 'cuts' that consist of a single call |
412 | // instruction in a block which is dominated by the Header and dominates the |
413 | // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain |
414 | // of such dominating blocks gets substantially more occurrences than just |
415 | // checking the Pred and Header blocks themselves. This may be due to the |
416 | // density of loop exit conditions caused by range and null checks. |
417 | // TODO: structure this as an analysis pass, cache the result for subloops, |
418 | // avoid dom tree recalculations |
419 | assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?" ); |
420 | |
421 | BasicBlock *Current = Pred; |
422 | while (true) { |
423 | for (Instruction &I : *Current) { |
424 | if (auto *Call = dyn_cast<CallBase>(Val: &I)) |
425 | // Note: Technically, needing a safepoint isn't quite the right |
426 | // condition here. We should instead be checking if the target method |
427 | // has an |
428 | // unconditional poll. In practice, this is only a theoretical concern |
429 | // since we don't have any methods with conditional-only safepoint |
430 | // polls. |
431 | if (needsStatepoint(Call, TLI)) |
432 | return true; |
433 | } |
434 | |
435 | if (Current == Header) |
436 | break; |
437 | Current = DT.getNode(BB: Current)->getIDom()->getBlock(); |
438 | } |
439 | |
440 | return false; |
441 | } |
442 | |
443 | /// Returns true if this loop is known to terminate in a finite number of |
444 | /// iterations. Note that this function may return false for a loop which |
445 | /// does actual terminate in a finite constant number of iterations due to |
446 | /// conservatism in the analysis. |
447 | static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, |
448 | BasicBlock *Pred) { |
449 | // A conservative bound on the loop as a whole. |
450 | const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L); |
451 | if (!isa<SCEVCouldNotCompute>(Val: MaxTrips) && |
452 | SE->getUnsignedRange(S: MaxTrips).getUnsignedMax().isIntN( |
453 | N: CountedLoopTripWidth)) |
454 | return true; |
455 | |
456 | // If this is a conditional branch to the header with the alternate path |
457 | // being outside the loop, we can ask questions about the execution frequency |
458 | // of the exit block. |
459 | if (L->isLoopExiting(BB: Pred)) { |
460 | // This returns an exact expression only. TODO: We really only need an |
461 | // upper bound here, but SE doesn't expose that. |
462 | const SCEV *MaxExec = SE->getExitCount(L, ExitingBlock: Pred); |
463 | if (!isa<SCEVCouldNotCompute>(Val: MaxExec) && |
464 | SE->getUnsignedRange(S: MaxExec).getUnsignedMax().isIntN( |
465 | N: CountedLoopTripWidth)) |
466 | return true; |
467 | } |
468 | |
469 | return /* not finite */ false; |
470 | } |
471 | |
472 | static void scanOneBB(Instruction *Start, Instruction *End, |
473 | std::vector<CallInst *> &Calls, |
474 | DenseSet<BasicBlock *> &Seen, |
475 | std::vector<BasicBlock *> &Worklist) { |
476 | for (BasicBlock::iterator BBI(Start), BBE0 = Start->getParent()->end(), |
477 | BBE1 = BasicBlock::iterator(End); |
478 | BBI != BBE0 && BBI != BBE1; BBI++) { |
479 | if (CallInst *CI = dyn_cast<CallInst>(Val: &*BBI)) |
480 | Calls.push_back(x: CI); |
481 | |
482 | // FIXME: This code does not handle invokes |
483 | assert(!isa<InvokeInst>(&*BBI) && |
484 | "support for invokes in poll code needed" ); |
485 | |
486 | // Only add the successor blocks if we reach the terminator instruction |
487 | // without encountering end first |
488 | if (BBI->isTerminator()) { |
489 | BasicBlock *BB = BBI->getParent(); |
490 | for (BasicBlock *Succ : successors(BB)) { |
491 | if (Seen.insert(V: Succ).second) { |
492 | Worklist.push_back(x: Succ); |
493 | } |
494 | } |
495 | } |
496 | } |
497 | } |
498 | |
499 | static void scanInlinedCode(Instruction *Start, Instruction *End, |
500 | std::vector<CallInst *> &Calls, |
501 | DenseSet<BasicBlock *> &Seen) { |
502 | Calls.clear(); |
503 | std::vector<BasicBlock *> Worklist; |
504 | Seen.insert(V: Start->getParent()); |
505 | scanOneBB(Start, End, Calls, Seen, Worklist); |
506 | while (!Worklist.empty()) { |
507 | BasicBlock *BB = Worklist.back(); |
508 | Worklist.pop_back(); |
509 | scanOneBB(Start: &*BB->begin(), End, Calls, Seen, Worklist); |
510 | } |
511 | } |
512 | |
513 | /// Returns true if an entry safepoint is not required before this callsite in |
514 | /// the caller function. |
515 | static bool doesNotRequireEntrySafepointBefore(CallBase *Call) { |
516 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Call)) { |
517 | switch (II->getIntrinsicID()) { |
518 | case Intrinsic::experimental_gc_statepoint: |
519 | case Intrinsic::experimental_patchpoint_void: |
520 | case Intrinsic::experimental_patchpoint: |
521 | // The can wrap an actual call which may grow the stack by an unbounded |
522 | // amount or run forever. |
523 | return false; |
524 | default: |
525 | // Most LLVM intrinsics are things which do not expand to actual calls, or |
526 | // at least if they do, are leaf functions that cause only finite stack |
527 | // growth. In particular, the optimizer likes to form things like memsets |
528 | // out of stores in the original IR. Another important example is |
529 | // llvm.localescape which must occur in the entry block. Inserting a |
530 | // safepoint before it is not legal since it could push the localescape |
531 | // out of the entry block. |
532 | return true; |
533 | } |
534 | } |
535 | return false; |
536 | } |
537 | |
538 | static Instruction *findLocationForEntrySafepoint(Function &F, |
539 | DominatorTree &DT) { |
540 | |
541 | // Conceptually, this poll needs to be on method entry, but in |
542 | // practice, we place it as late in the entry block as possible. We |
543 | // can place it as late as we want as long as it dominates all calls |
544 | // that can grow the stack. This, combined with backedge polls, |
545 | // give us all the progress guarantees we need. |
546 | |
547 | // hasNextInstruction and nextInstruction are used to iterate |
548 | // through a "straight line" execution sequence. |
549 | |
550 | auto HasNextInstruction = [](Instruction *I) { |
551 | if (!I->isTerminator()) |
552 | return true; |
553 | |
554 | BasicBlock *nextBB = I->getParent()->getUniqueSuccessor(); |
555 | return nextBB && (nextBB->getUniquePredecessor() != nullptr); |
556 | }; |
557 | |
558 | auto NextInstruction = [&](Instruction *I) { |
559 | assert(HasNextInstruction(I) && |
560 | "first check if there is a next instruction!" ); |
561 | |
562 | if (I->isTerminator()) |
563 | return &I->getParent()->getUniqueSuccessor()->front(); |
564 | return &*++I->getIterator(); |
565 | }; |
566 | |
567 | Instruction *Cursor = nullptr; |
568 | for (Cursor = &F.getEntryBlock().front(); HasNextInstruction(Cursor); |
569 | Cursor = NextInstruction(Cursor)) { |
570 | |
571 | // We need to ensure a safepoint poll occurs before any 'real' call. The |
572 | // easiest way to ensure finite execution between safepoints in the face of |
573 | // recursive and mutually recursive functions is to enforce that each take |
574 | // a safepoint. Additionally, we need to ensure a poll before any call |
575 | // which can grow the stack by an unbounded amount. This isn't required |
576 | // for GC semantics per se, but is a common requirement for languages |
577 | // which detect stack overflow via guard pages and then throw exceptions. |
578 | if (auto *Call = dyn_cast<CallBase>(Val: Cursor)) { |
579 | if (doesNotRequireEntrySafepointBefore(Call)) |
580 | continue; |
581 | break; |
582 | } |
583 | } |
584 | |
585 | assert((HasNextInstruction(Cursor) || Cursor->isTerminator()) && |
586 | "either we stopped because of a call, or because of terminator" ); |
587 | |
588 | return Cursor; |
589 | } |
590 | |
591 | const char GCSafepointPollName[] = "gc.safepoint_poll" ; |
592 | |
593 | static bool isGCSafepointPoll(Function &F) { |
594 | return F.getName().equals(RHS: GCSafepointPollName); |
595 | } |
596 | |
597 | /// Returns true if this function should be rewritten to include safepoint |
598 | /// polls and parseable call sites. The main point of this function is to be |
599 | /// an extension point for custom logic. |
600 | static bool shouldRewriteFunction(Function &F) { |
601 | // TODO: This should check the GCStrategy |
602 | if (F.hasGC()) { |
603 | const auto &FunctionGCName = F.getGC(); |
604 | const StringRef StatepointExampleName("statepoint-example" ); |
605 | const StringRef CoreCLRName("coreclr" ); |
606 | return (StatepointExampleName == FunctionGCName) || |
607 | (CoreCLRName == FunctionGCName); |
608 | } else |
609 | return false; |
610 | } |
611 | |
612 | // TODO: These should become properties of the GCStrategy, possibly with |
613 | // command line overrides. |
614 | static bool enableEntrySafepoints(Function &F) { return !NoEntry; } |
615 | static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; } |
616 | static bool enableCallSafepoints(Function &F) { return !NoCall; } |
617 | |
618 | // Insert a safepoint poll immediately before the given instruction. Does |
619 | // not handle the parsability of state at the runtime call, that's the |
620 | // callers job. |
621 | static void |
622 | InsertSafepointPoll(BasicBlock::iterator InsertBefore, |
623 | std::vector<CallBase *> &ParsePointsNeeded /*rval*/, |
624 | const TargetLibraryInfo &TLI) { |
625 | BasicBlock *OrigBB = InsertBefore->getParent(); |
626 | Module *M = InsertBefore->getModule(); |
627 | assert(M && "must be part of a module" ); |
628 | |
629 | // Inline the safepoint poll implementation - this will get all the branch, |
630 | // control flow, etc.. Most importantly, it will introduce the actual slow |
631 | // path call - where we need to insert a safepoint (parsepoint). |
632 | |
633 | auto *F = M->getFunction(Name: GCSafepointPollName); |
634 | assert(F && "gc.safepoint_poll function is missing" ); |
635 | assert(F->getValueType() == |
636 | FunctionType::get(Type::getVoidTy(M->getContext()), false) && |
637 | "gc.safepoint_poll declared with wrong type" ); |
638 | assert(!F->empty() && "gc.safepoint_poll must be a non-empty function" ); |
639 | CallInst *PollCall = CallInst::Create(Func: F, NameStr: "" , InsertBefore); |
640 | |
641 | // Record some information about the call site we're replacing |
642 | BasicBlock::iterator Before(PollCall), After(PollCall); |
643 | bool IsBegin = false; |
644 | if (Before == OrigBB->begin()) |
645 | IsBegin = true; |
646 | else |
647 | Before--; |
648 | |
649 | After++; |
650 | assert(After != OrigBB->end() && "must have successor" ); |
651 | |
652 | // Do the actual inlining |
653 | InlineFunctionInfo IFI; |
654 | bool InlineStatus = InlineFunction(CB&: *PollCall, IFI).isSuccess(); |
655 | assert(InlineStatus && "inline must succeed" ); |
656 | (void)InlineStatus; // suppress warning in release-asserts |
657 | |
658 | // Check post-conditions |
659 | assert(IFI.StaticAllocas.empty() && "can't have allocs" ); |
660 | |
661 | std::vector<CallInst *> Calls; // new calls |
662 | DenseSet<BasicBlock *> BBs; // new BBs + insertee |
663 | |
664 | // Include only the newly inserted instructions, Note: begin may not be valid |
665 | // if we inserted to the beginning of the basic block |
666 | BasicBlock::iterator Start = IsBegin ? OrigBB->begin() : std::next(x: Before); |
667 | |
668 | // If your poll function includes an unreachable at the end, that's not |
669 | // valid. Bugpoint likes to create this, so check for it. |
670 | assert(isPotentiallyReachable(&*Start, &*After) && |
671 | "malformed poll function" ); |
672 | |
673 | scanInlinedCode(Start: &*Start, End: &*After, Calls, Seen&: BBs); |
674 | assert(!Calls.empty() && "slow path not found for safepoint poll" ); |
675 | |
676 | // Record the fact we need a parsable state at the runtime call contained in |
677 | // the poll function. This is required so that the runtime knows how to |
678 | // parse the last frame when we actually take the safepoint (i.e. execute |
679 | // the slow path) |
680 | assert(ParsePointsNeeded.empty()); |
681 | for (auto *CI : Calls) { |
682 | // No safepoint needed or wanted |
683 | if (!needsStatepoint(Call: CI, TLI)) |
684 | continue; |
685 | |
686 | // These are likely runtime calls. Should we assert that via calling |
687 | // convention or something? |
688 | ParsePointsNeeded.push_back(x: CI); |
689 | } |
690 | assert(ParsePointsNeeded.size() <= Calls.size()); |
691 | } |
692 | |