1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
// This file contains classes used to discover whether, for a particular value,
// there is a path from its definition to a use that crosses a suspend point.
10//
11// Using the information discovered we form a Coroutine Frame structure to
12// contain those values. All uses of those values are replaced with appropriate
13// GEP + load from the coroutine frame. At the point of the definition we spill
14// the value into the coroutine frame.
15//===----------------------------------------------------------------------===//
16
17#include "CoroInternal.h"
18#include "llvm/ADT/BitVector.h"
19#include "llvm/ADT/PostOrderIterator.h"
20#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Analysis/PtrUseVisitor.h"
23#include "llvm/Analysis/StackLifetime.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/IR/CFG.h"
26#include "llvm/IR/DIBuilder.h"
27#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstIterator.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/OptimizedStructLayout.h"
35#include "llvm/Support/circular_raw_ostream.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38#include "llvm/Transforms/Utils/Local.h"
39#include "llvm/Transforms/Utils/PromoteMemToReg.h"
40#include <algorithm>
41#include <deque>
42#include <optional>
43
44using namespace llvm;
45
46// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
47// "coro-frame", which results in leaner debug spew.
48#define DEBUG_TYPE "coro-suspend-crossing"
49
50enum { SmallVectorThreshold = 32 };
51
// Provides a two-way mapping between blocks and their indices.
53namespace {
54class BlockToIndexMapping {
55 SmallVector<BasicBlock *, SmallVectorThreshold> V;
56
57public:
58 size_t size() const { return V.size(); }
59
60 BlockToIndexMapping(Function &F) {
61 for (BasicBlock &BB : F)
62 V.push_back(Elt: &BB);
63 llvm::sort(C&: V);
64 }
65
66 size_t blockToIndex(BasicBlock const *BB) const {
67 auto *I = llvm::lower_bound(Range: V, Value&: BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
69 return I - V.begin();
70 }
71
72 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
73};
74} // end anonymous namespace
75
// The SuspendCrossingInfo maintains data that allows us to answer the question
// whether, given two BasicBlocks A and B, there is a path from A to B that
// passes through a suspend point.
79//
80// For every basic block 'i' it maintains a BlockData that consists of:
//   Consumes: a bit vector which contains a set of indices of blocks that can
//             reach block 'i'. A block can trivially reach itself.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i' via a path that crosses a suspend point and does
//          not otherwise repeat 'i' (i.e. a path to 'i' without cycles
//          containing 'i').
//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
//   KillLoop: a boolean indicating that there is a path from 'i' back to 'i',
//             not otherwise repeating 'i', that crosses a suspend point.
90//
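// As an illustrative sketch (not taken from any particular test), consider a
// CFG of the form
//
//   entry -> susp -> use
//
// where 'susp' contains a suspend point. After the dataflow below reaches its
// fixed point:
//   Consumes[use] = { entry, susp, use }  (all three blocks can reach 'use')
//   Kills[use]    = { entry, susp }       ('susp' kills everything it consumes
//                                          and propagates that into 'use')
//   Suspend[susp] = true
// so hasPathCrossingSuspendPoint(entry, use) is true and any value defined in
// 'entry' and used in 'use' has to be spilled to the coroutine frame.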
91namespace {
92class SuspendCrossingInfo {
93 BlockToIndexMapping Mapping;
94
95 struct BlockData {
96 BitVector Consumes;
97 BitVector Kills;
98 bool Suspend = false;
99 bool End = false;
100 bool KillLoop = false;
101 bool Changed = false;
102 };
103 SmallVector<BlockData, SmallVectorThreshold> Block;
104
105 iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
106 BasicBlock *BB = Mapping.indexToBlock(Index: &BD - &Block[0]);
107 return llvm::predecessors(BB);
108 }
109
110 BlockData &getBlockData(BasicBlock *BB) {
111 return Block[Mapping.blockToIndex(BB)];
112 }
113
114 /// Compute the BlockData for the current function in one iteration.
  /// Initialize - Whether this is the first iteration; in that case we can
  /// skip the predecessor-change check below.
  /// Returns whether any BlockData changed in this iteration.
118 template <bool Initialize = false>
119 bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
120
121public:
122#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
123 void dump() const;
124 void dump(StringRef Label, BitVector const &BV) const;
125#endif
126
127 SuspendCrossingInfo(Function &F, coro::Shape &Shape);
128
129 /// Returns true if there is a path from \p From to \p To crossing a suspend
130 /// point without crossing \p From a 2nd time.
131 bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
132 size_t const FromIndex = Mapping.blockToIndex(BB: From);
133 size_t const ToIndex = Mapping.blockToIndex(BB: To);
134 bool const Result = Block[ToIndex].Kills[FromIndex];
135 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
136 << " answer is " << Result << "\n");
137 return Result;
138 }
139
140 /// Returns true if there is a path from \p From to \p To crossing a suspend
141 /// point without crossing \p From a 2nd time. If \p From is the same as \p To
142 /// this will also check if there is a looping path crossing a suspend point.
143 bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
144 BasicBlock *To) const {
145 size_t const FromIndex = Mapping.blockToIndex(BB: From);
146 size_t const ToIndex = Mapping.blockToIndex(BB: To);
147 bool Result = Block[ToIndex].Kills[FromIndex] ||
148 (From == To && Block[ToIndex].KillLoop);
149 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
150 << " answer is " << Result << " (path or loop)\n");
151 return Result;
152 }
153
154 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
155 auto *I = cast<Instruction>(Val: U);
156
157 // We rewrote PHINodes, so that only the ones with exactly one incoming
158 // value need to be analyzed.
159 if (auto *PN = dyn_cast<PHINode>(Val: I))
160 if (PN->getNumIncomingValues() > 1)
161 return false;
162
163 BasicBlock *UseBB = I->getParent();
164
165 // As a special case, treat uses by an llvm.coro.suspend.retcon or an
166 // llvm.coro.suspend.async as if they were uses in the suspend's single
167 // predecessor: the uses conceptually occur before the suspend.
168 if (isa<CoroSuspendRetconInst>(Val: I) || isa<CoroSuspendAsyncInst>(Val: I)) {
169 UseBB = UseBB->getSinglePredecessor();
170 assert(UseBB && "should have split coro.suspend into its own block");
171 }
172
173 return hasPathCrossingSuspendPoint(From: DefBB, To: UseBB);
174 }
175
176 bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
177 return isDefinitionAcrossSuspend(DefBB: &A.getParent()->getEntryBlock(), U);
178 }
179
180 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
181 auto *DefBB = I.getParent();
182
183 // As a special case, treat values produced by an llvm.coro.suspend.*
184 // as if they were defined in the single successor: the uses
185 // conceptually occur after the suspend.
186 if (isa<AnyCoroSuspendInst>(Val: I)) {
187 DefBB = DefBB->getSingleSuccessor();
188 assert(DefBB && "should have split coro.suspend into its own block");
189 }
190
191 return isDefinitionAcrossSuspend(DefBB, U);
192 }
193
194 bool isDefinitionAcrossSuspend(Value &V, User *U) const {
195 if (auto *Arg = dyn_cast<Argument>(Val: &V))
196 return isDefinitionAcrossSuspend(A&: *Arg, U);
197 if (auto *Inst = dyn_cast<Instruction>(Val: &V))
198 return isDefinitionAcrossSuspend(I&: *Inst, U);
199
    llvm_unreachable(
        "Coroutines can only collect Arguments and Instructions for now.");
202 }
203};
204} // end anonymous namespace
205
206#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
207LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
208 BitVector const &BV) const {
209 dbgs() << Label << ":";
210 for (size_t I = 0, N = BV.size(); I < N; ++I)
211 if (BV[I])
212 dbgs() << " " << Mapping.indexToBlock(Index: I)->getName();
213 dbgs() << "\n";
214}
215
216LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
217 for (size_t I = 0, N = Block.size(); I < N; ++I) {
218 BasicBlock *const B = Mapping.indexToBlock(Index: I);
219 dbgs() << B->getName() << ":\n";
220 dump(Label: " Consumes", BV: Block[I].Consumes);
221 dump(Label: " Kills", BV: Block[I].Kills);
222 }
223 dbgs() << "\n";
224}
225#endif
226
227template <bool Initialize>
228bool SuspendCrossingInfo::computeBlockData(
229 const ReversePostOrderTraversal<Function *> &RPOT) {
230 bool Changed = false;
231
232 for (const BasicBlock *BB : RPOT) {
233 auto BBNo = Mapping.blockToIndex(BB);
234 auto &B = Block[BBNo];
235
    // We don't need to check the predecessors on the initial iteration.
    if constexpr (!Initialize)
      // If none of the predecessors of the current block changed, the
      // BlockData for the current block cannot change either.
240 if (all_of(predecessors(BD: B), [this](BasicBlock *BB) {
241 return !Block[Mapping.blockToIndex(BB)].Changed;
242 })) {
243 B.Changed = false;
244 continue;
245 }
246
    // Save the Consumes and Kills bitsets so that it is easy to see
    // if anything changed after propagation.
249 auto SavedConsumes = B.Consumes;
250 auto SavedKills = B.Kills;
251
252 for (BasicBlock *PI : predecessors(BD: B)) {
253 auto PrevNo = Mapping.blockToIndex(BB: PI);
254 auto &P = Block[PrevNo];
255
256 // Propagate Kills and Consumes from predecessors into B.
257 B.Consumes |= P.Consumes;
258 B.Kills |= P.Kills;
259
260 // If block P is a suspend block, it should propagate kills into block
261 // B for every block P consumes.
262 if (P.Suspend)
263 B.Kills |= P.Consumes;
264 }
265
266 if (B.Suspend) {
267 // If block B is a suspend block, it should kill all of the blocks it
268 // consumes.
269 B.Kills |= B.Consumes;
270 } else if (B.End) {
271 // If block B is an end block, it should not propagate kills as the
272 // blocks following coro.end() are reached during initial invocation
273 // of the coroutine while all the data are still available on the
274 // stack or in the registers.
275 B.Kills.reset();
276 } else {
      // This is reached when block B is neither a suspend block nor a
      // coro.end block; we need to make sure that it is not in its own
      // kill set.
279 B.KillLoop |= B.Kills[BBNo];
280 B.Kills.reset(Idx: BBNo);
281 }
282
283 if constexpr (!Initialize) {
284 B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
285 Changed |= B.Changed;
286 }
287 }
288
289 return Changed;
290}
291
292SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
293 : Mapping(F) {
294 const size_t N = Mapping.size();
295 Block.resize(N);
296
297 // Initialize every block so that it consumes itself
298 for (size_t I = 0; I < N; ++I) {
299 auto &B = Block[I];
300 B.Consumes.resize(N);
301 B.Kills.resize(N);
302 B.Consumes.set(I);
303 B.Changed = true;
304 }
305
306 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
307 // the code beyond coro.end is reachable during initial invocation of the
308 // coroutine.
309 for (auto *CE : Shape.CoroEnds)
310 getBlockData(BB: CE->getParent()).End = true;
311
312 // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
314 // between coro.save and coro.suspend may resume the coroutine and all of the
315 // state needs to be saved by that time.
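  // For instance (illustrative only; @compute stands for whatever runs between
  // the save and the suspend, e.g. an await_suspend call):
  //
  //   %save = call token @llvm.coro.save(ptr null)
  //   %v    = call i32 @compute()   ; may hand the handle off and resume us
  //   %s    = call i8 @llvm.coro.suspend(token %save, i1 false)
  //
  // a value live across %save must already be treated as crossing a suspend,
  // because @compute() may resume the coroutine on another thread before the
  // coro.suspend itself executes.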
316 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
317 BasicBlock *SuspendBlock = BarrierInst->getParent();
318 auto &B = getBlockData(BB: SuspendBlock);
319 B.Suspend = true;
320 B.Kills |= B.Consumes;
321 };
322 for (auto *CSI : Shape.CoroSuspends) {
323 markSuspendBlock(CSI);
324 if (auto *Save = CSI->getCoroSave())
325 markSuspendBlock(Save);
326 }
327
  // It is considered faster to use an RPO traversal for a forward-edge
  // dataflow analysis.
330 ReversePostOrderTraversal<Function *> RPOT(&F);
331 computeBlockData</*Initialize=*/true>(RPOT);
332 while (computeBlockData</*Initialize*/ false>(RPOT))
333 ;
334
335 LLVM_DEBUG(dump());
336}
337
338namespace {
339
// RematGraph is used to construct a DAG of rematerializable instructions.
// When the constructor is invoked with a candidate instruction (which is
// materializable) it builds a DAG of materializable instructions from that
// point.
344// Typically, for each instruction identified as re-materializable across a
345// suspend point, a RematGraph will be created.
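// As a minimal sketch (the IR below is made up for illustration), given
//
//   %a = add i32 %x, 1
//   ; ... suspend point ...
//   %b = mul i32 %a, 2
//   ; ... suspend point ...
//   ... use of %b ...
//
// a RematGraph rooted at %b would contain an operand edge %b -> %a, provided
// %a is also considered materializable and its definition crosses a suspend
// point relative to %b, so that the whole chain can be recomputed at the use
// rather than spilling %b to the frame.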
346struct RematGraph {
  // Each RematNode in the graph contains edges to the instructions providing
  // operands of the current node.
349 struct RematNode {
350 Instruction *Node;
351 SmallVector<RematNode *> Operands;
352 RematNode() = default;
353 RematNode(Instruction *V) : Node(V) {}
354 };
355
356 RematNode *EntryNode;
357 using RematNodeMap =
358 SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
359 RematNodeMap Remats;
360 const std::function<bool(Instruction &)> &MaterializableCallback;
361 SuspendCrossingInfo &Checker;
362
363 RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
364 Instruction *I, SuspendCrossingInfo &Checker)
365 : MaterializableCallback(MaterializableCallback), Checker(Checker) {
366 std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(args&: I);
367 EntryNode = FirstNode.get();
368 std::deque<std::unique_ptr<RematNode>> WorkList;
369 addNode(NUPtr: std::move(FirstNode), WorkList, FirstUse: cast<User>(Val: I));
370 while (WorkList.size()) {
371 std::unique_ptr<RematNode> N = std::move(WorkList.front());
372 WorkList.pop_front();
373 addNode(NUPtr: std::move(N), WorkList, FirstUse: cast<User>(Val: I));
374 }
375 }
376
377 void addNode(std::unique_ptr<RematNode> NUPtr,
378 std::deque<std::unique_ptr<RematNode>> &WorkList,
379 User *FirstUse) {
380 RematNode *N = NUPtr.get();
381 if (Remats.count(Key: N->Node))
382 return;
383
    // We haven't seen this node yet - add it to the list.
385 Remats[N->Node] = std::move(NUPtr);
386 for (auto &Def : N->Node->operands()) {
387 Instruction *D = dyn_cast<Instruction>(Val: Def.get());
388 if (!D || !MaterializableCallback(*D) ||
389 !Checker.isDefinitionAcrossSuspend(I&: *D, U: FirstUse))
390 continue;
391
392 if (Remats.count(Key: D)) {
393 // Already have this in the graph
394 N->Operands.push_back(Elt: Remats[D].get());
395 continue;
396 }
397
398 bool NoMatch = true;
399 for (auto &I : WorkList) {
400 if (I->Node == D) {
401 NoMatch = false;
402 N->Operands.push_back(Elt: I.get());
403 break;
404 }
405 }
406 if (NoMatch) {
407 // Create a new node
408 std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(args&: D);
409 N->Operands.push_back(Elt: ChildNode.get());
410 WorkList.push_back(x: std::move(ChildNode));
411 }
412 }
413 }
414
415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
416 void dump() const {
417 dbgs() << "Entry (";
418 if (EntryNode->Node->getParent()->hasName())
419 dbgs() << EntryNode->Node->getParent()->getName();
420 else
421 EntryNode->Node->getParent()->printAsOperand(O&: dbgs(), PrintType: false);
422 dbgs() << ") : " << *EntryNode->Node << "\n";
423 for (auto &E : Remats) {
424 dbgs() << *(E.first) << "\n";
425 for (RematNode *U : E.second->Operands)
426 dbgs() << " " << *U->Node << "\n";
427 }
428 }
429#endif
430};
431} // end anonymous namespace
432
433namespace llvm {
434
435template <> struct GraphTraits<RematGraph *> {
436 using NodeRef = RematGraph::RematNode *;
437 using ChildIteratorType = RematGraph::RematNode **;
438
439 static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
440 static ChildIteratorType child_begin(NodeRef N) {
441 return N->Operands.begin();
442 }
443 static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
444};
445
446} // end namespace llvm
447
448#undef DEBUG_TYPE // "coro-suspend-crossing"
449#define DEBUG_TYPE "coro-frame"
450
451namespace {
452class FrameTypeBuilder;
// Mapping from a to-be-spilled value to all of its users that need a reload.
454using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
455struct AllocaInfo {
456 AllocaInst *Alloca;
457 DenseMap<Instruction *, std::optional<APInt>> Aliases;
458 bool MayWriteBeforeCoroBegin;
459 AllocaInfo(AllocaInst *Alloca,
460 DenseMap<Instruction *, std::optional<APInt>> Aliases,
461 bool MayWriteBeforeCoroBegin)
462 : Alloca(Alloca), Aliases(std::move(Aliases)),
463 MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
464};
465struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
468 SpillInfo Spills;
469 // Allocas contains all values defined as allocas that need to live in the
470 // frame.
471 SmallVector<AllocaInfo, 8> Allocas;
472
473 SmallVector<Value *, 8> getAllDefs() const {
474 SmallVector<Value *, 8> Defs;
475 for (const auto &P : Spills)
476 Defs.push_back(Elt: P.first);
477 for (const auto &A : Allocas)
478 Defs.push_back(Elt: A.Alloca);
479 return Defs;
480 }
481
482 uint32_t getFieldIndex(Value *V) const {
483 auto Itr = FieldIndexMap.find(Val: V);
484 assert(Itr != FieldIndexMap.end() &&
485 "Value does not have a frame field index");
486 return Itr->second;
487 }
488
489 void setFieldIndex(Value *V, uint32_t Index) {
490 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
491 "Cannot set the index for the same field twice.");
492 FieldIndexMap[V] = Index;
493 }
494
495 Align getAlign(Value *V) const {
496 auto Iter = FieldAlignMap.find(Val: V);
497 assert(Iter != FieldAlignMap.end());
498 return Iter->second;
499 }
500
501 void setAlign(Value *V, Align AL) {
502 assert(FieldAlignMap.count(V) == 0);
503 FieldAlignMap.insert(KV: {V, AL});
504 }
505
506 uint64_t getDynamicAlign(Value *V) const {
507 auto Iter = FieldDynamicAlignMap.find(Val: V);
508 assert(Iter != FieldDynamicAlignMap.end());
509 return Iter->second;
510 }
511
512 void setDynamicAlign(Value *V, uint64_t Align) {
513 assert(FieldDynamicAlignMap.count(V) == 0);
514 FieldDynamicAlignMap.insert(KV: {V, Align});
515 }
516
517 uint64_t getOffset(Value *V) const {
518 auto Iter = FieldOffsetMap.find(Val: V);
519 assert(Iter != FieldOffsetMap.end());
520 return Iter->second;
521 }
522
523 void setOffset(Value *V, uint64_t Offset) {
524 assert(FieldOffsetMap.count(V) == 0);
525 FieldOffsetMap.insert(KV: {V, Offset});
526 }
527
528 // Remap the index of every field in the frame, using the final layout index.
529 void updateLayoutIndex(FrameTypeBuilder &B);
530
531private:
532 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
533 // twice by mistake.
534 bool LayoutIndexUpdateStarted = false;
535 // Map from values to their slot indexes on the frame. They will be first set
536 // with their original insertion field index. After the frame is built, their
537 // indexes will be updated into the final layout index.
538 DenseMap<Value *, uint32_t> FieldIndexMap;
539 // Map from values to their alignment on the frame. They would be set after
540 // the frame is built.
541 DenseMap<Value *, Align> FieldAlignMap;
542 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
543 // Map from values to their offset on the frame. They would be set after
544 // the frame is built.
545 DenseMap<Value *, uint64_t> FieldOffsetMap;
546};
547} // namespace
548
549#ifndef NDEBUG
550static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
551 dbgs() << "------------- " << Title << "--------------\n";
552 for (const auto &E : Spills) {
553 E.first->dump();
554 dbgs() << " user: ";
555 for (auto *I : E.second)
556 I->dump();
557 }
558}
559static void dumpRemats(
560 StringRef Title,
561 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
562 dbgs() << "------------- " << Title << "--------------\n";
563 for (const auto &E : RM) {
564 E.second->dump();
565 dbgs() << "--\n";
566 }
567}
568
569static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
570 dbgs() << "------------- Allocas --------------\n";
571 for (const auto &A : Allocas) {
572 A.Alloca->dump();
573 }
574}
575#endif
576
577namespace {
578using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the Alloca instruction
// differs from the natural alignment of the allocated type, we will need to
// insert padding.
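// For example (illustrative only), an `alloca i32, align 16` may force extra
// padding bytes in the frame struct so that its field still lands on a
// 16-byte-aligned offset, even though the natural (ABI) alignment of i32 is
// only 4 bytes.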
583class FrameTypeBuilder {
584private:
585 struct Field {
586 uint64_t Size;
587 uint64_t Offset;
588 Type *Ty;
589 FieldIDType LayoutFieldIndex;
590 Align Alignment;
591 Align TyAlignment;
592 uint64_t DynamicAlignBuffer;
593 };
594
595 const DataLayout &DL;
596 LLVMContext &Context;
597 uint64_t StructSize = 0;
598 Align StructAlign;
599 bool IsFinished = false;
600
601 std::optional<Align> MaxFrameAlignment;
602
603 SmallVector<Field, 8> Fields;
604 DenseMap<Value*, unsigned> FieldIndexByKey;
605
606public:
607 FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
608 std::optional<Align> MaxFrameAlignment)
609 : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
610
611 /// Add a field to this structure for the storage of an `alloca`
612 /// instruction.
613 [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
614 bool IsHeader = false) {
615 Type *Ty = AI->getAllocatedType();
616
617 // Make an array type if this is a static array allocation.
618 if (AI->isArrayAllocation()) {
619 if (auto *CI = dyn_cast<ConstantInt>(Val: AI->getArraySize()))
620 Ty = ArrayType::get(ElementType: Ty, NumElements: CI->getValue().getZExtValue());
621 else
622 report_fatal_error(reason: "Coroutines cannot handle non static allocas yet");
623 }
624
625 return addField(Ty, MaybeFieldAlignment: AI->getAlign(), IsHeader);
626 }
627
  /// We want to put allocas whose lifetime ranges do not overlap into one
  /// slot of the coroutine frame.
  /// Consider the example at https://bugs.llvm.org/show_bug.cgi?id=45566
631 ///
632 /// cppcoro::task<void> alternative_paths(bool cond) {
633 /// if (cond) {
634 /// big_structure a;
635 /// process(a);
636 /// co_await something();
637 /// } else {
638 /// big_structure b;
639 /// process2(b);
640 /// co_await something();
641 /// }
642 /// }
643 ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts into non-overlapping sets so that the allocas in the same
  /// set can share a slot in the coroutine frame. It then adds a field for
  /// each set, using the largest alloca's type as the field type.
  ///
  /// Side effect: because we sort the allocas, the order of allocas in the
  /// frame may differ from their order in the source code.
655 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
656 coro::Shape &Shape);
657
658 /// Add a field to this structure.
659 [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
660 bool IsHeader = false,
661 bool IsSpillOfValue = false) {
662 assert(!IsFinished && "adding fields to a finished builder");
663 assert(Ty && "must provide a type for a field");
664
665 // The field size is always the alloc size of the type.
666 uint64_t FieldSize = DL.getTypeAllocSize(Ty);
667
    // For an alloca with size=0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
670 if (FieldSize == 0) {
671 return 0;
672 }
673
674 // The field alignment might not be the type alignment, but we need
675 // to remember the type alignment anyway to build the type.
676 // If we are spilling values we don't need to worry about ABI alignment
677 // concerns.
678 Align ABIAlign = DL.getABITypeAlign(Ty);
679 Align TyAlignment = ABIAlign;
680 if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
681 TyAlignment = *MaxFrameAlignment;
682 Align FieldAlignment = MaybeFieldAlignment.value_or(u&: TyAlignment);
683
    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically align
    // the pointer.
687 uint64_t DynamicAlignBuffer = 0;
688 if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
689 DynamicAlignBuffer =
690 offsetToAlignment(Value: MaxFrameAlignment->value(), Alignment: FieldAlignment);
691 FieldAlignment = *MaxFrameAlignment;
692 FieldSize = FieldSize + DynamicAlignBuffer;
693 }
694
695 // Lay out header fields immediately.
696 uint64_t Offset;
697 if (IsHeader) {
698 Offset = alignTo(Size: StructSize, A: FieldAlignment);
699 StructSize = Offset + FieldSize;
700
701 // Everything else has a flexible offset.
702 } else {
703 Offset = OptimizedStructLayoutField::FlexibleOffset;
704 }
705
706 Fields.push_back(Elt: {.Size: FieldSize, .Offset: Offset, .Ty: Ty, .LayoutFieldIndex: 0, .Alignment: FieldAlignment, .TyAlignment: TyAlignment,
707 .DynamicAlignBuffer: DynamicAlignBuffer});
708 return Fields.size() - 1;
709 }
710
711 /// Finish the layout and set the body on the given type.
712 void finish(StructType *Ty);
713
714 uint64_t getStructSize() const {
715 assert(IsFinished && "not yet finished!");
716 return StructSize;
717 }
718
719 Align getStructAlign() const {
720 assert(IsFinished && "not yet finished!");
721 return StructAlign;
722 }
723
724 FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
725 assert(IsFinished && "not yet finished!");
726 return Fields[Id].LayoutFieldIndex;
727 }
728
729 Field getLayoutField(FieldIDType Id) const {
730 assert(IsFinished && "not yet finished!");
731 return Fields[Id];
732 }
733};
734} // namespace
735
736void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
737 auto Updater = [&](Value *I) {
738 auto Field = B.getLayoutField(Id: getFieldIndex(V: I));
739 setFieldIndex(V: I, Index: Field.LayoutFieldIndex);
740 setAlign(V: I, AL: Field.Alignment);
741 uint64_t dynamicAlign =
742 Field.DynamicAlignBuffer
743 ? Field.DynamicAlignBuffer + Field.Alignment.value()
744 : 0;
745 setDynamicAlign(V: I, Align: dynamicAlign);
746 setOffset(V: I, Offset: Field.Offset);
747 };
748 LayoutIndexUpdateStarted = true;
749 for (auto &S : Spills)
750 Updater(S.first);
751 for (const auto &A : Allocas)
752 Updater(A.Alloca);
753 LayoutIndexUpdateStarted = false;
754}
755
756void FrameTypeBuilder::addFieldForAllocas(const Function &F,
757 FrameDataInfo &FrameData,
758 coro::Shape &Shape) {
759 using AllocaSetType = SmallVector<AllocaInst *, 4>;
760 SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
761
  // We need to add fields for the allocas at the end of this function.
763 auto AddFieldForAllocasAtExit = make_scope_exit(F: [&]() {
764 for (auto AllocaList : NonOverlapedAllocas) {
765 auto *LargestAI = *AllocaList.begin();
766 FieldIDType Id = addFieldForAlloca(AI: LargestAI);
767 for (auto *Alloca : AllocaList)
768 FrameData.setFieldIndex(V: Alloca, Index: Id);
769 }
770 });
771
772 if (!Shape.OptimizeFrame) {
773 for (const auto &A : FrameData.Allocas) {
774 AllocaInst *Alloca = A.Alloca;
775 NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
776 }
777 return;
778 }
779
  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap in the
  // blocks that contain coro.end and their successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it merely prevents putting the allocas in the same
  // slot.
792 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
793 for (auto *CoroSuspendInst : Shape.CoroSuspends) {
794 for (auto *U : CoroSuspendInst->users()) {
795 if (auto *ConstSWI = dyn_cast<SwitchInst>(Val: U)) {
796 auto *SWI = const_cast<SwitchInst *>(ConstSWI);
797 DefaultSuspendDest[SWI] = SWI->getDefaultDest();
798 SWI->setDefaultDest(SWI->getSuccessor(idx: 1));
799 }
800 }
801 }
802
803 auto ExtractAllocas = [&]() {
804 AllocaSetType Allocas;
805 Allocas.reserve(N: FrameData.Allocas.size());
806 for (const auto &A : FrameData.Allocas)
807 Allocas.push_back(Elt: A.Alloca);
808 return Allocas;
809 };
810 StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
811 StackLifetime::LivenessType::May);
812 StackLifetimeAnalyzer.run();
813 auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
814 return StackLifetimeAnalyzer.getLiveRange(AI: AI1).overlaps(
815 Other: StackLifetimeAnalyzer.getLiveRange(AI: AI2));
816 };
817 auto GetAllocaSize = [&](const AllocaInfo &A) {
818 std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
819 assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
820 assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
821 return RetSize->getFixedValue();
822 };
  // Put larger allocas at the front so they have higher priority to merge,
  // which can potentially save more space. This also keeps each AllocaSet
  // ordered, so we can easily get the largest alloca in an AllocaSet.
827 sort(C&: FrameData.Allocas, Comp: [&](const auto &Iter1, const auto &Iter2) {
828 return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
829 });
830 for (const auto &A : FrameData.Allocas) {
831 AllocaInst *Alloca = A.Alloca;
832 bool Merged = false;
    // Try to find an existing NonOverlappedAllocaSet that this alloca does
    // not interfere with. If such a set exists, insert the alloca into it.
836 for (auto &AllocaSet : NonOverlapedAllocas) {
837 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
838 bool NoInference = none_of(Range&: AllocaSet, P: [&](auto Iter) {
839 return IsAllocaInferenre(Alloca, Iter);
840 });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be more fine-grained strategies for handling alignment
      // information during the merging process, but they seem hard to
      // implement for little benefit.
847 bool Alignable = [&]() -> bool {
848 auto *LargestAlloca = *AllocaSet.begin();
849 return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
850 0;
851 }();
852 bool CouldMerge = NoInference && Alignable;
853 if (!CouldMerge)
854 continue;
855 AllocaSet.push_back(Elt: Alloca);
856 Merged = true;
857 break;
858 }
859 if (!Merged) {
860 NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
861 }
862 }
  // Restore the default destination of each switch statement we saved
  // earlier.
865 for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
866 SwitchInst *SWI = SwitchAndDefaultDest.first;
867 BasicBlock *DestBB = SwitchAndDefaultDest.second;
868 SWI->setDefaultDest(DestBB);
869 }
  // This debug output tells us which allocas are merged into one slot.
871 LLVM_DEBUG(for (auto &AllocaSet
872 : NonOverlapedAllocas) {
873 if (AllocaSet.size() > 1) {
874 dbgs() << "In Function:" << F.getName() << "\n";
875 dbgs() << "Find Union Set "
876 << "\n";
877 dbgs() << "\tAllocas are \n";
878 for (auto Alloca : AllocaSet)
879 dbgs() << "\t\t" << *Alloca << "\n";
880 }
881 });
882}
883
884void FrameTypeBuilder::finish(StructType *Ty) {
885 assert(!IsFinished && "already finished!");
886
887 // Prepare the optimal-layout field array.
888 // The Id in the layout field is a pointer to our Field for it.
889 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
890 LayoutFields.reserve(N: Fields.size());
891 for (auto &Field : Fields) {
892 LayoutFields.emplace_back(Args: &Field, Args&: Field.Size, Args&: Field.Alignment,
893 Args&: Field.Offset);
894 }
895
896 // Perform layout.
897 auto SizeAndAlign = performOptimizedStructLayout(Fields: LayoutFields);
898 StructSize = SizeAndAlign.first;
899 StructAlign = SizeAndAlign.second;
900
901 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
902 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
903 };
904
905 // We need to produce a packed struct type if there's a field whose
906 // assigned offset isn't a multiple of its natural type alignment.
907 bool Packed = [&] {
908 for (auto &LayoutField : LayoutFields) {
909 auto &F = getField(LayoutField);
910 if (!isAligned(Lhs: F.TyAlignment, SizeInBytes: LayoutField.Offset))
911 return true;
912 }
913 return false;
914 }();
915
916 // Build the struct body.
917 SmallVector<Type*, 16> FieldTypes;
918 FieldTypes.reserve(N: LayoutFields.size() * 3 / 2);
919 uint64_t LastOffset = 0;
920 for (auto &LayoutField : LayoutFields) {
921 auto &F = getField(LayoutField);
922
923 auto Offset = LayoutField.Offset;
924
925 // Add a padding field if there's a padding gap and we're either
926 // building a packed struct or the padding gap is more than we'd
927 // get from aligning to the field type's natural alignment.
928 assert(Offset >= LastOffset);
929 if (Offset != LastOffset) {
930 if (Packed || alignTo(Size: LastOffset, A: F.TyAlignment) != Offset)
931 FieldTypes.push_back(Elt: ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
932 NumElements: Offset - LastOffset));
933 }
934
935 F.Offset = Offset;
936 F.LayoutFieldIndex = FieldTypes.size();
937
938 FieldTypes.push_back(Elt: F.Ty);
939 if (F.DynamicAlignBuffer) {
940 FieldTypes.push_back(
941 Elt: ArrayType::get(ElementType: Type::getInt8Ty(C&: Context), NumElements: F.DynamicAlignBuffer));
942 }
943 LastOffset = Offset + F.Size;
944 }
945
946 Ty->setBody(Elements: FieldTypes, isPacked: Packed);
947
948#ifndef NDEBUG
949 // Check that the IR layout matches the offsets we expect.
950 auto Layout = DL.getStructLayout(Ty);
951 for (auto &F : Fields) {
952 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
953 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
954 }
955#endif
956
957 IsFinished = true;
958}
959
960static void cacheDIVar(FrameDataInfo &FrameData,
961 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
962 for (auto *V : FrameData.getAllDefs()) {
963 if (DIVarCache.contains(Val: V))
964 continue;
965
966 auto CacheIt = [&DIVarCache, V](const auto &Container) {
967 auto *I = llvm::find_if(Container, [](auto *DDI) {
968 return DDI->getExpression()->getNumElements() == 0;
969 });
970 if (I != Container.end())
971 DIVarCache.insert({V, (*I)->getVariable()});
972 };
973 CacheIt(findDbgDeclares(V));
974 CacheIt(findDVRDeclares(V));
975 }
976}
977
/// Create a name for the given Type. It uses MDString to store the newly
/// created string to avoid a memory leak.
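/// For example (illustrative), an i32 type yields the name "__int_32", and a
/// struct named "struct.Foo::Bar" yields "struct_Foo__Bar" ('.' and ':' are
/// replaced with '_').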
980static StringRef solveTypeName(Type *Ty) {
981 if (Ty->isIntegerTy()) {
    // The longest common name may be '__int_128', which has 9 characters.
983 SmallString<16> Buffer;
984 raw_svector_ostream OS(Buffer);
985 OS << "__int_" << cast<IntegerType>(Val: Ty)->getBitWidth();
986 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: OS.str());
987 return MDName->getString();
988 }
989
990 if (Ty->isFloatingPointTy()) {
991 if (Ty->isFloatTy())
992 return "__float_";
993 if (Ty->isDoubleTy())
994 return "__double_";
995 return "__floating_type_";
996 }
997
998 if (Ty->isPointerTy())
999 return "PointerType";
1000
1001 if (Ty->isStructTy()) {
1002 if (!cast<StructType>(Val: Ty)->hasName())
1003 return "__LiteralStructType_";
1004
1005 auto Name = Ty->getStructName();
1006
1007 SmallString<16> Buffer(Name);
1008 for (auto &Iter : Buffer)
1009 if (Iter == '.' || Iter == ':')
1010 Iter = '_';
1011 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: Buffer.str());
1012 return MDName->getString();
1013 }
1014
1015 return "UnknownType";
1016}
1017
1018static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
1019 const DataLayout &Layout, DIScope *Scope,
1020 unsigned LineNum,
1021 DenseMap<Type *, DIType *> &DITypeCache) {
1022 if (DIType *DT = DITypeCache.lookup(Val: Ty))
1023 return DT;
1024
1025 StringRef Name = solveTypeName(Ty);
1026
1027 DIType *RetType = nullptr;
1028
1029 if (Ty->isIntegerTy()) {
1030 auto BitWidth = cast<IntegerType>(Val: Ty)->getBitWidth();
1031 RetType = Builder.createBasicType(Name, SizeInBits: BitWidth, Encoding: dwarf::DW_ATE_signed,
1032 Flags: llvm::DINode::FlagArtificial);
1033 } else if (Ty->isFloatingPointTy()) {
1034 RetType = Builder.createBasicType(Name, SizeInBits: Layout.getTypeSizeInBits(Ty),
1035 Encoding: dwarf::DW_ATE_float,
1036 Flags: llvm::DINode::FlagArtificial);
1037 } else if (Ty->isPointerTy()) {
    // Construct a PointerType that points to null (aka void *) instead of
    // exploring the pointee type, to avoid an infinite-recursion problem.
    // For example, we would be in trouble if we traversed recursively:
1041 //
1042 // struct Node {
1043 // Node* ptr;
1044 // };
1045 RetType =
1046 Builder.createPointerType(PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty),
1047 AlignInBits: Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
1048 /*DWARFAddressSpace=*/std::nullopt, Name);
1049 } else if (Ty->isStructTy()) {
1050 auto *DIStruct = Builder.createStructType(
1051 Scope, Name, File: Scope->getFile(), LineNumber: LineNum, SizeInBits: Layout.getTypeSizeInBits(Ty),
1052 AlignInBits: Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
1053 Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr, Elements: llvm::DINodeArray());
1054
1055 auto *StructTy = cast<StructType>(Val: Ty);
1056 SmallVector<Metadata *, 16> Elements;
1057 for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
1058 DIType *DITy = solveDIType(Builder, Ty: StructTy->getElementType(N: I), Layout,
1059 Scope, LineNum, DITypeCache);
1060 assert(DITy);
1061 Elements.push_back(Elt: Builder.createMemberType(
1062 Scope, Name: DITy->getName(), File: Scope->getFile(), LineNo: LineNum,
1063 SizeInBits: DITy->getSizeInBits(), AlignInBits: DITy->getAlignInBits(),
1064 OffsetInBits: Layout.getStructLayout(Ty: StructTy)->getElementOffsetInBits(Idx: I),
1065 Flags: llvm::DINode::FlagArtificial, Ty: DITy));
1066 }
1067
1068 Builder.replaceArrays(T&: DIStruct, Elements: Builder.getOrCreateArray(Elements));
1069
1070 RetType = DIStruct;
1071 } else {
1072 LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
1073 TypeSize Size = Layout.getTypeSizeInBits(Ty);
1074 auto *CharSizeType = Builder.createBasicType(
1075 Name, SizeInBits: 8, Encoding: dwarf::DW_ATE_unsigned_char, Flags: llvm::DINode::FlagArtificial);
1076
1077 if (Size <= 8)
1078 RetType = CharSizeType;
1079 else {
1080 if (Size % 8 != 0)
1081 Size = TypeSize::getFixed(ExactSize: Size + 8 - (Size % 8));
1082
1083 RetType = Builder.createArrayType(
1084 Size, AlignInBits: Layout.getPrefTypeAlign(Ty).value(), Ty: CharSizeType,
1085 Subscripts: Builder.getOrCreateArray(Elements: Builder.getOrCreateSubrange(Lo: 0, Count: Size / 8)));
1086 }
1087 }
1088
1089 DITypeCache.insert(KV: {Ty, RetType});
1090 return RetType;
1091}
1092
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine frame
/// is constructed in the following way:
/// 1. For each value in the frame, we search for a dbg.declare use to find the
///    corresponding debug variable for the value. If we can find the debug
///    variable, we get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we fall back to building a
///    DIType from the value's Type. We do this in solveDIType. We only handle
///    integer, floating-point, pointer, and struct types for now.
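/// As an illustrative sketch of the result (exact members depend on the
/// frame), the emitted "__coro_frame" type for a switch-ABI coroutine contains
/// members such as:
///   __resume_fn   - pointer to the resume function
///   __destroy_fn  - pointer to the destroy function
///   __coro_index  - the current suspend index
/// plus one member per promise/spilled value, named after the source variable
/// whenever a dbg.declare for it is available.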
1105static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
1106 FrameDataInfo &FrameData) {
1107 DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, the function was not compiled with
  // debug info, so we don't need to generate debug info for the frame either.
1111 if (!DIS || !DIS->getUnit() ||
1112 !dwarf::isCPlusPlus(
1113 S: (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
1114 return;
1115
1116 assert(Shape.ABI == coro::ABI::Switch &&
         "We can only build debug information for C++ coroutines now.\n");
1118
1119 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
1120
1121 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1122 assert(PromiseAlloca &&
1123 "Coroutine with switch ABI should own Promise alloca");
1124
1125 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(V: PromiseAlloca);
1126 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(V: PromiseAlloca);
1127
1128 DILocalVariable *PromiseDIVariable = nullptr;
1129 DILocation *DILoc = nullptr;
1130 if (!DIs.empty()) {
1131 DbgDeclareInst *PromiseDDI = DIs.front();
1132 PromiseDIVariable = PromiseDDI->getVariable();
1133 DILoc = PromiseDDI->getDebugLoc().get();
1134 } else if (!DVRs.empty()) {
1135 DbgVariableRecord *PromiseDVR = DVRs.front();
1136 PromiseDIVariable = PromiseDVR->getVariable();
1137 DILoc = PromiseDVR->getDebugLoc().get();
1138 } else {
1139 return;
1140 }
1141
1142 DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
1143 DIFile *DFile = PromiseDIScope->getFile();
1144 unsigned LineNum = PromiseDIVariable->getLine();
1145
1146 DICompositeType *FrameDITy = DBuilder.createStructType(
1147 Scope: DIS->getUnit(), Name: Twine(F.getName() + ".coro_frame_ty").str(),
1148 File: DFile, LineNumber: LineNum, SizeInBits: Shape.FrameSize * 8,
1149 AlignInBits: Shape.FrameAlign.value() * 8, Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr,
1150 Elements: llvm::DINodeArray());
1151 StructType *FrameTy = Shape.FrameTy;
1152 SmallVector<Metadata *, 16> Elements;
1153 DataLayout Layout = F.getParent()->getDataLayout();
1154
1155 DenseMap<Value *, DILocalVariable *> DIVarCache;
1156 cacheDIVar(FrameData, DIVarCache);
1157
1158 unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
1159 unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
1160 unsigned IndexIndex = Shape.SwitchLowering.IndexField;
1161
1162 DenseMap<unsigned, StringRef> NameCache;
1163 NameCache.insert(KV: {ResumeIndex, "__resume_fn"});
1164 NameCache.insert(KV: {DestroyIndex, "__destroy_fn"});
1165 NameCache.insert(KV: {IndexIndex, "__coro_index"});
1166
1167 Type *ResumeFnTy = FrameTy->getElementType(N: ResumeIndex),
1168 *DestroyFnTy = FrameTy->getElementType(N: DestroyIndex),
1169 *IndexTy = FrameTy->getElementType(N: IndexIndex);
1170
1171 DenseMap<unsigned, DIType *> TyCache;
1172 TyCache.insert(
1173 KV: {ResumeIndex, DBuilder.createPointerType(
1174 PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty: ResumeFnTy))});
1175 TyCache.insert(
1176 KV: {DestroyIndex, DBuilder.createPointerType(
1177 PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty: DestroyFnTy))});
1178
1179 /// FIXME: If we fill the field `SizeInBits` with the actual size of
1180 /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
1181 TyCache.insert(KV: {IndexIndex, DBuilder.createBasicType(
1182 Name: "__coro_index",
1183 SizeInBits: (Layout.getTypeSizeInBits(Ty: IndexTy) < 8)
1184 ? 8
1185 : Layout.getTypeSizeInBits(Ty: IndexTy),
1186 Encoding: dwarf::DW_ATE_unsigned_char)});
1187
1188 for (auto *V : FrameData.getAllDefs()) {
1189 if (!DIVarCache.contains(Val: V))
1190 continue;
1191
1192 auto Index = FrameData.getFieldIndex(V);
1193
1194 NameCache.insert(KV: {Index, DIVarCache[V]->getName()});
1195 TyCache.insert(KV: {Index, DIVarCache[V]->getType()});
1196 }
1197
  // Cache from index to (Align, Offset) pair.
1199 DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
1200 // The Align and Offset of Resume function and Destroy function are fixed.
1201 OffsetCache.insert(KV: {ResumeIndex, {8, 0}});
1202 OffsetCache.insert(KV: {DestroyIndex, {8, 8}});
1203 OffsetCache.insert(
1204 KV: {IndexIndex,
1205 {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
1206
1207 for (auto *V : FrameData.getAllDefs()) {
1208 auto Index = FrameData.getFieldIndex(V);
1209
1210 OffsetCache.insert(
1211 KV: {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
1212 }
1213
1214 DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names, e.g., there may be
  // many i32 and i64 types in one coroutine, and we would use i32_0 and
  // i32_1 to distinguish them, since it makes no sense for the names of the
  // fields to conflict with each other.
1219 unsigned UnknownTypeNum = 0;
1220 for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
1221 if (!OffsetCache.contains(Val: Index))
1222 continue;
1223
1224 std::string Name;
1225 uint64_t SizeInBits;
1226 uint32_t AlignInBits;
1227 uint64_t OffsetInBits;
1228 DIType *DITy = nullptr;
1229
1230 Type *Ty = FrameTy->getElementType(N: Index);
1231 assert(Ty->isSized() && "We can't handle type which is not sized.\n");
1232 SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
1233 AlignInBits = OffsetCache[Index].first * 8;
1234 OffsetInBits = OffsetCache[Index].second * 8;
1235
1236 if (NameCache.contains(Val: Index)) {
1237 Name = NameCache[Index].str();
1238 DITy = TyCache[Index];
1239 } else {
1240 DITy = solveDIType(Builder&: DBuilder, Ty, Layout, Scope: FrameDITy, LineNum, DITypeCache);
1241 assert(DITy && "SolveDIType shouldn't return nullptr.\n");
1242 Name = DITy->getName().str();
1243 Name += "_" + std::to_string(val: UnknownTypeNum);
1244 UnknownTypeNum++;
1245 }
1246
1247 Elements.push_back(Elt: DBuilder.createMemberType(
1248 Scope: FrameDITy, Name, File: DFile, LineNo: LineNum, SizeInBits, AlignInBits, OffsetInBits,
1249 Flags: llvm::DINode::FlagArtificial, Ty: DITy));
1250 }
1251
1252 DBuilder.replaceArrays(T&: FrameDITy, Elements: DBuilder.getOrCreateArray(Elements));
1253
1254 auto *FrameDIVar = DBuilder.createAutoVariable(Scope: PromiseDIScope, Name: "__coro_frame",
1255 File: DFile, LineNo: LineNum, Ty: FrameDITy,
1256 AlwaysPreserve: true, Flags: DINode::FlagArtificial);
1257 assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
1258
  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains, so we need to add __coro_frame to it.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than the more precise
  // `__coro_frame is optimized out`.
1266 if (auto *SubProgram = dyn_cast<DISubprogram>(Val: PromiseDIScope)) {
1267 auto RetainedNodes = SubProgram->getRetainedNodes();
1268 SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
1269 RetainedNodes.end());
1270 RetainedNodesVec.push_back(Elt: FrameDIVar);
1271 SubProgram->replaceOperandWith(
1272 I: 7, New: (MDTuple::get(Context&: F.getContext(), MDs: RetainedNodesVec)));
1273 }
1274
1275 if (UseNewDbgInfoFormat) {
1276 DbgVariableRecord *NewDVR =
1277 new DbgVariableRecord(ValueAsMetadata::get(V: Shape.FramePtr), FrameDIVar,
1278 DBuilder.createExpression(), DILoc,
1279 DbgVariableRecord::LocationType::Declare);
1280 BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
1281 It->getParent()->insertDbgRecordBefore(DR: NewDVR, Here: It);
1282 } else {
1283 DBuilder.insertDeclare(Storage: Shape.FramePtr, VarInfo: FrameDIVar,
1284 Expr: DBuilder.createExpression(), DL: DILoc,
1285 InsertBefore: &*Shape.getInsertPtAfterFramePtr());
1286 }
1287}
1288
1289// Build a struct that will keep state for an active coroutine.
1290// struct f.frame {
1291// ResumeFnTy ResumeFnAddr;
1292// ResumeFnTy DestroyFnAddr;
1293// ... promise (if present) ...
1294// int ResumeIndex;
1295// ... spills ...
1296// };
1297static StructType *buildFrameType(Function &F, coro::Shape &Shape,
1298 FrameDataInfo &FrameData) {
1299 LLVMContext &C = F.getContext();
1300 const DataLayout &DL = F.getParent()->getDataLayout();
1301 StructType *FrameTy = [&] {
1302 SmallString<32> Name(F.getName());
1303 Name.append(RHS: ".Frame");
1304 return StructType::create(Context&: C, Name);
1305 }();
1306
1307 // We will use this value to cap the alignment of spilled values.
1308 std::optional<Align> MaxFrameAlignment;
1309 if (Shape.ABI == coro::ABI::Async)
1310 MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
1311 FrameTypeBuilder B(C, DL, MaxFrameAlignment);
1312
1313 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1314 std::optional<FieldIDType> SwitchIndexFieldId;
1315
1316 if (Shape.ABI == coro::ABI::Switch) {
1317 auto *FnPtrTy = PointerType::getUnqual(C);
1318
1319 // Add header fields for the resume and destroy functions.
1320 // We can rely on these being perfectly packed.
1321 (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: std::nullopt, /*header*/ IsHeader: true);
1322 (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: std::nullopt, /*header*/ IsHeader: true);
1323
1324 // PromiseAlloca field needs to be explicitly added here because it's
1325 // a header field with a fixed offset based on its alignment. Hence it
1326 // needs special handling and cannot be added to FrameData.Allocas.
1327 if (PromiseAlloca)
1328 FrameData.setFieldIndex(
1329 V: PromiseAlloca, Index: B.addFieldForAlloca(AI: PromiseAlloca, /*header*/ IsHeader: true));
1330
1331 // Add a field to store the suspend index. This doesn't need to
1332 // be in the header.
1333 unsigned IndexBits = std::max(a: 1U, b: Log2_64_Ceil(Value: Shape.CoroSuspends.size()));
1334 Type *IndexType = Type::getIntNTy(C, N: IndexBits);
1335
1336 SwitchIndexFieldId = B.addField(Ty: IndexType, MaybeFieldAlignment: std::nullopt);
1337 } else {
1338 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
1339 }
1340
  // Because multiple allocas may share the same field slot,
  // we add the allocas to their fields here.
1343 B.addFieldForAllocas(F, FrameData, Shape);
1344 // Add PromiseAlloca to Allocas list so that
1345 // 1. updateLayoutIndex could update its index after
1346 // `performOptimizedStructLayout`
1347 // 2. it is processed in insertSpills.
1348 if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and that no alias will be created before CoroBegin.
1351 FrameData.Allocas.emplace_back(
1352 Args&: PromiseAlloca, Args: DenseMap<Instruction *, std::optional<APInt>>{}, Args: false);
1353 // Create an entry for every spilled value.
1354 for (auto &S : FrameData.Spills) {
1355 Type *FieldType = S.first->getType();
1356 // For byval arguments, we need to store the pointed value in the frame,
1357 // instead of the pointer itself.
1358 if (const Argument *A = dyn_cast<Argument>(Val: S.first))
1359 if (A->hasByValAttr())
1360 FieldType = A->getParamByValType();
1361 FieldIDType Id = B.addField(Ty: FieldType, MaybeFieldAlignment: std::nullopt, IsHeader: false /*header*/,
1362 IsSpillOfValue: true /*IsSpillOfValue*/);
1363 FrameData.setFieldIndex(V: S.first, Index: Id);
1364 }
1365
1366 B.finish(Ty: FrameTy);
1367 FrameData.updateLayoutIndex(B);
1368 Shape.FrameAlign = B.getStructAlign();
1369 Shape.FrameSize = B.getStructSize();
1370
1371 switch (Shape.ABI) {
1372 case coro::ABI::Switch: {
1373 // In the switch ABI, remember the switch-index field.
1374 auto IndexField = B.getLayoutField(Id: *SwitchIndexFieldId);
1375 Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
1376 Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
1377 Shape.SwitchLowering.IndexOffset = IndexField.Offset;
1378
1379 // Also round the frame size up to a multiple of its alignment, as is
1380 // generally expected in C/C++.
1381 Shape.FrameSize = alignTo(Size: Shape.FrameSize, A: Shape.FrameAlign);
1382 break;
1383 }
1384
1385 // In the retcon ABI, remember whether the frame is inline in the storage.
1386 case coro::ABI::Retcon:
1387 case coro::ABI::RetconOnce: {
1388 auto Id = Shape.getRetconCoroId();
1389 Shape.RetconLowering.IsFrameInlineInStorage
1390 = (B.getStructSize() <= Id->getStorageSize() &&
1391 B.getStructAlign() <= Id->getStorageAlignment());
1392 break;
1393 }
1394 case coro::ABI::Async: {
1395 Shape.AsyncLowering.FrameOffset =
1396 alignTo(Size: Shape.AsyncLowering.ContextHeaderSize, A: Shape.FrameAlign);
1397 // Also make the final context size a multiple of the context alignment to
1398 // make allocation easier for allocators.
1399 Shape.AsyncLowering.ContextSize =
1400 alignTo(Size: Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
1401 A: Shape.AsyncLowering.getContextAlignment());
1402 if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
1403 report_fatal_error(
          "The alignment requirement of frame variables cannot be higher than "
1405 "the alignment of the async function context");
1406 }
1407 break;
1408 }
1409 }
1410
1411 return FrameTy;
1412}
1413
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
//   1. Should this alloca be allocated on the frame instead?
//   2. Could the content of the alloca be modified prior to CoroBegin, which
//      would require copying the data from the alloca to the frame after
//      CoroBegin?
//   3. Are there any aliases created for this alloca prior to CoroBegin but
//      used after CoroBegin? In that case, we will need to recreate the alias
//      after CoroBegin based on the frame.
//
// To answer question 1, we track two things:
//   a. The list of all BasicBlocks that use this alloca or any of its aliases.
//      In the end, we check if there exist any two basic blocks that cross a
//      suspension point. If so, this alloca must be put on the frame.
//   b. Whether the alloca or any alias of the alloca is escaped at some point,
//      either by storing the address somewhere, or by using the address in a
//      function call that might capture. If it is ever escaped, this alloca
//      must be put on the frame conservatively.
//
// To answer question 2, we track writes through the variable
// MayWriteBeforeCoroBegin. Whenever a potential write happens, either through
// a store instruction, a function call or any of the memory intrinsics, we
// check whether this instruction is prior to CoroBegin.
//
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. std::optional is used to
// be able to represent the case when the offset is unknown (e.g. when you have
// a PHINode that takes in different offset values). We cannot handle unknown
// offsets and will assert. This is the potential issue left out. An ideal
// solution would likely require a significant redesign.
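// As a minimal illustration (made-up IR; %id comes from llvm.coro.id, not
// shown):
//
//   %a   = alloca i64
//   %gep = getelementptr i8, ptr %a, i64 4    ; alias with known offset 4
//   %cb  = call ptr @llvm.coro.begin(token %id, ptr null)
//   ...
//   store i8 1, ptr %gep                      ; alias used after coro.begin
//
// the visitor records %gep as an alias of %a with offset 4 so that, once %a
// lives on the frame, an equivalent GEP off the frame field can be recreated
// after coro.begin.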
1439namespace {
1440struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
1441 using Base = PtrUseVisitor<AllocaUseVisitor>;
1442 AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
1443 const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
1444 bool ShouldUseLifetimeStartInfo)
1445 : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
1446 ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
1447
1448 void visit(Instruction &I) {
1449 Users.insert(Ptr: &I);
1450 Base::visit(I);
1451 // If the pointer is escaped prior to CoroBegin, we have to assume it would
1452 // be written into before CoroBegin as well.
1453 if (PI.isEscaped() && !DT.dominates(Def: &CoroBegin, User: PI.getEscapingInst())) {
1454 MayWriteBeforeCoroBegin = true;
1455 }
1456 }
1457 // We need to provide this overload as PtrUseVisitor uses a pointer based
1458 // visiting function.
1459 void visit(Instruction *I) { return visit(I&: *I); }
1460
1461 void visitPHINode(PHINode &I) {
1462 enqueueUsers(I);
1463 handleAlias(I);
1464 }
1465
1466 void visitSelectInst(SelectInst &I) {
1467 enqueueUsers(I);
1468 handleAlias(I);
1469 }
1470
1471 void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca has been written to.
1474 handleMayWrite(I: SI);
1475
1476 if (SI.getValueOperand() != U->get())
1477 return;
1478
1479 // We are storing the pointer into a memory location, potentially escaping.
1480 // As an optimization, we try to detect simple cases where it doesn't
1481 // actually escape, for example:
1482 // %ptr = alloca ..
1483 // %addr = alloca ..
1484 // store %ptr, %addr
1485 // %x = load %addr
1486 // ..
    // If %addr is only used by loading from it, we could simply treat %x as
    // another alias of %ptr, and not consider %ptr to have escaped.
1489 auto IsSimpleStoreThenLoad = [&]() {
1490 auto *AI = dyn_cast<AllocaInst>(Val: SI.getPointerOperand());
1491 // If the memory location we are storing to is not an alloca, it
1492 // could be an alias of some other memory locations, which is difficult
1493 // to analyze.
1494 if (!AI)
1495 return false;
1496 // StoreAliases contains aliases of the memory location stored into.
1497 SmallVector<Instruction *, 4> StoreAliases = {AI};
1498 while (!StoreAliases.empty()) {
1499 Instruction *I = StoreAliases.pop_back_val();
1500 for (User *U : I->users()) {
1501 // If we are loading from the memory location, we are creating an
1502 // alias of the original pointer.
1503 if (auto *LI = dyn_cast<LoadInst>(Val: U)) {
1504 enqueueUsers(I&: *LI);
1505 handleAlias(I&: *LI);
1506 continue;
1507 }
        // If we are overwriting the memory location, the pointer certainly
        // won't escape.
1510 if (auto *S = dyn_cast<StoreInst>(Val: U))
1511 if (S->getPointerOperand() == I)
1512 continue;
1513 if (auto *II = dyn_cast<IntrinsicInst>(Val: U))
1514 if (II->isLifetimeStartOrEnd())
1515 continue;
        // BitCastInst creates aliases of the memory location being stored
1517 // into.
1518 if (auto *BI = dyn_cast<BitCastInst>(Val: U)) {
1519 StoreAliases.push_back(Elt: BI);
1520 continue;
1521 }
1522 return false;
1523 }
1524 }
1525
1526 return true;
1527 };
1528
1529 if (!IsSimpleStoreThenLoad())
1530 PI.setEscaped(&SI);
1531 }
1532
1533 // All mem intrinsics modify the data.
1534 void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(I: MI); }
1535
1536 void visitBitCastInst(BitCastInst &BC) {
1537 Base::visitBitCastInst(BC);
1538 handleAlias(I&: BC);
1539 }
1540
1541 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1542 Base::visitAddrSpaceCastInst(ASC);
1543 handleAlias(I&: ASC);
1544 }
1545
1546 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1547 // The base visitor will adjust Offset accordingly.
1548 Base::visitGetElementPtrInst(GEPI);
1549 handleAlias(I&: GEPI);
1550 }
1551
1552 void visitIntrinsicInst(IntrinsicInst &II) {
    // If the lifetime markers refer to a subrange of the original alloca,
    // ignore them to avoid misleading the analysis.
1556 if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
1557 !Offset.isZero())
1558 return Base::visitIntrinsicInst(II);
1559 LifetimeStarts.insert(Ptr: &II);
1560 }
1561
1562 void visitCallBase(CallBase &CB) {
1563 for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
1564 if (U->get() == CB.getArgOperand(i: Op) && !CB.doesNotCapture(OpNo: Op))
1565 PI.setEscaped(&CB);
1566 handleMayWrite(I: CB);
1567 }
1568
1569 bool getShouldLiveOnFrame() const {
1570 if (!ShouldLiveOnFrame)
1571 ShouldLiveOnFrame = computeShouldLiveOnFrame();
1572 return *ShouldLiveOnFrame;
1573 }
1574
1575 bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
1576
1577 DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const {
1578 assert(getShouldLiveOnFrame() && "This method should only be called if the "
1579 "alloca needs to live on the frame.");
1580 for (const auto &P : AliasOffetMap)
1581 if (!P.second)
1582 report_fatal_error(reason: "Unable to handle an alias with unknown offset "
1583 "created before CoroBegin.");
1584 return AliasOffetMap;
1585 }
1586
1587private:
1588 const DominatorTree &DT;
1589 const CoroBeginInst &CoroBegin;
1590 const SuspendCrossingInfo &Checker;
  // All aliases of the original AllocaInst that are created before CoroBegin
  // and used after it. Each entry maps the aliasing instruction to its offset
  // into the original alloca. These aliases need to be recreated after
  // CoroBegin, based off the frame.
1594 DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
1595 SmallPtrSet<Instruction *, 4> Users{};
1596 SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1597 bool MayWriteBeforeCoroBegin{false};
1598 bool ShouldUseLifetimeStartInfo{true};
1599
1600 mutable std::optional<bool> ShouldLiveOnFrame{};
1601
1602 bool computeShouldLiveOnFrame() const {
1603 // If lifetime information is available, we check it first since it's
1604 // more precise. We look at every pair of lifetime.start intrinsic and
1605 // every basic block that uses the pointer to see if they cross suspension
1606 // points. The uses cover both direct uses as well as indirect uses.
1607 if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
1608 for (auto *I : Users)
1609 for (auto *S : LifetimeStarts)
1610 if (Checker.isDefinitionAcrossSuspend(I&: *S, U: I))
1611 return true;
1612 // Addresses are guaranteed to be identical after every lifetime.start so
1613 // we cannot use the local stack if the address escaped and there is a
1614 // suspend point between lifetime markers. This should also cover the
1615 // case of a single lifetime.start intrinsic in a loop with suspend point.
1616 if (PI.isEscaped()) {
1617 for (auto *A : LifetimeStarts) {
1618 for (auto *B : LifetimeStarts) {
1619 if (Checker.hasPathOrLoopCrossingSuspendPoint(From: A->getParent(),
1620 To: B->getParent()))
1621 return true;
1622 }
1623 }
1624 }
1625 return false;
1626 }
1627 // FIXME: Ideally the isEscaped check should come at the beginning.
1628 // However there are a few loose ends that need to be fixed first before
1629 // we can do that. We need to make sure we are not over-conservative, so
1630 // that the data accessed in-between await_suspend and symmetric transfer
    // is always put on the stack, and also data accessed after coro.end is
    // always put on the stack (especially the return object). To fix that, we
    // need to:
    //   1) Potentially treat sret as nocapture in calls
    //   2) Specially handle the return object and put it on the stack
    //   3) Utilize the lifetime.end intrinsic
1637 if (PI.isEscaped())
1638 return true;
1639
1640 for (auto *U1 : Users)
1641 for (auto *U2 : Users)
1642 if (Checker.isDefinitionAcrossSuspend(I&: *U1, U: U2))
1643 return true;
1644
1645 return false;
1646 }
1647
1648 void handleMayWrite(const Instruction &I) {
1649 if (!DT.dominates(Def: &CoroBegin, User: &I))
1650 MayWriteBeforeCoroBegin = true;
1651 }
1652
1653 bool usedAfterCoroBegin(Instruction &I) {
1654 for (auto &U : I.uses())
1655 if (DT.dominates(Def: &CoroBegin, U))
1656 return true;
1657 return false;
1658 }
1659
1660 void handleAlias(Instruction &I) {
1661 // We track all aliases created prior to CoroBegin but used after.
1662 // These aliases may need to be recreated after CoroBegin if the alloca
    // needs to live on the frame.
1664 if (DT.dominates(Def: &CoroBegin, User: &I) || !usedAfterCoroBegin(I))
1665 return;
1666
1667 if (!IsOffsetKnown) {
1668 AliasOffetMap[&I].reset();
1669 } else {
1670 auto Itr = AliasOffetMap.find(Val: &I);
1671 if (Itr == AliasOffetMap.end()) {
1672 AliasOffetMap[&I] = Offset;
1673 } else if (Itr->second && *Itr->second != Offset) {
1674 // If we have seen two different possible values for this alias, we set
1675 // it to empty.
1676 AliasOffetMap[&I].reset();
1677 }
1678 }
1679 }
1680};
1681} // namespace
1682
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill before the catchswitch would
// violate the requirement that a catchswitch, like all other EH pads, must be
// the first non-PHI instruction in a block.
1686//
1687// Split away catchswitch into a separate block and insert in its place:
1688//
1689// cleanuppad <InsertPt> cleanupret.
1690//
1691// cleanupret instruction will act as an insert point for the spill.
1692static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1693 BasicBlock *CurrentBlock = CatchSwitch->getParent();
1694 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(I: CatchSwitch);
1695 CurrentBlock->getTerminator()->eraseFromParent();
1696
1697 auto *CleanupPad =
1698 CleanupPadInst::Create(ParentPad: CatchSwitch->getParentPad(), Args: {}, NameStr: "", InsertAtEnd: CurrentBlock);
1699 auto *CleanupRet =
1700 CleanupReturnInst::Create(CleanupPad, UnwindBB: NewBlock, InsertAtEnd: CurrentBlock);
1701 return CleanupRet;
1702}
1703
1704// Replace all alloca and SSA values that are accessed across suspend points
1705// with GetElementPointer from coroutine frame + loads and stores. Create an
1706// AllocaSpillBB that will become the new entry block for the resume parts of
1707// the coroutine:
1708//
1709// %hdl = coro.begin(...)
1710// whatever
1711//
1712// becomes:
1713//
1714// %hdl = coro.begin(...)
1715// br label %AllocaSpillBB
1716//
1717// AllocaSpillBB:
1718// ; geps corresponding to allocas that were moved to coroutine frame
1719// br label PostSpill
1720//
1721// PostSpill:
1722// whatever
1723//
1724//
1725static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1726 auto *CB = Shape.CoroBegin;
1727 LLVMContext &C = CB->getContext();
1728 Function *F = CB->getFunction();
1729 IRBuilder<> Builder(C);
1730 StructType *FrameTy = Shape.FrameTy;
1731 Value *FramePtr = Shape.FramePtr;
1732 DominatorTree DT(*F);
1733 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1734
  // Creates a GEP with the given index into the coroutine frame for the
  // original value Orig. Appends an extra 0 index for array allocas,
  // preserving the original type.
1738 auto GetFramePointer = [&](Value *Orig) -> Value * {
1739 FieldIDType Index = FrameData.getFieldIndex(V: Orig);
1740 SmallVector<Value *, 3> Indices = {
1741 ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0),
1742 ConstantInt::get(Ty: Type::getInt32Ty(C), V: Index),
1743 };
1744
1745 if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) {
1746 if (auto *CI = dyn_cast<ConstantInt>(Val: AI->getArraySize())) {
1747 auto Count = CI->getValue().getZExtValue();
1748 if (Count > 1) {
1749 Indices.push_back(Elt: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0));
1750 }
1751 } else {
1752 report_fatal_error(reason: "Coroutines cannot handle non static allocas yet");
1753 }
1754 }
1755
1756 auto GEP = cast<GetElementPtrInst>(
1757 Val: Builder.CreateInBoundsGEP(Ty: FrameTy, Ptr: FramePtr, IdxList: Indices));
1758 if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) {
1759 if (FrameData.getDynamicAlign(V: Orig) != 0) {
1760 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
1761 auto *M = AI->getModule();
1762 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
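        // Round the raw frame address up to the alloca's requested alignment
        // using the usual trick: aligned = (addr + align - 1) & ~(align - 1).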
1763 auto *PtrValue = Builder.CreatePtrToInt(V: GEP, DestTy: IntPtrTy);
1764 auto *AlignMask =
1765 ConstantInt::get(Ty: IntPtrTy, V: AI->getAlign().value() - 1);
1766 PtrValue = Builder.CreateAdd(LHS: PtrValue, RHS: AlignMask);
1767 PtrValue = Builder.CreateAnd(LHS: PtrValue, RHS: Builder.CreateNot(V: AlignMask));
1768 return Builder.CreateIntToPtr(V: PtrValue, DestTy: AI->getType());
1769 }
      // If the type of the GEP is not equal to the type of the AllocaInst, it
      // implies that the AllocaInst may be reused in the frame slot of another
      // AllocaInst. So we cast the GEP to the AllocaInst type here to reuse
      // the frame storage.
      //
      // Note: If we change the strategy for dealing with alignment, we need to
      // refine this casting.
1777 if (GEP->getType() != Orig->getType())
1778 return Builder.CreateAddrSpaceCast(V: GEP, DestTy: Orig->getType(),
1779 Name: Orig->getName() + Twine(".cast"));
1780 }
1781 return GEP;
1782 };
1783
1784 for (auto const &E : FrameData.Spills) {
1785 Value *Def = E.first;
1786 auto SpillAlignment = Align(FrameData.getAlign(V: Def));
1787 // Create a store instruction storing the value into the
1788 // coroutine frame.
1789 BasicBlock::iterator InsertPt;
1790 Type *ByValTy = nullptr;
1791 if (auto *Arg = dyn_cast<Argument>(Val: Def)) {
1792 // For arguments, we will place the store instruction right after
1793 // the coroutine frame pointer instruction, i.e. coro.begin.
1794 InsertPt = Shape.getInsertPtAfterFramePtr();
1795
1796 // If we're spilling an Argument, make sure we clear 'nocapture'
1797 // from the coroutine function.
1798 Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1799
1800 if (Arg->hasByValAttr())
1801 ByValTy = Arg->getParamByValType();
1802 } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Val: Def)) {
1803 // Don't spill immediately after a suspend; splitting assumes
1804 // that the suspend will be followed by a branch.
1805 InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
1806 } else {
1807 auto *I = cast<Instruction>(Val: Def);
1808 if (!DT.dominates(Def: CB, User: I)) {
1809 // If it is not dominated by CoroBegin, then spill should be
1810 // inserted immediately after CoroFrame is computed.
1811 InsertPt = Shape.getInsertPtAfterFramePtr();
1812 } else if (auto *II = dyn_cast<InvokeInst>(Val: I)) {
1813 // If we are spilling the result of the invoke instruction, split
1814 // the normal edge and insert the spill in the new block.
1815 auto *NewBB = SplitEdge(From: II->getParent(), To: II->getNormalDest());
1816 InsertPt = NewBB->getTerminator()->getIterator();
1817 } else if (isa<PHINode>(Val: I)) {
      // Skip PHINodes and EH pad instructions.
1819 BasicBlock *DefBlock = I->getParent();
1820 if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: DefBlock->getTerminator()))
1821 InsertPt = splitBeforeCatchSwitch(CatchSwitch: CSI)->getIterator();
1822 else
1823 InsertPt = DefBlock->getFirstInsertionPt();
1824 } else {
1825 assert(!I->isTerminator() && "unexpected terminator");
1826 // For all other values, the spill is placed immediately after
1827 // the definition.
1828 InsertPt = I->getNextNode()->getIterator();
1829 }
1830 }
1831
1832 auto Index = FrameData.getFieldIndex(V: Def);
1833 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1834 auto *G = Builder.CreateConstInBoundsGEP2_32(
1835 Ty: FrameTy, Ptr: FramePtr, Idx0: 0, Idx1: Index, Name: Def->getName() + Twine(".spill.addr"));
1836 if (ByValTy) {
1837 // For byval arguments, we need to store the pointed value in the frame,
1838 // instead of the pointer itself.
1839 auto *Value = Builder.CreateLoad(Ty: ByValTy, Ptr: Def);
1840 Builder.CreateAlignedStore(Val: Value, Ptr: G, Align: SpillAlignment);
1841 } else {
1842 Builder.CreateAlignedStore(Val: Def, Ptr: G, Align: SpillAlignment);
1843 }
1844
1845 BasicBlock *CurrentBlock = nullptr;
1846 Value *CurrentReload = nullptr;
1847 for (auto *U : E.second) {
1848 // If we have not seen the use block, create a load instruction to reload
1849 // the spilled value from the coroutine frame. Populates the Value pointer
1850 // reference provided with the frame GEP.
1851 if (CurrentBlock != U->getParent()) {
1852 CurrentBlock = U->getParent();
1853 Builder.SetInsertPoint(TheBB: CurrentBlock,
1854 IP: CurrentBlock->getFirstInsertionPt());
1855
1856 auto *GEP = GetFramePointer(E.first);
1857 GEP->setName(E.first->getName() + Twine(".reload.addr"));
1858 if (ByValTy)
1859 CurrentReload = GEP;
1860 else
1861 CurrentReload = Builder.CreateAlignedLoad(
1862 Ty: FrameTy->getElementType(N: FrameData.getFieldIndex(V: E.first)), Ptr: GEP,
1863 Align: SpillAlignment, Name: E.first->getName() + Twine(".reload"));
1864
1865 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(V: Def);
1866 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(V: Def);
        // Try our best to find a dbg.declare. If the spill is a temp, there
        // may not be a direct dbg.declare. Walk up the load chain to find one
        // from an alias.
1870 if (F->getSubprogram()) {
1871 auto *CurDef = Def;
1872 while (DIs.empty() && DVRs.empty() && isa<LoadInst>(Val: CurDef)) {
1873 auto *LdInst = cast<LoadInst>(Val: CurDef);
1874 // Only consider ptr to ptr same type load.
1875 if (LdInst->getPointerOperandType() != LdInst->getType())
1876 break;
1877 CurDef = LdInst->getPointerOperand();
1878 if (!isa<AllocaInst, LoadInst>(Val: CurDef))
1879 break;
1880 DIs = findDbgDeclares(V: CurDef);
1881 DVRs = findDVRDeclares(V: CurDef);
1882 }
1883 }
1884
1885 auto SalvageOne = [&](auto *DDI) {
1886 bool AllowUnresolved = false;
1887 // This dbg.declare is preserved for all coro-split function
1888 // fragments. It will be unreachable in the main function, and
1889 // processed by coro::salvageDebugInfo() by CoroCloner.
1890 if (UseNewDbgInfoFormat) {
1891 DbgVariableRecord *NewDVR = new DbgVariableRecord(
1892 ValueAsMetadata::get(V: CurrentReload), DDI->getVariable(),
1893 DDI->getExpression(), DDI->getDebugLoc(),
1894 DbgVariableRecord::LocationType::Declare);
1895 Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1896 DR: NewDVR, Here: Builder.GetInsertPoint());
1897 } else {
1898 DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1899 .insertDeclare(CurrentReload, DDI->getVariable(),
1900 DDI->getExpression(), DDI->getDebugLoc(),
1901 &*Builder.GetInsertPoint());
1902 }
1903 // This dbg.declare is for the main function entry point. It
1904 // will be deleted in all coro-split functions.
1905 coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
1906 false /*UseEntryValue*/);
1907 };
1908 for_each(Range&: DIs, F: SalvageOne);
1909 for_each(Range&: DVRs, F: SalvageOne);
1910 }
1911
1912 // If we have a single edge PHINode, remove it and replace it with a
1913 // reload from the coroutine frame. (We already took care of multi edge
1914 // PHINodes by rewriting them in the rewritePHIs function).
1915 if (auto *PN = dyn_cast<PHINode>(Val: U)) {
1916 assert(PN->getNumIncomingValues() == 1 &&
1917 "unexpected number of incoming "
1918 "values in the PHINode");
1919 PN->replaceAllUsesWith(V: CurrentReload);
1920 PN->eraseFromParent();
1921 continue;
1922 }
1923
1924 // Replace all uses of CurrentValue in the current instruction with
1925 // reload.
1926 U->replaceUsesOfWith(From: Def, To: CurrentReload);
1927 // Instructions are added to Def's user list if the attached
1928 // debug records use Def. Update those now.
1929 for (DbgVariableRecord &DVR : filterDbgVars(R: U->getDbgRecordRange()))
1930 DVR.replaceVariableLocationOp(OldValue: Def, NewValue: CurrentReload, AllowEmpty: true);
1931 }
1932 }
1933
1934 BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1935
1936 auto SpillBlock = FramePtrBB->splitBasicBlock(
1937 I: Shape.getInsertPtAfterFramePtr(), BBName: "AllocaSpillBB");
1938 SpillBlock->splitBasicBlock(I: &SpillBlock->front(), BBName: "PostSpill");
1939 Shape.AllocaSpillBlock = SpillBlock;
1940
1941 // retcon and retcon.once lowering assumes all uses have been sunk.
1942 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1943 Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with GEPs.
1945 Builder.SetInsertPoint(TheBB: SpillBlock, IP: SpillBlock->begin());
1946 for (const auto &P : FrameData.Allocas) {
1947 AllocaInst *Alloca = P.Alloca;
1948 auto *G = GetFramePointer(Alloca);
1949
1950 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1951 // here, as we are changing location of the instruction.
1952 G->takeName(V: Alloca);
1953 Alloca->replaceAllUsesWith(V: G);
1954 Alloca->eraseFromParent();
1955 }
1956 return;
1957 }
1958
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To preserve debuggability, we replace the uses of allocas in
  // dbg.declares and dbg.values with the reload from the frame.
1962 // Note: We cannot replace the alloca with GEP instructions indiscriminately,
1963 // as some of the uses may not be dominated by CoroBegin.
1964 Builder.SetInsertPoint(TheBB: Shape.AllocaSpillBlock,
1965 IP: Shape.AllocaSpillBlock->begin());
1966 SmallVector<Instruction *, 4> UsersToUpdate;
1967 for (const auto &A : FrameData.Allocas) {
1968 AllocaInst *Alloca = A.Alloca;
1969 UsersToUpdate.clear();
1970 for (User *U : Alloca->users()) {
1971 auto *I = cast<Instruction>(Val: U);
1972 if (DT.dominates(Def: CB, User: I))
1973 UsersToUpdate.push_back(Elt: I);
1974 }
1975 if (UsersToUpdate.empty())
1976 continue;
1977 auto *G = GetFramePointer(Alloca);
1978 G->setName(Alloca->getName() + Twine(".reload.addr"));
1979
1980 SmallVector<DbgVariableIntrinsic *, 4> DIs;
1981 SmallVector<DbgVariableRecord *> DbgVariableRecords;
1982 findDbgUsers(DbgInsts&: DIs, V: Alloca, DbgVariableRecords: &DbgVariableRecords);
1983 for (auto *DVI : DIs)
1984 DVI->replaceUsesOfWith(From: Alloca, To: G);
1985 for (auto *DVR : DbgVariableRecords)
1986 DVR->replaceVariableLocationOp(OldValue: Alloca, NewValue: G);
1987
1988 for (Instruction *I : UsersToUpdate) {
      // It is meaningless to retain lifetime intrinsics that refer to members
      // of the coroutine frame, and such leftover lifetime intrinsics may
      // block further optimizations.
1992 if (I->isLifetimeStartOrEnd()) {
1993 I->eraseFromParent();
1994 continue;
1995 }
1996
1997 I->replaceUsesOfWith(From: Alloca, To: G);
1998 }
1999 }
2000 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
2001 for (const auto &A : FrameData.Allocas) {
2002 AllocaInst *Alloca = A.Alloca;
2003 if (A.MayWriteBeforeCoroBegin) {
2004 // isEscaped really means potentially modified before CoroBegin.
2005 if (Alloca->isArrayAllocation())
2006 report_fatal_error(
2007 reason: "Coroutines cannot handle copying of array allocas yet");
2008
2009 auto *G = GetFramePointer(Alloca);
2010 auto *Value = Builder.CreateLoad(Ty: Alloca->getAllocatedType(), Ptr: Alloca);
2011 Builder.CreateStore(Val: Value, Ptr: G);
2012 }
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate them after CoroBegin by applying the offset
    // to the pointer in the frame.
2016 for (const auto &Alias : A.Aliases) {
2017 auto *FramePtr = GetFramePointer(Alloca);
2018 auto &Value = *Alias.second;
2019 auto ITy = IntegerType::get(C, NumBits: Value.getBitWidth());
2020 auto *AliasPtr =
2021 Builder.CreatePtrAdd(Ptr: FramePtr, Offset: ConstantInt::get(Ty: ITy, V: Value));
2022 Alias.first->replaceUsesWithIf(
2023 New: AliasPtr, ShouldReplace: [&](Use &U) { return DT.dominates(Def: CB, U); });
2024 }
2025 }
2026
  // PromiseAlloca is not collected in FrameData.Allocas, so the code above
  // does not handle the case where the PromiseAlloca is written before
  // CoroBegin. This may be problematic in edge cases. See
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
2031 if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
2032 AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
    // Check whether there is any memory access to the promise alloca before
    // CoroBegin.
2034 bool HasAccessingPromiseBeforeCB = llvm::any_of(Range: PA->uses(), P: [&](Use &U) {
2035 auto *Inst = dyn_cast<Instruction>(Val: U.getUser());
2036 if (!Inst || DT.dominates(Def: CB, User: Inst))
2037 return false;
2038
2039 if (auto *CI = dyn_cast<CallInst>(Val: Inst)) {
      // It is fine if the call wouldn't write to the Promise. This is
      // possible for the @llvm.coro.id intrinsic, which takes the promise as
      // its second argument purely as a marker.
2044 if (CI->onlyReadsMemory() ||
2045 CI->onlyReadsMemory(OpNo: CI->getArgOperandNo(U: &U)))
2046 return false;
2047 return true;
2048 }
2049
2050 return isa<StoreInst>(Val: Inst) ||
2051 // It may take too much time to track the uses.
2052 // Be conservative about the case the use may escape.
2053 isa<GetElementPtrInst>(Val: Inst) ||
           // Before opaque pointers were enabled, there would always be a
           // bitcast of the promise alloca. Now that opaque pointers are
           // enabled by default, this should be fine.
2058 isa<BitCastInst>(Val: Inst);
2059 });
2060 if (HasAccessingPromiseBeforeCB) {
2061 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
2062 auto *G = GetFramePointer(PA);
2063 auto *Value = Builder.CreateLoad(Ty: PA->getAllocatedType(), Ptr: PA);
2064 Builder.CreateStore(Val: Value, Ptr: G);
2065 }
2066 }
2067}
2068
// Moves the values in the PHIs in SuccBB that correspond to PredBB into new
// single-entry PHIs in InsertedBB.
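//
// For example (an illustrative sketch, not taken from a real test case): if
// SuccBB contains
//   %v = phi i32 [ %a, %InsertedBB ], [ %b, %Other ]
// this creates in InsertedBB
//   %a.SuccBB = phi i32 [ %a, %PredBB ]
// and rewrites SuccBB's PHI to take %a.SuccBB for that edge.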
2071static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
2072 BasicBlock *InsertedBB,
2073 BasicBlock *PredBB,
2074 PHINode *UntilPHI = nullptr) {
2075 auto *PN = cast<PHINode>(Val: &SuccBB->front());
2076 do {
2077 int Index = PN->getBasicBlockIndex(BB: InsertedBB);
2078 Value *V = PN->getIncomingValue(i: Index);
2079 PHINode *InputV = PHINode::Create(
2080 Ty: V->getType(), NumReservedValues: 1, NameStr: V->getName() + Twine(".") + SuccBB->getName());
2081 InputV->insertBefore(InsertPos: InsertedBB->begin());
2082 InputV->addIncoming(V, BB: PredBB);
2083 PN->setIncomingValue(i: Index, V: InputV);
2084 PN = dyn_cast<PHINode>(Val: PN->getNextNode());
2085 } while (PN != UntilPHI);
2086}
2087
2088// Rewrites the PHI Nodes in a cleanuppad.
2089static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
2090 CleanupPadInst *CleanupPad) {
2091 // For every incoming edge to a CleanupPad we will create a new block holding
2092 // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
2094 // must be the same).
2095 //
2096 // cleanuppad:
2097 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
2098 // %3 = cleanuppad within none []
2099 //
2100 // It will create:
2101 //
2102 // cleanuppad.corodispatch
2103 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
2104 // %3 = cleanuppad within none []
  //     switch i8 %2, label %unreachable
2106 // [i8 0, label %cleanuppad.from.catchswitch
2107 // i8 1, label %cleanuppad.from.catch.1]
2108 // cleanuppad.from.catchswitch:
2109 // %4 = phi i32 [%0, %catchswitch]
  //     br label %cleanuppad
2111 // cleanuppad.from.catch.1:
2112 // %6 = phi i32 [%1, %catch.1]
  //     br label %cleanuppad
2114 // cleanuppad:
2115 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
2116 // [%6, %cleanuppad.from.catch.1]
2117
2118 // Unreachable BB, in case switching on an invalid value in the dispatcher.
2119 auto *UnreachBB = BasicBlock::Create(
2120 Context&: CleanupPadBB->getContext(), Name: "unreachable", Parent: CleanupPadBB->getParent());
2121 IRBuilder<> Builder(UnreachBB);
2122 Builder.CreateUnreachable();
2123
2124 // Create a new cleanuppad which will be the dispatcher.
2125 auto *NewCleanupPadBB =
2126 BasicBlock::Create(Context&: CleanupPadBB->getContext(),
2127 Name: CleanupPadBB->getName() + Twine(".corodispatch"),
2128 Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
2129 Builder.SetInsertPoint(NewCleanupPadBB);
2130 auto *SwitchType = Builder.getInt8Ty();
2131 auto *SetDispatchValuePN =
2132 Builder.CreatePHI(Ty: SwitchType, NumReservedValues: pred_size(BB: CleanupPadBB));
2133 CleanupPad->removeFromParent();
2134 CleanupPad->insertAfter(InsertPos: SetDispatchValuePN);
2135 auto *SwitchOnDispatch = Builder.CreateSwitch(V: SetDispatchValuePN, Dest: UnreachBB,
2136 NumCases: pred_size(BB: CleanupPadBB));
2137
2138 int SwitchIndex = 0;
2139 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: CleanupPadBB));
2140 for (BasicBlock *Pred : Preds) {
2141 // Create a new cleanuppad and move the PHI values to there.
2142 auto *CaseBB = BasicBlock::Create(Context&: CleanupPadBB->getContext(),
2143 Name: CleanupPadBB->getName() +
2144 Twine(".from.") + Pred->getName(),
2145 Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
2146 updatePhiNodes(DestBB: CleanupPadBB, OldPred: Pred, NewPred: CaseBB);
2147 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
2148 Pred->getName());
2149 Builder.SetInsertPoint(CaseBB);
2150 Builder.CreateBr(Dest: CleanupPadBB);
2151 movePHIValuesToInsertedBlock(SuccBB: CleanupPadBB, InsertedBB: CaseBB, PredBB: NewCleanupPadBB);
2152
2153 // Update this Pred to the new unwind point.
2154 setUnwindEdgeTo(TI: Pred->getTerminator(), Succ: NewCleanupPadBB);
2155
2156 // Setup the switch in the dispatcher.
2157 auto *SwitchConstant = ConstantInt::get(Ty: SwitchType, V: SwitchIndex);
2158 SetDispatchValuePN->addIncoming(V: SwitchConstant, BB: Pred);
2159 SwitchOnDispatch->addCase(OnVal: SwitchConstant, Dest: CaseBB);
2160 SwitchIndex++;
2161 }
2162}
2163
2164static void cleanupSinglePredPHIs(Function &F) {
2165 SmallVector<PHINode *, 32> Worklist;
2166 for (auto &BB : F) {
2167 for (auto &Phi : BB.phis()) {
2168 if (Phi.getNumIncomingValues() == 1) {
2169 Worklist.push_back(Elt: &Phi);
2170 } else
2171 break;
2172 }
2173 }
2174 while (!Worklist.empty()) {
2175 auto *Phi = Worklist.pop_back_val();
2176 auto *OriginalValue = Phi->getIncomingValue(i: 0);
2177 Phi->replaceAllUsesWith(V: OriginalValue);
2178 }
2179}
2180
2181static void rewritePHIs(BasicBlock &BB) {
2182 // For every incoming edge we will create a block holding all
  // incoming values in single-value PHI nodes.
2184 //
2185 // loop:
2186 // %n.val = phi i32[%n, %entry], [%inc, %loop]
2187 //
2188 // It will create:
2189 //
2190 // loop.from.entry:
2191 // %n.loop.pre = phi i32 [%n, %entry]
  //   br label %loop
2193 // loop.from.loop:
2194 // %inc.loop.pre = phi i32 [%inc, %loop]
  //   br label %loop
2196 //
2197 // After this rewrite, further analysis will ignore any phi nodes with more
2198 // than one incoming edge.
2199
2200 // TODO: Simplify PHINodes in the basic block to remove duplicate
2201 // predecessors.
2202
2203 // Special case for CleanupPad: all EH blocks must have the same unwind edge
2204 // so we need to create an additional "dispatcher" block.
2205 if (auto *CleanupPad =
2206 dyn_cast_or_null<CleanupPadInst>(Val: BB.getFirstNonPHI())) {
2207 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
2208 for (BasicBlock *Pred : Preds) {
2209 if (CatchSwitchInst *CS =
2210 dyn_cast<CatchSwitchInst>(Val: Pred->getTerminator())) {
2211 // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
2213 assert(CS->getUnwindDest() == &BB);
2214 (void)CS;
2215 rewritePHIsForCleanupPad(CleanupPadBB: &BB, CleanupPad);
2216 return;
2217 }
2218 }
2219 }
2220
2221 LandingPadInst *LandingPad = nullptr;
2222 PHINode *ReplPHI = nullptr;
2223 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(Val: BB.getFirstNonPHI()))) {
2224 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
2225 // We replace the original landing pad with a PHINode that will collect the
2226 // results from all of them.
2227 ReplPHI = PHINode::Create(Ty: LandingPad->getType(), NumReservedValues: 1, NameStr: "");
2228 ReplPHI->insertBefore(InsertPos: LandingPad->getIterator());
2229 ReplPHI->takeName(V: LandingPad);
2230 LandingPad->replaceAllUsesWith(V: ReplPHI);
2231 // We will erase the original landing pad at the end of this function after
2232 // ehAwareSplitEdge cloned it in the transition blocks.
2233 }
2234
2235 SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
2236 for (BasicBlock *Pred : Preds) {
2237 auto *IncomingBB = ehAwareSplitEdge(BB: Pred, Succ: &BB, OriginalPad: LandingPad, LandingPadReplacement: ReplPHI);
2238 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
2239
2240 // Stop the moving of values at ReplPHI, as this is either null or the PHI
2241 // that replaced the landing pad.
2242 movePHIValuesToInsertedBlock(SuccBB: &BB, InsertedBB: IncomingBB, PredBB: Pred, UntilPHI: ReplPHI);
2243 }
2244
2245 if (LandingPad) {
    // Calls to the ehAwareSplitEdge function cloned the original landing pad.
    // We no longer need it.
2248 LandingPad->eraseFromParent();
2249 }
2250}
2251
2252static void rewritePHIs(Function &F) {
2253 SmallVector<BasicBlock *, 8> WorkList;
2254
2255 for (BasicBlock &BB : F)
2256 if (auto *PN = dyn_cast<PHINode>(Val: &BB.front()))
2257 if (PN->getNumIncomingValues() > 1)
2258 WorkList.push_back(Elt: &BB);
2259
2260 for (BasicBlock *BB : WorkList)
2261 rewritePHIs(BB&: *BB);
2262}
2263
/// Default materializable callback
// Check for instructions that we can recreate on resume as opposed to spilling
// the result into the coroutine frame.
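//
// For example (illustrative only): given
//   %p = getelementptr %struct.S, ptr %s, i32 0, i32 1
//   ... suspend ...
//   %v = load i32, ptr %p
// the GEP %p is cheap to recompute, so rather than spilling %p to the frame we
// can clone it into the resume path near its use.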
2267bool coro::defaultMaterializable(Instruction &V) {
2268 return (isa<CastInst>(Val: &V) || isa<GetElementPtrInst>(Val: &V) ||
2269 isa<BinaryOperator>(Val: &V) || isa<CmpInst>(Val: &V) || isa<SelectInst>(Val: &V));
2270}
2271
2272// Check for structural coroutine intrinsics that should not be spilled into
2273// the coroutine frame.
2274static bool isCoroutineStructureIntrinsic(Instruction &I) {
2275 return isa<CoroIdInst>(Val: &I) || isa<CoroSaveInst>(Val: &I) ||
2276 isa<CoroSuspendInst>(Val: &I);
2277}
2278
2279// For each instruction identified as materializable across the suspend point,
2280// and its associated DAG of other rematerializable instructions,
2281// recreate the DAG of instructions after the suspend point.
2282static void rewriteMaterializableInstructions(
2283 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
2284 &AllRemats) {
  // This has to be done in 2 phases:
  // 1. Do the remats and record the required defs to be replaced in the
  //    original use instructions.
  // 2. Once all the remats are complete, replace the uses in the final
  //    instructions with the new defs.
2290 typedef struct {
2291 Instruction *Use;
2292 Instruction *Def;
2293 Instruction *Remat;
2294 } ProcessNode;
2295
2296 SmallVector<ProcessNode> FinalInstructionsToProcess;
2297
2298 for (const auto &E : AllRemats) {
2299 Instruction *Use = E.first;
2300 Instruction *CurrentMaterialization = nullptr;
2301 RematGraph *RG = E.second.get();
2302 ReversePostOrderTraversal<RematGraph *> RPOT(RG);
2303 SmallVector<Instruction *> InstructionsToProcess;
2304
2305 // If the target use is actually a suspend instruction then we have to
2306 // insert the remats into the end of the predecessor (there should only be
2307 // one). This is so that suspend blocks always have the suspend instruction
2308 // as the first instruction.
2309 auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
2310 if (isa<AnyCoroSuspendInst>(Val: Use)) {
2311 BasicBlock *SuspendPredecessorBlock =
2312 Use->getParent()->getSinglePredecessor();
2313 assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
2314 InsertPoint = SuspendPredecessorBlock->getTerminator();
2315 }
2316
2317 // Note: skip the first instruction as this is the actual use that we're
2318 // rematerializing everything for.
2319 auto I = RPOT.begin();
2320 ++I;
2321 for (; I != RPOT.end(); ++I) {
2322 Instruction *D = (*I)->Node;
2323 CurrentMaterialization = D->clone();
2324 CurrentMaterialization->setName(D->getName());
2325 CurrentMaterialization->insertBefore(InsertPos: InsertPoint);
2326 InsertPoint = CurrentMaterialization;
2327
2328 // Replace all uses of Def in the instructions being added as part of this
2329 // rematerialization group
2330 for (auto &I : InstructionsToProcess)
2331 I->replaceUsesOfWith(From: D, To: CurrentMaterialization);
2332
      // Don't replace the final use at this point as this can cause problems
      // for other materializations. Instead, for any final use that uses a
      // def that's being rematerialized, record the replacement values.
2336 for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
2337 if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
2338 FinalInstructionsToProcess.push_back(
2339 Elt: {.Use: Use, .Def: D, .Remat: CurrentMaterialization});
2340
2341 InstructionsToProcess.push_back(Elt: CurrentMaterialization);
2342 }
2343 }
2344
2345 // Finally, replace the uses with the defines that we've just rematerialized
2346 for (auto &R : FinalInstructionsToProcess) {
2347 if (auto *PN = dyn_cast<PHINode>(Val: R.Use)) {
2348 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
2349 "values in the PHINode");
2350 PN->replaceAllUsesWith(V: R.Remat);
2351 PN->eraseFromParent();
2352 continue;
2353 }
2354 R.Use->replaceUsesOfWith(From: R.Def, To: R.Remat);
2355 }
2356}
2357
2358// Splits the block at a particular instruction unless it is the first
2359// instruction in the block with a single predecessor.
2360static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
2361 auto *BB = I->getParent();
2362 if (&BB->front() == I) {
2363 if (BB->getSinglePredecessor()) {
2364 BB->setName(Name);
2365 return BB;
2366 }
2367 }
2368 return BB->splitBasicBlock(I, BBName: Name);
2369}
2370
2371// Split above and below a particular instruction so that it
2372// will be all alone by itself in a block.
2373static void splitAround(Instruction *I, const Twine &Name) {
2374 splitBlockIfNotFirst(I, Name);
2375 splitBlockIfNotFirst(I: I->getNextNode(), Name: "After" + Name);
2376}
2377
2378static bool isSuspendBlock(BasicBlock *BB) {
2379 return isa<AnyCoroSuspendInst>(Val: BB->front());
2380}
2381
2382typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
2383
2384/// Does control flow starting at the given block ever reach a suspend
2385/// instruction before reaching a block in VisitedOrFreeBBs?
2386static bool isSuspendReachableFrom(BasicBlock *From,
2387 VisitedBlocksSet &VisitedOrFreeBBs) {
2388 // Eagerly try to add this block to the visited set. If it's already
2389 // there, stop recursing; this path doesn't reach a suspend before
2390 // either looping or reaching a freeing block.
2391 if (!VisitedOrFreeBBs.insert(Ptr: From).second)
2392 return false;
2393
2394 // We assume that we'll already have split suspends into their own blocks.
2395 if (isSuspendBlock(BB: From))
2396 return true;
2397
2398 // Recurse on the successors.
2399 for (auto *Succ : successors(BB: From)) {
2400 if (isSuspendReachableFrom(From: Succ, VisitedOrFreeBBs))
2401 return true;
2402 }
2403
2404 return false;
2405}
2406
2407/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
2408/// suspend point?
2409static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
2410 // Seed the visited set with all the basic blocks containing a free
2411 // so that we won't pass them up.
2412 VisitedBlocksSet VisitedOrFreeBBs;
2413 for (auto *User : AI->users()) {
2414 if (auto FI = dyn_cast<CoroAllocaFreeInst>(Val: User))
2415 VisitedOrFreeBBs.insert(Ptr: FI->getParent());
2416 }
2417
2418 return !isSuspendReachableFrom(From: AI->getParent(), VisitedOrFreeBBs);
2419}
2420
2421/// After we split the coroutine, will the given basic block be along
2422/// an obvious exit path for the resumption function?
2423static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
2424 unsigned depth = 3) {
2425 // If we've bottomed out our depth count, stop searching and assume
2426 // that the path might loop back.
2427 if (depth == 0) return false;
2428
2429 // If this is a suspend block, we're about to exit the resumption function.
2430 if (isSuspendBlock(BB)) return true;
2431
2432 // Recurse into the successors.
2433 for (auto *Succ : successors(BB)) {
2434 if (!willLeaveFunctionImmediatelyAfter(BB: Succ, depth: depth - 1))
2435 return false;
2436 }
2437
2438 // If none of the successors leads back in a loop, we're on an exit/abort.
2439 return true;
2440}
2441
2442static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
2443 // Look for a free that isn't sufficiently obviously followed by
2444 // either a suspend or a termination, i.e. something that will leave
2445 // the coro resumption frame.
2446 for (auto *U : AI->users()) {
2447 auto FI = dyn_cast<CoroAllocaFreeInst>(Val: U);
2448 if (!FI) continue;
2449
2450 if (!willLeaveFunctionImmediatelyAfter(BB: FI->getParent()))
2451 return true;
2452 }
2453
2454 // If we never found one, we don't need a stack save.
2455 return false;
2456}
2457
2458/// Turn each of the given local allocas into a normal (dynamic) alloca
2459/// instruction.
2460static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
2461 SmallVectorImpl<Instruction*> &DeadInsts) {
2462 for (auto *AI : LocalAllocas) {
2463 IRBuilder<> Builder(AI);
2464
2465 // Save the stack depth. Try to avoid doing this if the stackrestore
2466 // is going to immediately precede a return or something.
2467 Value *StackSave = nullptr;
2468 if (localAllocaNeedsStackSave(AI))
2469 StackSave = Builder.CreateStackSave();
2470
2471 // Allocate memory.
2472 auto Alloca = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: AI->getSize());
2473 Alloca->setAlignment(AI->getAlignment());
2474
2475 for (auto *U : AI->users()) {
2476 // Replace gets with the allocation.
2477 if (isa<CoroAllocaGetInst>(Val: U)) {
2478 U->replaceAllUsesWith(V: Alloca);
2479
2480 // Replace frees with stackrestores. This is safe because
2481 // alloca.alloc is required to obey a stack discipline, although we
2482 // don't enforce that structurally.
2483 } else {
2484 auto FI = cast<CoroAllocaFreeInst>(Val: U);
2485 if (StackSave) {
2486 Builder.SetInsertPoint(FI);
2487 Builder.CreateStackRestore(Ptr: StackSave);
2488 }
2489 }
2490 DeadInsts.push_back(Elt: cast<Instruction>(Val: U));
2491 }
2492
2493 DeadInsts.push_back(Elt: AI);
2494 }
2495}
2496
2497/// Turn the given coro.alloca.alloc call into a dynamic allocation.
2498/// This happens during the all-instructions iteration, so it must not
2499/// delete the call.
2500static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
2501 coro::Shape &Shape,
2502 SmallVectorImpl<Instruction*> &DeadInsts) {
2503 IRBuilder<> Builder(AI);
2504 auto Alloc = Shape.emitAlloc(Builder, Size: AI->getSize(), CG: nullptr);
2505
2506 for (User *U : AI->users()) {
2507 if (isa<CoroAllocaGetInst>(Val: U)) {
2508 U->replaceAllUsesWith(V: Alloc);
2509 } else {
2510 auto FI = cast<CoroAllocaFreeInst>(Val: U);
2511 Builder.SetInsertPoint(FI);
2512 Shape.emitDealloc(Builder, Ptr: Alloc, CG: nullptr);
2513 }
2514 DeadInsts.push_back(Elt: cast<Instruction>(Val: U));
2515 }
2516
2517 // Push this on last so that it gets deleted after all the others.
2518 DeadInsts.push_back(Elt: AI);
2519
2520 // Return the new allocation value so that we can check for needed spills.
2521 return cast<Instruction>(Val: Alloc);
2522}
2523
2524/// Get the current swifterror value.
2525static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
2526 coro::Shape &Shape) {
2527 // Make a fake function pointer as a sort of intrinsic.
2528 auto FnTy = FunctionType::get(Result: ValueTy, Params: {}, isVarArg: false);
2529 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
2530
2531 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: {});
2532 Shape.SwiftErrorOps.push_back(Elt: Call);
2533
2534 return Call;
2535}
2536
2537/// Set the given value as the current swifterror value.
2538///
2539/// Returns a slot that can be used as a swifterror slot.
2540static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
2541 coro::Shape &Shape) {
2542 // Make a fake function pointer as a sort of intrinsic.
2543 auto FnTy = FunctionType::get(Result: Builder.getPtrTy(),
2544 Params: {V->getType()}, isVarArg: false);
2545 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
2546
2547 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: { V });
2548 Shape.SwiftErrorOps.push_back(Elt: Call);
2549
2550 return Call;
2551}
2552
/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
2555///
2556/// Returns an address that will stand in for the swifterror slot
2557/// until splitting.
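///
/// Conceptually (an illustrative sketch, not the literal IR; the "set"/"get"
/// operations below stand for the fake calls recorded in Shape.SwiftErrorOps):
///
///   %v0   = load <ValueTy>, ptr %alloca
///   %slot = "set swifterror"(%v0)
///   call ... %slot ...             ; the original call, now using %slot
///   %v1   = "get swifterror"()
///   store <ValueTy> %v1, ptr %alloca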
2558static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
2559 AllocaInst *Alloca,
2560 coro::Shape &Shape) {
2561 auto ValueTy = Alloca->getAllocatedType();
2562 IRBuilder<> Builder(Call);
2563
2564 // Load the current value from the alloca and set it as the
2565 // swifterror value.
2566 auto ValueBeforeCall = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
2567 auto Addr = emitSetSwiftErrorValue(Builder, V: ValueBeforeCall, Shape);
2568
2569 // Move to after the call. Since swifterror only has a guaranteed
2570 // value on normal exits, we can ignore implicit and explicit unwind
2571 // edges.
2572 if (isa<CallInst>(Val: Call)) {
2573 Builder.SetInsertPoint(Call->getNextNode());
2574 } else {
2575 auto Invoke = cast<InvokeInst>(Val: Call);
2576 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
2577 }
2578
2579 // Get the current swifterror value and store it to the alloca.
2580 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
2581 Builder.CreateStore(Val: ValueAfterCall, Ptr: Alloca);
2582
2583 return Addr;
2584}
2585
2586/// Eliminate a formerly-swifterror alloca by inserting the get/set
2587/// intrinsics and attempting to MemToReg the alloca away.
2588static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
2589 coro::Shape &Shape) {
2590 for (Use &Use : llvm::make_early_inc_range(Range: Alloca->uses())) {
2591 // swifterror values can only be used in very specific ways.
2592 // We take advantage of that here.
2593 auto User = Use.getUser();
2594 if (isa<LoadInst>(Val: User) || isa<StoreInst>(Val: User))
2595 continue;
2596
2597 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
2598 auto Call = cast<Instruction>(Val: User);
2599
2600 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
2601
2602 // Use the returned slot address as the call argument.
2603 Use.set(Addr);
2604 }
2605
2606 // All the uses should be loads and stores now.
2607 assert(isAllocaPromotable(Alloca));
2608}
2609
2610/// "Eliminate" a swifterror argument by reducing it to the alloca case
2611/// and then loading and storing in the prologue and epilog.
2612///
2613/// The argument keeps the swifterror flag.
2614static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
2615 coro::Shape &Shape,
2616 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
2617 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
2618
2619 auto ArgTy = cast<PointerType>(Val: Arg.getType());
2620 auto ValueTy = PointerType::getUnqual(C&: F.getContext());
2621
2622 // Reduce to the alloca case:
2623
2624 // Create an alloca and replace all uses of the arg with it.
2625 auto Alloca = Builder.CreateAlloca(Ty: ValueTy, AddrSpace: ArgTy->getAddressSpace());
2626 Arg.replaceAllUsesWith(V: Alloca);
2627
2628 // Set an initial value in the alloca. swifterror is always null on entry.
2629 auto InitialValue = Constant::getNullValue(Ty: ValueTy);
2630 Builder.CreateStore(Val: InitialValue, Ptr: Alloca);
2631
2632 // Find all the suspends in the function and save and restore around them.
2633 for (auto *Suspend : Shape.CoroSuspends) {
2634 (void) emitSetAndGetSwiftErrorValueAround(Call: Suspend, Alloca, Shape);
2635 }
2636
2637 // Find all the coro.ends in the function and restore the error value.
2638 for (auto *End : Shape.CoroEnds) {
2639 Builder.SetInsertPoint(End);
2640 auto FinalValue = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
2641 (void) emitSetSwiftErrorValue(Builder, V: FinalValue, Shape);
2642 }
2643
2644 // Now we can use the alloca logic.
2645 AllocasToPromote.push_back(Elt: Alloca);
2646 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2647}
2648
2649/// Eliminate all problematic uses of swifterror arguments and allocas
2650/// from the function. We'll fix them up later when splitting the function.
2651static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
2652 SmallVector<AllocaInst*, 4> AllocasToPromote;
2653
2654 // Look for a swifterror argument.
2655 for (auto &Arg : F.args()) {
2656 if (!Arg.hasSwiftErrorAttr()) continue;
2657
2658 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
2659 break;
2660 }
2661
2662 // Look for swifterror allocas.
2663 for (auto &Inst : F.getEntryBlock()) {
2664 auto Alloca = dyn_cast<AllocaInst>(Val: &Inst);
2665 if (!Alloca || !Alloca->isSwiftError()) continue;
2666
2667 // Clear the swifterror flag.
2668 Alloca->setSwiftError(false);
2669
2670 AllocasToPromote.push_back(Elt: Alloca);
2671 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2672 }
2673
2674 // If we have any allocas to promote, compute a dominator tree and
2675 // promote them en masse.
2676 if (!AllocasToPromote.empty()) {
2677 DominatorTree DT(F);
2678 PromoteMemToReg(Allocas: AllocasToPromote, DT);
2679 }
2680}
2681
2682/// retcon and retcon.once conventions assume that all spill uses can be sunk
2683/// after the coro.begin intrinsic.
2684static void sinkSpillUsesAfterCoroBegin(Function &F,
2685 const FrameDataInfo &FrameData,
2686 CoroBeginInst *CoroBegin) {
2687 DominatorTree Dom(F);
2688
2689 SmallSetVector<Instruction *, 32> ToMove;
2690 SmallVector<Instruction *, 32> Worklist;
2691
2692 // Collect all users that precede coro.begin.
2693 for (auto *Def : FrameData.getAllDefs()) {
2694 for (User *U : Def->users()) {
2695 auto Inst = cast<Instruction>(Val: U);
2696 if (Inst->getParent() != CoroBegin->getParent() ||
2697 Dom.dominates(Def: CoroBegin, User: Inst))
2698 continue;
2699 if (ToMove.insert(X: Inst))
2700 Worklist.push_back(Elt: Inst);
2701 }
2702 }
2703 // Recursively collect users before coro.begin.
2704 while (!Worklist.empty()) {
2705 auto *Def = Worklist.pop_back_val();
2706 for (User *U : Def->users()) {
2707 auto Inst = cast<Instruction>(Val: U);
2708 if (Dom.dominates(Def: CoroBegin, User: Inst))
2709 continue;
2710 if (ToMove.insert(X: Inst))
2711 Worklist.push_back(Elt: Inst);
2712 }
2713 }
2714
2715 // Sort by dominance.
2716 SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
2717 llvm::sort(C&: InsertionList, Comp: [&Dom](Instruction *A, Instruction *B) -> bool {
    // If A dominates B, it should precede (<) B.
2719 return Dom.dominates(Def: A, User: B);
2720 });
2721
2722 Instruction *InsertPt = CoroBegin->getNextNode();
2723 for (Instruction *Inst : InsertionList)
2724 Inst->moveBefore(MovePos: InsertPt);
2725}
2726
/// For each local variable whose users all lie inside a single suspended
/// region, sink its lifetime.start markers to just after the suspend block.
/// Doing so minimizes the live range of each variable, hence minimizing the
/// amount of data we end up putting on the frame.
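///
/// For example (illustrative only): if %x is only used after the suspend point,
///
///   call void @llvm.lifetime.start.p0(i64 4, ptr %x)
///   %s = call i8 @llvm.coro.suspend(token none, i1 false)
///   ...
///   store i32 1, ptr %x
///
/// the lifetime.start can be sunk into the block following the suspend, so %x
/// is no longer considered live across the suspend and need not be placed on
/// the frame.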
2731static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
2732 SuspendCrossingInfo &Checker) {
2733 if (F.hasOptNone())
2734 return;
2735
2736 DominatorTree DT(F);
2737
2738 // Collect all possible basic blocks which may dominate all uses of allocas.
2739 SmallPtrSet<BasicBlock *, 4> DomSet;
2740 DomSet.insert(Ptr: &F.getEntryBlock());
2741 for (auto *CSI : Shape.CoroSuspends) {
2742 BasicBlock *SuspendBlock = CSI->getParent();
2743 assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
2744 "should have split coro.suspend into its own block");
2745 DomSet.insert(Ptr: SuspendBlock->getSingleSuccessor());
2746 }
2747
2748 for (Instruction &I : instructions(F)) {
2749 AllocaInst* AI = dyn_cast<AllocaInst>(Val: &I);
2750 if (!AI)
2751 continue;
2752
2753 for (BasicBlock *DomBB : DomSet) {
2754 bool Valid = true;
2755 SmallVector<Instruction *, 1> Lifetimes;
2756
2757 auto isLifetimeStart = [](Instruction* I) {
2758 if (auto* II = dyn_cast<IntrinsicInst>(I))
2759 return II->getIntrinsicID() == Intrinsic::lifetime_start;
2760 return false;
2761 };
2762
2763 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
2764 if (isLifetimeStart(U)) {
2765 Lifetimes.push_back(Elt: U);
2766 return true;
2767 }
2768 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
2769 return false;
2770 if (isLifetimeStart(U->user_back())) {
2771 Lifetimes.push_back(Elt: U->user_back());
2772 return true;
2773 }
2774 return false;
2775 };
2776
2777 for (User *U : AI->users()) {
2778 Instruction *UI = cast<Instruction>(Val: U);
        // For all users other than lifetime.start markers: if they are all
        // dominated by one of the candidate basic blocks and also do not
        // cross suspend points, then there is no need to spill the
        // instruction.
2783 if (!DT.dominates(A: DomBB, B: UI->getParent()) ||
2784 Checker.isDefinitionAcrossSuspend(DefBB: DomBB, U: UI)) {
2785 // Skip lifetime.start, GEP and bitcast used by lifetime.start
2786 // markers.
2787 if (collectLifetimeStart(UI, AI))
2788 continue;
2789 Valid = false;
2790 break;
2791 }
2792 }
      // Sink lifetime.start markers to the dominating block when they are
      // only used outside the region.
2795 if (Valid && Lifetimes.size() != 0) {
2796 auto *NewLifetime = Lifetimes[0]->clone();
2797 NewLifetime->replaceUsesOfWith(From: NewLifetime->getOperand(i: 1), To: AI);
2798 NewLifetime->insertBefore(InsertPos: DomBB->getTerminator());
2799
        // The original lifetime.start markers are no longer necessary.
2801 for (Instruction *S : Lifetimes)
2802 S->eraseFromParent();
2803
2804 break;
2805 }
2806 }
2807 }
2808}
2809
2810static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
2811 const SuspendCrossingInfo &Checker,
2812 SmallVectorImpl<AllocaInfo> &Allocas,
2813 const DominatorTree &DT) {
2814 if (Shape.CoroSuspends.empty())
2815 return;
2816
2817 // The PromiseAlloca will be specially handled since it needs to be in a
2818 // fixed position in the frame.
2819 if (AI == Shape.SwitchLowering.PromiseAlloca)
2820 return;
2821
  // The __coro_gro alloca should outlive the promise, so make sure we
  // keep it outside the frame.
2824 if (AI->hasMetadata(KindID: LLVMContext::MD_coro_outside_frame))
2825 return;
2826
  // The code that uses the lifetime.start intrinsic does not work for
  // functions with loops that have no exit. Disable it on ABIs we know
  // generate such code.
2830 bool ShouldUseLifetimeStartInfo =
2831 (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2832 Shape.ABI != coro::ABI::RetconOnce);
2833 AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
2834 *Shape.CoroBegin, Checker,
2835 ShouldUseLifetimeStartInfo};
2836 Visitor.visitPtr(I&: *AI);
2837 if (!Visitor.getShouldLiveOnFrame())
2838 return;
2839 Allocas.emplace_back(Args&: AI, Args: Visitor.getAliasesCopy(),
2840 Args: Visitor.getMayWriteBeforeCoroBegin());
2841}
2842
static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
                     bool OptimizeFrame, bool UseEntryValue, Function *F,
                     Value *Storage, DIExpression *Expr,
                     bool SkipOutermostLoad) {
  IRBuilder<> Builder(F->getContext());
  auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
  while (isa<IntrinsicInst>(InsertPt))
    ++InsertPt;
  Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);

  while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
    if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
      Storage = LdInst->getPointerOperand();
      // FIXME: This is a heuristic that works around the fact that
      // LLVM IR debug intrinsics cannot yet distinguish between
      // memory and value locations: Because a dbg.declare(alloca) is
      // implicitly a memory location, no DW_OP_deref operation for the
      // last direct load from an alloca is necessary. This condition
      // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
      Storage = StInst->getValueOperand();
    } else {
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
    }
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Storage);
  const bool IsSwiftAsyncArg =
      StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);

  // Swift async arguments are described by an entry value of the ABI-defined
  // register containing the coroutine context.
  // Entry values in variadic expressions are not supported.
  if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
      Expr->isSingleLocationExpression())
    Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if optimizations are enabled (they would remove the alloca) or
  // if the value is guaranteed to be available through other means (e.g. Swift
  // ABI guarantees).
  if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
                                    Storage->getName() + ".debug");
      Builder.CreateStore(Storage, Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
  }

  return {{*Storage, *Expr}};
}

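// Rewrite the location operand of \p DVI so that it remains meaningful after
// coroutine splitting: the storage is traced back to its base value, the
// DIExpression is rebuilt accordingly, and dbg.declares are hoisted next to
// the definition of the new storage.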
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVI.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
  Value *OriginalStorage = DVI.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVI.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVI.replaceVariableLocationOp(OriginalStorage, Storage);
  DVI.setExpression(Expr);
  // We only hoist dbg.declare today; it does not make sense to hoist dbg.value
  // because it lacks the function-wide guarantees that dbg.declare provides.
  if (isa<DbgDeclareInst>(DVI)) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only at O0, since it easily gets out of sync
      // with optimizations. See
      // https://github.com/llvm/llvm-project/pull/75104 for an example.
      if (!OptimizeFrame && I->getDebugLoc())
        DVI.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt)
      DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
  }
}

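// DbgVariableRecord variant of the above: identical logic, operating on the
// non-instruction debug-info records instead of debug intrinsics.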
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare();
  Value *OriginalStorage = DVR.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVR.replaceVariableLocationOp(OriginalStorage, Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare today; it does not make sense to hoist dbg.value
  // because it lacks the function-wide guarantees that dbg.declare provides.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only at O0, since it easily gets out of sync
      // with optimizations. See
      // https://github.com/llvm/llvm-project/pull/75104 for an example.
      if (!OptimizeFrame && I->getDebugLoc())
        DVR.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      DVR.removeFromParent();
      (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
    }
  }
}

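// Find instructions that \p MaterializableCallback reports as materializable
// and whose definitions would otherwise cross a suspend point, then rewrite
// them so they are recomputed at their uses instead of being spilled to the
// coroutine frame.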
static void doRematerializations(
    Function &F, SuspendCrossingInfo &Checker,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  if (F.hasOptNone())
    return;

  SpillInfo Spills;

  // See if there are materializable instructions across suspend points.
  // We record these as the starting point to also identify materializable
  // defs of uses in these operations.
  for (Instruction &I : instructions(F)) {
    if (!MaterializableCallback(I))
      continue;
    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U))
        Spills[&I].push_back(cast<Instruction>(U));
  }

  // Process each of the identified rematerializable instructions
  // and add predecessor instructions that can also be rematerialized.
  // This is actually a graph of instructions since we could potentially
  // have multiple uses of a def in the set of predecessor instructions.
  // The approach here is to maintain a graph of instructions for each bottom
  // level instruction - where we have a unique set of instructions (nodes)
  // and edges between them. We then walk the graph in reverse post-order to
  // insert them past the suspend point, while ensuring that the ordering is
  // correct. We also rely on CSE removing duplicate defs for remats of
  // different instructions with a def in common (rather than maintaining more
  // complex graphs for each suspend point).

  // We can do this by adding new nodes to the list for each suspend point.
  // Then, using standard GraphTraits, we get a reverse post-order traversal
  // when we insert the nodes after the suspend.
  SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
  for (auto &E : Spills) {
    for (Instruction *U : E.second) {
      // Don't process a user twice (this can happen if the instruction uses
      // more than one rematerializable def).
      if (AllRemats.count(U))
        continue;

      // The constructor creates the whole RematGraph for the given use.
      auto RematUPtr =
          std::make_unique<RematGraph>(MaterializableCallback, U, Checker);

      LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
                 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
                 for (auto I = RPOT.begin(); I != RPOT.end();
                      ++I) { (*I)->Node->dump(); } dbgs()
                 << "\n";);

      AllRemats[U] = std::move(RematUPtr);
    }
  }

  // Rewrite materializable instructions to be materialized at the use
  // point.
  LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
  rewriteMaterializableInstructions(AllRemats);
}

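// Drive frame construction for \p F: split suspend points and coro.ends into
// their own blocks, normalize PHIs, compute suspend-crossing information,
// rematerialize cheap definitions, collect the allocas, arguments, and
// instructions that live across a suspend point, and finally build the frame
// type and insert the corresponding spills.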
void coro::buildCoroutineFrame(
    Function &F, Shape &Shape, TargetTransformInfo &TTI,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  // Don't eliminate swifterror in async functions that won't be split.
  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    splitAround(CE, "CoroEnd");

    // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call function call. (Arguments to the coro.end
    // instruction would otherwise be ignored.)
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
      auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
      if (!MustTailCallFn)
        continue;
      IRBuilder<> Builder(AsyncEnd);
      SmallVector<Value *, 8> Args(AsyncEnd->args());
      auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
      auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
                                      TTI, Arguments, Builder);
      splitAround(Call, "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single-predecessor phis,
  // e.g. that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transform multi-edge PHI nodes so that any value feeding into a PHI never
  // has its definition separated from the PHI by a suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  doRematerializations(F, Checker, MaterializableCallback);

  FrameDataInfo FrameData;
  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
  SmallVector<Instruction *, 4> DeadInstructions;
  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker);

  // Collect the spills for arguments and other non-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        FrameData.Spills[&A].push_back(cast<Instruction>(U));

  const DominatorTree DT(F);
  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    if (auto *AI = dyn_cast<AllocaInst>(&I)) {
      collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT);
      continue;
    }

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
  }

  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only salvage DbgValueInsts whose values are already in
  // the frame. The dbg.values for allocas are handled specially.
  for (auto &Iter : FrameData.Spills) {
    auto *V = Iter.first;
    SmallVector<DbgValueInst *, 16> DVIs;
    SmallVector<DbgVariableRecord *, 16> DVRs;
    findDbgValues(DVIs, V, &DVRs);
    for (DbgValueInst *DVI : DVIs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVI))
        FrameData.Spills[V].push_back(DVI);
    // Add the instructions which carry debug info that is in the frame.
    for (DbgVariableRecord *DVR : DVRs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr))
        FrameData.Spills[V].push_back(DVR->Marker->MarkedInstr);
  }

  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
  Shape.FramePtr = Shape.CoroBegin;
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}
