1 | //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a Union-find algorithm to compute Minimum Spanning Tree |
10 | // for a given CFG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H |
15 | #define LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H |
16 | |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
20 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
21 | #include "llvm/Analysis/CFG.h" |
22 | #include "llvm/IR/Instructions.h" |
23 | #include "llvm/IR/IntrinsicInst.h" |
24 | #include "llvm/Support/BranchProbability.h" |
25 | #include "llvm/Support/Debug.h" |
26 | #include "llvm/Support/raw_ostream.h" |
27 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
28 | #include <utility> |
29 | #include <vector> |
30 | |
31 | #define DEBUG_TYPE "cfgmst" |
32 | |
33 | namespace llvm { |
34 | |
35 | /// An union-find based Minimum Spanning Tree for CFG |
36 | /// |
37 | /// Implements a Union-find algorithm to compute Minimum Spanning Tree |
38 | /// for a given CFG. |
39 | template <class Edge, class BBInfo> class CFGMST { |
40 | Function &F; |
41 | |
42 | // Store all the edges in CFG. It may contain some stale edges |
43 | // when Removed is set. |
44 | std::vector<std::unique_ptr<Edge>> AllEdges; |
45 | |
46 | // This map records the auxiliary information for each BB. |
47 | DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; |
48 | |
49 | // Whehter the function has an exit block with no successors. |
50 | // (For function with an infinite loop, this block may be absent) |
51 | bool ExitBlockFound = false; |
52 | |
53 | BranchProbabilityInfo *const BPI; |
54 | BlockFrequencyInfo *const BFI; |
55 | |
56 | // If function entry will be always instrumented. |
57 | const bool InstrumentFuncEntry; |
58 | |
59 | // Find the root group of the G and compress the path from G to the root. |
60 | BBInfo *findAndCompressGroup(BBInfo *G) { |
61 | if (G->Group != G) |
62 | G->Group = findAndCompressGroup(G: static_cast<BBInfo *>(G->Group)); |
63 | return static_cast<BBInfo *>(G->Group); |
64 | } |
65 | |
66 | // Union BB1 and BB2 into the same group and return true. |
67 | // Returns false if BB1 and BB2 are already in the same group. |
68 | bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { |
69 | BBInfo *BB1G = findAndCompressGroup(G: &getBBInfo(BB: BB1)); |
70 | BBInfo *BB2G = findAndCompressGroup(G: &getBBInfo(BB: BB2)); |
71 | |
72 | if (BB1G == BB2G) |
73 | return false; |
74 | |
75 | // Make the smaller rank tree a direct child or the root of high rank tree. |
76 | if (BB1G->Rank < BB2G->Rank) |
77 | BB1G->Group = BB2G; |
78 | else { |
79 | BB2G->Group = BB1G; |
80 | // If the ranks are the same, increment root of one tree by one. |
81 | if (BB1G->Rank == BB2G->Rank) |
82 | BB1G->Rank++; |
83 | } |
84 | return true; |
85 | } |
86 | |
87 | void handleCoroSuspendEdge(Edge *E) { |
88 | // We must not add instrumentation to the BB representing the |
89 | // "suspend" path, else CoroSplit won't be able to lower |
90 | // llvm.coro.suspend to a tail call. We do want profiling info for |
91 | // the other branches (resume/destroy). So we do 2 things: |
92 | // 1. we prefer instrumenting those other edges by setting the weight |
93 | // of the "suspend" edge to max, and |
94 | // 2. we mark the edge as "Removed" to guarantee it is not considered |
95 | // for instrumentation. That could technically happen: |
96 | // (from test/Transforms/Coroutines/coro-split-musttail.ll) |
97 | // |
98 | // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) |
99 | // switch i8 %suspend, label %exit [ |
100 | // i8 0, label %await.ready |
101 | // i8 1, label %exit |
102 | // ] |
103 | if (!E->DestBB) |
104 | return; |
105 | assert(E->SrcBB); |
106 | if (llvm::isPresplitCoroSuspendExitEdge(Src: *E->SrcBB, Dest: *E->DestBB)) |
107 | E->Removed = true; |
108 | } |
109 | |
110 | // Traverse the CFG using a stack. Find all the edges and assign the weight. |
111 | // Edges with large weight will be put into MST first so they are less likely |
112 | // to be instrumented. |
113 | void buildEdges() { |
114 | LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n" ); |
115 | |
116 | BasicBlock *Entry = &(F.getEntryBlock()); |
117 | uint64_t EntryWeight = |
118 | (BFI != nullptr ? BFI->getEntryFreq().getFrequency() : 2); |
119 | // If we want to instrument the entry count, lower the weight to 0. |
120 | if (InstrumentFuncEntry) |
121 | EntryWeight = 0; |
122 | Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, |
123 | *ExitOutgoing = nullptr, *ExitIncoming = nullptr; |
124 | uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; |
125 | |
126 | // Add a fake edge to the entry. |
127 | EntryIncoming = &addEdge(Src: nullptr, Dest: Entry, W: EntryWeight); |
128 | LLVM_DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName() |
129 | << " w = " << EntryWeight << "\n" ); |
130 | |
131 | // Special handling for single BB functions. |
132 | if (succ_empty(BB: Entry)) { |
133 | addEdge(Src: Entry, Dest: nullptr, W: EntryWeight); |
134 | return; |
135 | } |
136 | |
137 | static const uint32_t CriticalEdgeMultiplier = 1000; |
138 | |
139 | for (BasicBlock &BB : F) { |
140 | Instruction *TI = BB.getTerminator(); |
141 | uint64_t BBWeight = |
142 | (BFI != nullptr ? BFI->getBlockFreq(BB: &BB).getFrequency() : 2); |
143 | uint64_t Weight = 2; |
144 | if (int successors = TI->getNumSuccessors()) { |
145 | for (int i = 0; i != successors; ++i) { |
146 | BasicBlock *TargetBB = TI->getSuccessor(Idx: i); |
147 | bool Critical = isCriticalEdge(TI, SuccNum: i); |
148 | uint64_t scaleFactor = BBWeight; |
149 | if (Critical) { |
150 | if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) |
151 | scaleFactor *= CriticalEdgeMultiplier; |
152 | else |
153 | scaleFactor = UINT64_MAX; |
154 | } |
155 | if (BPI != nullptr) |
156 | Weight = BPI->getEdgeProbability(Src: &BB, Dst: TargetBB).scale(Num: scaleFactor); |
157 | if (Weight == 0) |
158 | Weight++; |
159 | auto *E = &addEdge(Src: &BB, Dest: TargetBB, W: Weight); |
160 | E->IsCritical = Critical; |
161 | handleCoroSuspendEdge(E); |
162 | LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to " |
163 | << TargetBB->getName() << " w=" << Weight << "\n" ); |
164 | |
165 | // Keep track of entry/exit edges: |
166 | if (&BB == Entry) { |
167 | if (Weight > MaxEntryOutWeight) { |
168 | MaxEntryOutWeight = Weight; |
169 | EntryOutgoing = E; |
170 | } |
171 | } |
172 | |
173 | auto *TargetTI = TargetBB->getTerminator(); |
174 | if (TargetTI && !TargetTI->getNumSuccessors()) { |
175 | if (Weight > MaxExitInWeight) { |
176 | MaxExitInWeight = Weight; |
177 | ExitIncoming = E; |
178 | } |
179 | } |
180 | } |
181 | } else { |
182 | ExitBlockFound = true; |
183 | Edge *ExitO = &addEdge(Src: &BB, Dest: nullptr, W: BBWeight); |
184 | if (BBWeight > MaxExitOutWeight) { |
185 | MaxExitOutWeight = BBWeight; |
186 | ExitOutgoing = ExitO; |
187 | } |
188 | LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to fake exit" |
189 | << " w = " << BBWeight << "\n" ); |
190 | } |
191 | } |
192 | |
193 | // Entry/exit edge adjustment heurisitic: |
194 | // prefer instrumenting entry edge over exit edge |
195 | // if possible. Those exit edges may never have a chance to be |
196 | // executed (for instance the program is an event handling loop) |
197 | // before the profile is asynchronously dumped. |
198 | // |
199 | // If EntryIncoming and ExitOutgoing has similar weight, make sure |
200 | // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing |
201 | // and ExitIncoming has similar weight, make sure ExitIncoming becomes |
202 | // the min-edge. |
203 | uint64_t EntryInWeight = EntryWeight; |
204 | |
205 | if (EntryInWeight >= MaxExitOutWeight && |
206 | EntryInWeight * 2 < MaxExitOutWeight * 3) { |
207 | EntryIncoming->Weight = MaxExitOutWeight; |
208 | ExitOutgoing->Weight = EntryInWeight + 1; |
209 | } |
210 | |
211 | if (MaxEntryOutWeight >= MaxExitInWeight && |
212 | MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { |
213 | EntryOutgoing->Weight = MaxExitInWeight; |
214 | ExitIncoming->Weight = MaxEntryOutWeight + 1; |
215 | } |
216 | } |
217 | |
218 | // Sort CFG edges based on its weight. |
219 | void sortEdgesByWeight() { |
220 | llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1, |
221 | const std::unique_ptr<Edge> &Edge2) { |
222 | return Edge1->Weight > Edge2->Weight; |
223 | }); |
224 | } |
225 | |
226 | // Traverse all the edges and compute the Minimum Weight Spanning Tree |
227 | // using union-find algorithm. |
228 | void computeMinimumSpanningTree() { |
229 | // First, put all the critical edge with landing-pad as the Dest to MST. |
230 | // This works around the insufficient support of critical edges split |
231 | // when destination BB is a landing pad. |
232 | for (auto &Ei : AllEdges) { |
233 | if (Ei->Removed) |
234 | continue; |
235 | if (Ei->IsCritical) { |
236 | if (Ei->DestBB && Ei->DestBB->isLandingPad()) { |
237 | if (unionGroups(BB1: Ei->SrcBB, BB2: Ei->DestBB)) |
238 | Ei->InMST = true; |
239 | } |
240 | } |
241 | } |
242 | |
243 | for (auto &Ei : AllEdges) { |
244 | if (Ei->Removed) |
245 | continue; |
246 | // If we detect infinite loops, force |
247 | // instrumenting the entry edge: |
248 | if (!ExitBlockFound && Ei->SrcBB == nullptr) |
249 | continue; |
250 | if (unionGroups(BB1: Ei->SrcBB, BB2: Ei->DestBB)) |
251 | Ei->InMST = true; |
252 | } |
253 | } |
254 | |
255 | public: |
256 | // Dump the Debug information about the instrumentation. |
257 | void dumpEdges(raw_ostream &OS, const Twine &Message) const { |
258 | if (!Message.str().empty()) |
259 | OS << Message << "\n" ; |
260 | OS << " Number of Basic Blocks: " << BBInfos.size() << "\n" ; |
261 | for (auto &BI : BBInfos) { |
262 | const BasicBlock *BB = BI.first; |
263 | OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " |
264 | << BI.second->infoString() << "\n" ; |
265 | } |
266 | |
267 | OS << " Number of Edges: " << AllEdges.size() |
268 | << " (*: Instrument, C: CriticalEdge, -: Removed)\n" ; |
269 | uint32_t Count = 0; |
270 | for (auto &EI : AllEdges) |
271 | OS << " Edge " << Count++ << ": " << getBBInfo(BB: EI->SrcBB).Index << "-->" |
272 | << getBBInfo(BB: EI->DestBB).Index << EI->infoString() << "\n" ; |
273 | } |
274 | |
275 | // Add an edge to AllEdges with weight W. |
276 | Edge &addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W) { |
277 | uint32_t Index = BBInfos.size(); |
278 | auto Iter = BBInfos.end(); |
279 | bool Inserted; |
280 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(x&: Src, y: nullptr)); |
281 | if (Inserted) { |
282 | // Newly inserted, update the real info. |
283 | Iter->second = std::move(std::make_unique<BBInfo>(Index)); |
284 | Index++; |
285 | } |
286 | std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(x&: Dest, y: nullptr)); |
287 | if (Inserted) |
288 | // Newly inserted, update the real info. |
289 | Iter->second = std::move(std::make_unique<BBInfo>(Index)); |
290 | AllEdges.emplace_back(new Edge(Src, Dest, W)); |
291 | return *AllEdges.back(); |
292 | } |
293 | |
294 | CFGMST(Function &Func, bool InstrumentFuncEntry, |
295 | BranchProbabilityInfo *BPI = nullptr, |
296 | BlockFrequencyInfo *BFI = nullptr) |
297 | : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) { |
298 | buildEdges(); |
299 | sortEdgesByWeight(); |
300 | computeMinimumSpanningTree(); |
301 | if (AllEdges.size() > 1 && InstrumentFuncEntry) |
302 | std::iter_swap(std::move(AllEdges.begin()), |
303 | std::move(AllEdges.begin() + AllEdges.size() - 1)); |
304 | } |
305 | |
306 | const std::vector<std::unique_ptr<Edge>> &allEdges() const { |
307 | return AllEdges; |
308 | } |
309 | |
310 | std::vector<std::unique_ptr<Edge>> &allEdges() { return AllEdges; } |
311 | |
312 | size_t numEdges() const { return AllEdges.size(); } |
313 | |
314 | size_t bbInfoSize() const { return BBInfos.size(); } |
315 | |
316 | // Give BB, return the auxiliary information. |
317 | BBInfo &getBBInfo(const BasicBlock *BB) const { |
318 | auto It = BBInfos.find(BB); |
319 | assert(It->second.get() != nullptr); |
320 | return *It->second.get(); |
321 | } |
322 | |
323 | // Give BB, return the auxiliary information if it's available. |
324 | BBInfo *findBBInfo(const BasicBlock *BB) const { |
325 | auto It = BBInfos.find(BB); |
326 | if (It == BBInfos.end()) |
327 | return nullptr; |
328 | return It->second.get(); |
329 | } |
330 | }; |
331 | |
332 | } // end namespace llvm |
333 | |
334 | #undef DEBUG_TYPE // "cfgmst" |
335 | |
336 | #endif // LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H |
337 | |