//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loops should be simplified before this analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScaledNumber.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <list>
#include <numeric>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::bfi_detail;

#define DEBUG_TYPE "block-freq"

namespace llvm {
cl::opt<bool> CheckBFIUnknownBlockQueries(
    "check-bfi-unknown-block-queries",
    cl::init(false), cl::Hidden,
    cl::desc("Check if block frequency is queried for an unknown block "
             "for debugging missed BFI updates"));

cl::opt<bool> UseIterativeBFIInference(
    "use-iterative-bfi-inference", cl::Hidden,
    cl::desc("Apply an iterative post-processing to infer correct BFI counts"));

cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock(
    "iterative-bfi-max-iterations-per-block", cl::init(1000), cl::Hidden,
    cl::desc("Iterative inference: maximum number of update iterations "
             "per block"));

cl::opt<double> IterativeBFIPrecision(
    "iterative-bfi-precision", cl::init(1e-12), cl::Hidden,
    cl::desc("Iterative inference: delta convergence precision; smaller values "
             "typically lead to better results at the cost of worse runtime"));
} // namespace llvm

ScaledNumber<uint64_t> BlockMass::toScaled() const {
  if (isFull())
    return ScaledNumber<uint64_t>(1, 0);
  return ScaledNumber<uint64_t>(getMass() + 1, -64);
}
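
// Illustrative arithmetic: BlockMass is a fixed-point fraction of 2^64, so
// toScaled() maps a mass of 0x8000000000000000 (half of full) to
// (0x8000000000000000 + 1) * 2^-64, i.e. just over 0.5, while
// BlockMass::getFull() maps to exactly 1.0.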

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
#endif

static char getHexDigit(int N) {
  assert(N < 16);
  if (N < 10)
    return '0' + N;
  return 'a' + N - 10;
}

raw_ostream &BlockMass::print(raw_ostream &OS) const {
  for (int Digits = 0; Digits < 16; ++Digits)
    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
  return OS;
}

namespace {

using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
using Distribution = BlockFrequencyInfoImplBase::Distribution;
using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
using LoopData = BlockFrequencyInfoImplBase::LoopData;
using Weight = BlockFrequencyInfoImplBase::Weight;
using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;

/// Dithering mass distributer.
///
/// This class splits up a single mass into portions by weight, dithering to
/// spread out error. No mass is lost. The dithering precision depends on the
/// precision of the product of \a BlockMass and \a BranchProbability.
///
/// The distribution algorithm follows.
///
///  1. Initialize by saving the sum of the weights in \a RemWeight and the
///     mass to distribute in \a RemMass.
///
///  2. For each portion:
///
///      1. Construct a branch probability, P, as the portion's weight divided
///         by the current value of \a RemWeight.
///      2. Calculate the portion's mass as \a RemMass times P.
///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
///         the current portion's weight and mass.
struct DitheringDistributer {
  uint32_t RemWeight;
  BlockMass RemMass;

  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);

  BlockMass takeMass(uint32_t Weight);
};

} // end anonymous namespace

DitheringDistributer::DitheringDistributer(Distribution &Dist,
                                           const BlockMass &Mass) {
  Dist.normalize();
  RemWeight = Dist.Total;
  RemMass = Mass;
}

BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
  assert(Weight && "invalid weight");
  assert(Weight <= RemWeight);
  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);

  // Decrement totals (dither).
  RemWeight -= Weight;
  RemMass -= Mass;
  return Mass;
}
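
// Illustrative walk-through of the dithering above: distributing a full mass
// across weights {3, 1} (Total = 4) proceeds as
//   takeMass(3): Mass = 1.0 * 3/4 = 0.75; RemWeight = 1, RemMass = 0.25
//   takeMass(1): Mass = 0.25 * 1/1 = 0.25; RemWeight = 0, RemMass = 0.0
// Any rounding error from one step stays in RemMass and is absorbed by later
// portions, so the portions always sum to exactly the original mass.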

void Distribution::add(const BlockNode &Node, uint64_t Amount,
                       Weight::DistType Type) {
  assert(Amount && "invalid weight of 0");
  uint64_t NewTotal = Total + Amount;

  // Check for overflow. It should be impossible to overflow twice.
  bool IsOverflow = NewTotal < Total;
  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
  DidOverflow |= IsOverflow;

  // Update the total.
  Total = NewTotal;

  // Save the weight.
  Weights.push_back(Weight(Type, Node, Amount));
}

static void combineWeight(Weight &W, const Weight &OtherW) {
  assert(OtherW.TargetNode.isValid());
  if (!W.Amount) {
    W = OtherW;
    return;
  }
  assert(W.Type == OtherW.Type);
  assert(W.TargetNode == OtherW.TargetNode);
  assert(OtherW.Amount && "Expected non-zero weight");
  if (W.Amount > W.Amount + OtherW.Amount)
    // Saturate on overflow.
    W.Amount = UINT64_MAX;
  else
    W.Amount += OtherW.Amount;
}
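
// For example, combining two weights of UINT64_MAX and 1 to the same node
// would wrap around; the check above catches the wrap (the sum compares less
// than the old amount) and clamps the combined weight to UINT64_MAX.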

static void combineWeightsBySorting(WeightList &Weights) {
  // Sort so edges to the same node are adjacent.
  llvm::sort(Weights, [](const Weight &L, const Weight &R) {
    return L.TargetNode < R.TargetNode;
  });

  // Combine adjacent edges.
  WeightList::iterator O = Weights.begin();
  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
       ++O, (I = L)) {
    *O = *I;

    // Find the adjacent weights to the same node.
    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
      combineWeight(*O, *L);
  }

  // Erase extra entries.
  Weights.erase(O, Weights.end());
}

static void combineWeightsByHashing(WeightList &Weights) {
  // Collect weights into a DenseMap.
  using HashTable = DenseMap<BlockNode::IndexType, Weight>;

  HashTable Combined(NextPowerOf2(2 * Weights.size()));
  for (const Weight &W : Weights)
    combineWeight(Combined[W.TargetNode.Index], W);

  // Check whether anything changed.
  if (Weights.size() == Combined.size())
    return;

  // Fill in the new weights.
  Weights.clear();
  Weights.reserve(Combined.size());
  for (const auto &I : Combined)
    Weights.push_back(I.second);
}

static void combineWeights(WeightList &Weights) {
  // Use a hash table for many successors to keep this linear.
  if (Weights.size() > 128) {
    combineWeightsByHashing(Weights);
    return;
  }

  combineWeightsBySorting(Weights);
}

static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
  assert(Shift >= 0);
  assert(Shift < 64);
  if (!Shift)
    return N;
  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
}
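
// Worked example: the rounding term is the highest bit shifted out, so for
// Shift = 1,
//   shiftRightAndRound(5, 1) == 2 + (5 & 1) == 3   (2.5 rounds up)
//   shiftRightAndRound(4, 1) == 2 + (4 & 1) == 2   (2.0 is exact)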

void Distribution::normalize() {
  // Early exit for termination nodes.
  if (Weights.empty())
    return;

  // Only bother if there are multiple successors.
  if (Weights.size() > 1)
    combineWeights(Weights);

  // Early exit when combined into a single successor.
  if (Weights.size() == 1) {
    Total = 1;
    Weights.front().Amount = 1;
    return;
  }

  // Determine how much to shift right so that the total fits into 32-bits.
  //
  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
  // for each weight can cause a 32-bit overflow.
  int Shift = 0;
  if (DidOverflow)
    Shift = 33;
  else if (Total > UINT32_MAX)
    Shift = 33 - llvm::countl_zero(Total);

  // Early exit if nothing needs to be scaled.
  if (!Shift) {
    // If we didn't overflow then combineWeights() shouldn't have changed the
    // sum of the weights, but let's double-check.
    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
                                    [](uint64_t Sum, const Weight &W) {
                                      return Sum + W.Amount;
                                    }) &&
           "Expected total to be correct");
    return;
  }

  // Recompute the total through accumulation (rather than shifting it) so that
  // it's accurate after shifting and any changes combineWeights() made above.
  Total = 0;

  // Sum the weights to each node and shift right if necessary.
  for (Weight &W : Weights) {
    // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
    // can round here without concern about overflow.
    assert(W.TargetNode.isValid());
    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
    assert(W.Amount <= UINT32_MAX);

    // Update the total.
    Total += W.Amount;
  }
  assert(Total <= UINT32_MAX);
}
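
// Worked example: a total of 2^33 has 30 leading zeros, giving
// Shift = 33 - 30 = 3; every weight is then divided by 8 (with rounding),
// which brings the recomputed total down to roughly 2^30, safely below
// UINT32_MAX.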

void BlockFrequencyInfoImplBase::clear() {
  // Swap with a default-constructed std::vector, since std::vector<>::clear()
  // does not actually clear heap storage.
  std::vector<FrequencyData>().swap(Freqs);
  IsIrrLoopHeader.clear();
  std::vector<WorkingData>().swap(Working);
  Loops.clear();
}

/// Clear all memory not needed downstream.
///
/// Releases all memory not used downstream. In particular, saves Freqs.
static void cleanup(BlockFrequencyInfoImplBase &BFI) {
  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
  BFI.clear();
  BFI.Freqs = std::move(SavedFreqs);
  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
}

bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
                                           const LoopData *OuterLoop,
                                           const BlockNode &Pred,
                                           const BlockNode &Succ,
                                           uint64_t Weight) {
  if (!Weight)
    Weight = 1;

  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
    return OuterLoop && OuterLoop->isHeader(Node);
  };

  BlockNode Resolved = Working[Succ.Index].getResolvedNode();

#ifndef NDEBUG
  auto debugSuccessor = [&](const char *Type) {
    dbgs() << " =>"
           << " [" << Type << "] weight = " << Weight;
    if (!isLoopHeader(Resolved))
      dbgs() << ", succ = " << getBlockName(Succ);
    if (Resolved != Succ)
      dbgs() << ", resolved = " << getBlockName(Resolved);
    dbgs() << "\n";
  };
  (void)debugSuccessor;
#endif

  if (isLoopHeader(Resolved)) {
    LLVM_DEBUG(debugSuccessor("backedge"));
    Dist.addBackedge(Resolved, Weight);
    return true;
  }

  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
    LLVM_DEBUG(debugSuccessor(" exit "));
    Dist.addExit(Resolved, Weight);
    return true;
  }

  if (Resolved < Pred) {
    if (!isLoopHeader(Pred)) {
      // If OuterLoop is an irreducible loop, we can't actually handle this.
      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
             "unhandled irreducible control flow");

      // Irreducible backedge. Abort.
      LLVM_DEBUG(debugSuccessor("abort!!!"));
      return false;
    }

    // If "Pred" is a loop header, then this isn't really a backedge; rather,
    // OuterLoop must be irreducible. These false backedges can come only from
    // secondary loop headers.
    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
           "unhandled irreducible control flow");
  }

  LLVM_DEBUG(debugSuccessor(" local "));
  Dist.addLocal(Resolved, Weight);
  return true;
}

bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
  // Copy the exit map into Dist.
  for (const auto &I : Loop.Exits)
    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
                   I.second.getMass()))
      // Irreducible backedge.
      return false;

  return true;
}

/// Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
  // Compute loop scale.
  LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");

  // Infinite loops need special handling. If we give the back edge an infinite
  // mass, they may saturate all the other scales in the function down to 1,
  // making all the other region temperatures look exactly the same. Choose an
  // arbitrary scale to avoid these issues.
  //
  // FIXME: An alternate way would be to select a symbolic scale which is later
  // replaced to be the maximum of all computed scales plus 1. This would
  // appropriately describe the loop as having a large scale, without skewing
  // the final frequency computation.
  const Scaled64 InfiniteLoopScale(1, 12);

  // LoopScale == 1 / ExitMass
  // ExitMass == HeadMass - BackedgeMass
  BlockMass TotalBackedgeMass;
  for (auto &Mass : Loop.BackedgeMass)
    TotalBackedgeMass += Mass;
  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;

  // Block scale stores the inverse of the scale. If this is an infinite loop,
  // its exit mass will be zero. In this case, use an arbitrary scale for the
  // loop scale.
  Loop.Scale =
      ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();

  LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
                    << BlockMass::getFull() << " - " << TotalBackedgeMass
                    << ")\n"
                    << " - scale = " << Loop.Scale << "\n");
}
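
// Worked example: if the backedges carry 3/4 of the header's mass, then
// ExitMass = 1 - 3/4 = 1/4 and Loop.Scale = 4; the loop body is roughly four
// times as hot as the loop's entry. A zero exit mass would instead select
// InfiniteLoopScale = 2^12.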

/// Package up a loop.
void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
  LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");

  // Clear the subloop exits to prevent quadratic memory usage.
  for (const BlockNode &M : Loop.Nodes) {
    if (auto *Loop = Working[M.Index].getPackagedLoop())
      Loop->Exits.clear();
    LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
  }
  Loop.IsPackaged = true;
}

#ifndef NDEBUG
static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
                        const DitheringDistributer &D, const BlockNode &T,
                        const BlockMass &M, const char *Desc) {
  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
  if (Desc)
    dbgs() << " [" << Desc << "]";
  if (T.isValid())
    dbgs() << " to " << BFI.getBlockName(T);
  dbgs() << "\n";
}
#endif

void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
                                                LoopData *OuterLoop,
                                                Distribution &Dist) {
  BlockMass Mass = Working[Source.Index].getMass();
  LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");

  // Distribute mass to successors as laid out in Dist.
  DitheringDistributer D(Dist, Mass);

  for (const Weight &W : Dist.Weights) {
    // Check for a local edge (non-backedge and non-exit).
    BlockMass Taken = D.takeMass(W.Amount);
    if (W.Type == Weight::Local) {
      Working[W.TargetNode.Index].getMass() += Taken;
      LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
      continue;
    }

    // Backedges and exits only make sense if we're processing a loop.
    assert(OuterLoop && "backedge or exit outside of loop");

    // Check for a backedge.
    if (W.Type == Weight::Backedge) {
      OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
      LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
      continue;
    }

    // This must be an exit.
    assert(W.Type == Weight::Exit);
    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
    LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
  }
}

static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
                                     const Scaled64 &Min, const Scaled64 &Max) {
  // Scale the Factor to a size that creates integers. If possible scale
  // integers so that Max == UINT64_MAX so that they can be best differentiated.
  // It is possible that the range between min and max cannot be accurately
  // represented in a 64-bit integer without either losing precision for small
  // values (so small unequal numbers all map to 1) or saturating big numbers
  // (so unequal big numbers may all map to UINT64_MAX). We choose to lose
  // precision for small numbers.
  const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT;
  // Users often add up multiple BlockFrequency values or multiply them with
  // things like instruction costs. Leave some room to avoid saturating
  // operations reaching UINT64_MAX too early.
  const unsigned Slack = 10;
  Scaled64 ScalingFactor = Scaled64(1, MaxBits - Slack) / Max;

  // Translate the floats to integers.
  LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
                    << ", factor = " << ScalingFactor << "\n");
  (void)Min;
  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
    LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
                      << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
                      << ", int = " << BFI.Freqs[Index].Integer << "\n");
  }
}
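
// Worked example: Scaled64::DigitsType is uint64_t, so MaxBits = 64 and the
// hottest block is mapped to 2^54. With Max = 8.0, ScalingFactor is
// 2^54 / 8 = 2^51; a block with Scaled = 1.0 becomes 2^51, and anything
// smaller than about 2^-51 clamps to the lower limit of 1.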

/// Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
  LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
                    << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
                    << "\n");
  Loop.Scale *= Loop.Mass.toScaled();
  Loop.IsPackaged = false;
  LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");

  // Propagate the head scale through the loop. Since members are visited in
  // RPO, the head scale will be updated by the loop scale first, and then the
  // final head scale will be used for updating the rest of the members.
  for (const BlockNode &N : Loop.Nodes) {
    const auto &Working = BFI.Working[N.Index];
    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
                                       : BFI.Freqs[N.Index].Scaled;
    Scaled64 New = Loop.Scale * F;
    LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
                      << New << "\n");
    F = New;
  }
}
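
// Worked example: a loop packaged with Mass = 1/2 and Scale = 4 unwraps to a
// combined scale of 2, so each member block (or nested loop package) ends up
// with twice the frequency it had relative to the loop header.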

void BlockFrequencyInfoImplBase::unwrapLoops() {
  // Set initial frequencies from loop-local masses.
  for (size_t Index = 0; Index < Working.size(); ++Index)
    Freqs[Index].Scaled = Working[Index].Mass.toScaled();

  for (LoopData &Loop : Loops)
    unwrapLoop(*this, Loop);
}

void BlockFrequencyInfoImplBase::finalizeMetrics() {
  // Unwrap loop packages in reverse post-order, tracking min and max
  // frequencies.
  auto Min = Scaled64::getLargest();
  auto Max = Scaled64::getZero();
  for (size_t Index = 0; Index < Working.size(); ++Index) {
    // Update min/max scale.
    Min = std::min(Min, Freqs[Index].Scaled);
    Max = std::max(Max, Freqs[Index].Scaled);
  }

  // Convert to integers.
  convertFloatingToInteger(*this, Min, Max);

  // Clean up data structures.
  cleanup(*this);

  // Print out the final stats.
  LLVM_DEBUG(dump());
}

BlockFrequency
BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
  if (!Node.isValid()) {
#ifndef NDEBUG
    if (CheckBFIUnknownBlockQueries) {
      SmallString<256> Msg;
      raw_svector_ostream OS(Msg);
      OS << "*** Detected BFI query for unknown block " << getBlockName(Node);
      report_fatal_error(OS.str());
    }
#endif
    return BlockFrequency(0);
  }
  return BlockFrequency(Freqs[Node.Index].Integer);
}

std::optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
                                                 const BlockNode &Node,
                                                 bool AllowSynthetic) const {
  return getProfileCountFromFreq(F, getBlockFreq(Node), AllowSynthetic);
}

std::optional<uint64_t> BlockFrequencyInfoImplBase::getProfileCountFromFreq(
    const Function &F, BlockFrequency Freq, bool AllowSynthetic) const {
  auto EntryCount = F.getEntryCount(AllowSynthetic);
  if (!EntryCount)
    return std::nullopt;
  // Use 128 bit APInt to do the arithmetic to avoid overflow.
  APInt BlockCount(128, EntryCount->getCount());
  APInt BlockFreq(128, Freq.getFrequency());
  APInt EntryFreq(128, getEntryFreq().getFrequency());
  BlockCount *= BlockFreq;
  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
  // lshr by 1 gives EntryFreq/2.
  BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
  return BlockCount.getLimitedValue();
}
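
// Worked example: with an entry count of 100, entry frequency 8, and block
// frequency 24, the rounded division computes
//   (100 * 24 + 8/2) / 8 = 2404 / 8 = 300   (udiv truncates the exact 300.5)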

bool
BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
  if (!Node.isValid())
    return false;
  return IsIrrLoopHeader.test(Node.Index);
}

Scaled64
BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
  if (!Node.isValid())
    return Scaled64::getZero();
  return Freqs[Node.Index].Scaled;
}

void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
                                              BlockFrequency Freq) {
  assert(Node.isValid() && "Expected valid node");
  assert(Node.Index < Freqs.size() && "Expected legal index");
  Freqs[Node.Index].Integer = Freq.getFrequency();
}

std::string
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
  return {};
}

std::string
BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
}

void llvm::printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq,
                              BlockFrequency Freq) {
  if (Freq == BlockFrequency(0)) {
    OS << "0";
    return;
  }
  if (EntryFreq == BlockFrequency(0)) {
    OS << "<invalid BFI>";
    return;
  }
  Scaled64 Block(Freq.getFrequency(), 0);
  Scaled64 Entry(EntryFreq.getFrequency(), 0);
  OS << Block / Entry;
}
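
// For example, with EntryFreq = 16 and Freq = 8 this prints 0.5: block
// frequencies are reported relative to the function entry, so a block as hot
// as the entry prints as 1.0.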

void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
  Start = OuterLoop.getHeader();
  Nodes.reserve(OuterLoop.Nodes.size());
  for (auto N : OuterLoop.Nodes)
    addNode(N);
  indexNodes();
}

void IrreducibleGraph::addNodesInFunction() {
  Start = 0;
  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
    if (!BFI.Working[Index].isPackaged())
      addNode(Index);
  indexNodes();
}

void IrreducibleGraph::indexNodes() {
  for (auto &I : Nodes)
    Lookup[I.Node.Index] = &I;
}

void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
                               const BFIBase::LoopData *OuterLoop) {
  if (OuterLoop && OuterLoop->isHeader(Succ))
    return;
  auto L = Lookup.find(Succ.Index);
  if (L == Lookup.end())
    return;
  IrrNode &SuccIrr = *L->second;
  Irr.Edges.push_back(&SuccIrr);
  SuccIrr.Edges.push_front(&Irr);
  ++SuccIrr.NumIn;
}

namespace llvm {

template <> struct GraphTraits<IrreducibleGraph> {
  using GraphT = bfi_detail::IrreducibleGraph;
  using NodeRef = const GraphT::IrrNode *;
  using ChildIteratorType = GraphT::IrrNode::iterator;

  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};

} // end namespace llvm

/// Find extra irreducible headers.
///
/// Find entry blocks and other blocks with backedges, which exist when \c G
/// contains irreducible sub-SCCs.
static void findIrreducibleHeaders(
    const BlockFrequencyInfoImplBase &BFI,
    const IrreducibleGraph &G,
    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
  // Map from nodes in the SCC to whether it's an entry block.
  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;

  // InSCC also acts as the set of nodes in the graph. Seed it.
  for (const auto *I : SCC)
    InSCC[I] = false;

  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
    auto &Irr = *I->first;
    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
      if (InSCC.count(P))
        continue;

      // This is an entry block.
      I->second = true;
      Headers.push_back(Irr.Node);
      LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
                        << "\n");
      break;
    }
  }
  assert(Headers.size() >= 2 &&
         "Expected irreducible CFG; -loop-info is likely invalid");
  if (Headers.size() == InSCC.size()) {
    // Every block is a header.
    llvm::sort(Headers);
    return;
  }

  // Look for extra headers from irreducible sub-SCCs.
  for (const auto &I : InSCC) {
    // Entry blocks are already headers.
    if (I.second)
      continue;

    auto &Irr = *I.first;
    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
      // Skip forward edges.
      if (P->Node < Irr.Node)
        continue;

      // Skip predecessors from entry blocks. These can have inverted
      // ordering.
      if (InSCC.lookup(P))
        continue;

      // Store the extra header.
      Headers.push_back(Irr.Node);
      LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
                        << "\n");
      break;
    }
    if (Headers.back() == Irr.Node)
      // Added this as a header.
      continue;

    // This is not a header.
    Others.push_back(Irr.Node);
    LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
  }
  llvm::sort(Headers);
  llvm::sort(Others);
}

static void createIrreducibleLoop(
    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
  // Translate the SCC into RPO.
  LLVM_DEBUG(dbgs() << " - found-scc\n");

  LoopData::NodeList Headers;
  LoopData::NodeList Others;
  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);

  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
                                Headers.end(), Others.begin(), Others.end());

  // Update loop hierarchy.
  for (const auto &N : Loop->Nodes)
    if (BFI.Working[N.Index].isLoopHeader())
      BFI.Working[N.Index].Loop->Parent = &*Loop;
    else
      BFI.Working[N.Index].Loop = &*Loop;
}

iterator_range<std::list<LoopData>::iterator>
BlockFrequencyInfoImplBase::analyzeIrreducible(
    const IrreducibleGraph &G, LoopData *OuterLoop,
    std::list<LoopData>::iterator Insert) {
  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();

  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
    if (I->size() < 2)
      continue;

    // Translate the SCC into RPO.
    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
  }

  if (OuterLoop)
    return make_range(std::next(Prev), Insert);
  return make_range(Loops.begin(), Insert);
}

void
BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
  OuterLoop.Exits.clear();
  for (auto &Mass : OuterLoop.BackedgeMass)
    Mass = BlockMass::getEmpty();
  auto O = OuterLoop.Nodes.begin() + 1;
  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
    if (!Working[I->Index].isPackaged())
      *O++ = *I;
  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
}

void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");

  // Since the loop has more than one header block, the mass flowing back into
  // each header will be different. Adjust the mass in each header loop to
  // reflect the masses flowing through back edges.
  //
  // To do this, we distribute the initial mass using the backedge masses
  // as weights for the distribution.
  BlockMass LoopMass = BlockMass::getFull();
  Distribution Dist;

  LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
    auto &HeaderNode = Loop.Nodes[H];
    auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
    LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
                      << getBlockName(HeaderNode) << ": " << BackedgeMass
                      << "\n");
    if (BackedgeMass.getMass() > 0)
      Dist.addLocal(HeaderNode, BackedgeMass.getMass());
    else
      LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
  }

  DitheringDistributer D(Dist, LoopMass);

  LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
                    << " to headers using above weights\n");
  for (const Weight &W : Dist.Weights) {
    BlockMass Taken = D.takeMass(W.Amount);
    assert(W.Type == Weight::Local && "all weights should be local");
    Working[W.TargetNode.Index].getMass() = Taken;
    LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
  }
}
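
// Worked example: an irreducible loop with two headers whose backedge masses
// are 1/4 and 3/4 redistributes the full loop mass so that one header begins
// the next iteration with mass 1/4 and the other with 3/4.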

void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(
    Distribution &Dist) {
  BlockMass LoopMass = BlockMass::getFull();
  DitheringDistributer D(Dist, LoopMass);
  for (const Weight &W : Dist.Weights) {
    BlockMass Taken = D.takeMass(W.Amount);
    assert(W.Type == Weight::Local && "all weights should be local");
    Working[W.TargetNode.Index].getMass() = Taken;
    LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
  }
}
