BlockCoverageInference.cpp source code [llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp]

1	//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Our algorithm works by first identifying a subset of nodes that must always
10	// be instrumented. We call these nodes ambiguous because knowing the coverage
11	// of all remaining nodes is not enough to infer their coverage status.
12	//
13	// In general a node v is ambiguous if there exists two entry-to-terminal paths
14	// P_1 and P_2 such that:
15	// 1. v not in P_1 but P_1 visits a predecessor of v, and
16	// 2. v not in P_2 but P_2 visits a successor of v.
17	//
18	// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19	// coverage from the coverage of its predecessors, or if condition 2 fails, we
20	// can infer v’s coverage from the coverage of its successors.
21	//
22	// Sadly, there are example CFGs where it is not possible to infer all nodes
23	// from the ambiguous nodes alone. Our algorithm selects a minimum number of
24	// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
25	//
26	// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
27	//
28	//===----------------------------------------------------------------------===//
29
30	#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31	#include "llvm/ADT/DepthFirstIterator.h"
32	#include "llvm/ADT/Statistic.h"
33	#include "llvm/Support/CRC.h"
34	#include "llvm/Support/Debug.h"
35	#include "llvm/Support/GraphWriter.h"
36	#include "llvm/Support/raw_ostream.h"
37	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38
39	using namespace llvm;
40
41	#define DEBUG_TYPE "pgo-block-coverage"
42
43	STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
44	STATISTIC(NumIneligibleFunctions,
45	"Number of functions for which BCI cannot run on");
46	STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
47	STATISTIC(NumInstrumentedBlocks,
48	"Number of basic blocks instrumented for coverage");
49
50	BlockCoverageInference::BlockCoverageInference(const Function &F,
51	bool ForceInstrumentEntry)
52	: F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
53	findDependencies();
54	assert(!ForceInstrumentEntry \|\| shouldInstrumentBlock(F.getEntryBlock()));
55
56	++NumFunctions;
57	for (auto &BB : F) {
58	++NumBlocks;
59	if (shouldInstrumentBlock(BB))
60	++NumInstrumentedBlocks;
61	}
62	}
63
64	BlockCoverageInference::BlockSet
65	BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
66	assert(BB.getParent() == &F);
67	BlockSet Dependencies;
68	auto It = PredecessorDependencies.find(Val: &BB);
69	if (It != PredecessorDependencies.end())
70	Dependencies.set_union(It ->second);
71	It = SuccessorDependencies.find(Val: &BB);
72	if (It != SuccessorDependencies.end())
73	Dependencies.set_union(It ->second);
74	return Dependencies;
75	}
76
77	uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
78	JamCRC JC;
79	uint64_t Index = `0`;
80	for (auto &BB : F) {
81	if (shouldInstrumentBlock(BB)) {
82	uint8_t Data[`8`];
83	support::endian::write64le(P: Data, V: Index);
84	JC.update(Data);
85	}
86	Index++;
87	}
88	return JC.getCRC();
89	}
90
91	bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
92	assert(BB.getParent() == &F);
93	auto It = PredecessorDependencies.find(Val: &BB);
94	if (It != PredecessorDependencies.end() && It ->second.size())
95	return false;
96	It = SuccessorDependencies.find(Val: &BB);
97	if (It != SuccessorDependencies.end() && It ->second.size())
98	return false;
99	return true;
100	}
101
102	void BlockCoverageInference::findDependencies() {
103	assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
104	// Empirical analysis shows that this algorithm finishes within 5 seconds for
105	// functions with fewer than 1.5K blocks.
106	if (F.hasFnAttribute(Attribute::NoReturn) \|\| F.size() > `1500`) {
107	++NumIneligibleFunctions;
108	return;
109	}
110
111	SmallVector<const BasicBlock *, `4`> TerminalBlocks;
112	for (auto &BB : F)
113	if (succ_empty(BB: &BB))
114	TerminalBlocks.push_back(Elt: &BB);
115
116	// Traverse the CFG backwards from the terminal blocks to make sure every
117	// block can reach some terminal block. Otherwise this algorithm will not work
118	// and we must fall back to instrumenting every block.
119	df_iterator_default_set<const BasicBlock *> Visited;
120	for (auto *BB : TerminalBlocks)
121	for (auto *N : inverse_depth_first_ext(G: BB, S&: Visited))
122	(void)N;
123	if (F.size() != Visited.size()) {
124	++NumIneligibleFunctions;
125	return;
126	}
127
128	// The current implementation for computing `PredecessorDependencies` and
129	// `SuccessorDependencies` runs in quadratic time with respect to the number
130	// of basic blocks. While we do have a more complicated linear time algorithm
131	// in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132	// significant speedup in practice given that most functions tend to be
133	// relatively small in size for intended use cases.
134	auto &EntryBlock = F.getEntryBlock();
135	for (auto &BB : F) {
136	// The set of blocks that are reachable while avoiding BB.
137	BlockSet ReachableFromEntry, ReachableFromTerminal;
138	getReachableAvoiding(Start: EntryBlock, Avoid: BB, /IsForward=/true,
139	Reachable&: ReachableFromEntry);
140	for (auto *TerminalBlock : TerminalBlocks)
141	getReachableAvoiding(Start: TerminalBlock, Avoid: BB, /IsForward=/*false,
142	Reachable&: ReachableFromTerminal);
143
144	auto Preds = predecessors(BB: &BB);
145	bool HasSuperReachablePred = llvm::any_of(Range&: Preds, P: [&](auto *Pred) {
146	return ReachableFromEntry.count(key: Pred) &&
147	ReachableFromTerminal.count(key: Pred);
148	});
149	if (!HasSuperReachablePred)
150	for (auto *Pred : Preds)
151	if (ReachableFromEntry.count(key: Pred))
152	PredecessorDependencies [&BB].insert(X: Pred);
153
154	auto Succs = successors(BB: &BB);
155	bool HasSuperReachableSucc = llvm::any_of(Range&: Succs, P: [&](auto *Succ) {
156	return ReachableFromEntry.count(key: Succ) &&
157	ReachableFromTerminal.count(key: Succ);
158	});
159	if (!HasSuperReachableSucc)
160	for (auto *Succ : Succs)
161	if (ReachableFromTerminal.count(key: Succ))
162	SuccessorDependencies [&BB].insert(X: Succ);
163	}
164
165	if (ForceInstrumentEntry) {
166	// Force the entry block to be instrumented by clearing the blocks it can
167	// infer coverage from.
168	PredecessorDependencies [&EntryBlock].clear();
169	SuccessorDependencies [&EntryBlock].clear();
170	}
171
172	// Construct a graph where blocks are connected if there is a mutual
173	// dependency between them. This graph has a special property that it contains
174	// only paths.
175	DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
176	for (auto &BB : F) {
177	for (auto *Succ : successors(BB: &BB)) {
178	if (SuccessorDependencies [&BB].count(key: Succ) &&
179	PredecessorDependencies [Succ].count(key: &BB)) {
180	AdjacencyList [&BB].insert(X: Succ);
181	AdjacencyList [Succ].insert(X: &BB);
182	}
183	}
184	}
185
186	// Given a path with at least one node, return the next node on the path.
187	auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
188	assert(Path.size());
189	auto &Neighbors = AdjacencyList [Path.back()];
190	if (Path.size() == `1`) {
191	// This is the first node on the path, return its neighbor.
192	assert(Neighbors.size() == `1`);
193	return Neighbors.front();
194	} else if (Neighbors.size() == `2`) {
195	// This is the middle of the path, find the neighbor that is not on the
196	// path already.
197	assert(Path.size() >= `2`);
198	return Path.count(key: Neighbors [`0`]) ? Neighbors [`1`] : Neighbors [`0`];
199	}
200	// This is the end of the path.
201	assert(Neighbors.size() == `1`);
202	return nullptr;
203	};
204
205	// Remove all cycles in the inferencing graph.
206	for (auto &BB : F) {
207	if (AdjacencyList [&BB].size() == `1`) {
208	// We found the head of some path.
209	BlockSet Path;
210	Path.insert(X: &BB);
211	while (const BasicBlock *Next = getNextOnPath (Path))
212	Path.insert(X: Next);
213	LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
214
215	// Remove these nodes from the graph so we don't discover this path again.
216	for (auto *BB : Path)
217	AdjacencyList [BB].clear();
218
219	// Finally, remove the cycles.
220	if (PredecessorDependencies [Path.front()].size()) {
221	for (auto *BB : Path)
222	if (BB != Path.back())
223	SuccessorDependencies [BB].clear();
224	} else {
225	for (auto *BB : Path)
226	if (BB != Path.front())
227	PredecessorDependencies [BB].clear();
228	}
229	}
230	}
231	LLVM_DEBUG(dump(dbgs()));
232	}
233
234	void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
235	const BasicBlock &Avoid,
236	bool IsForward,
237	BlockSet &Reachable) const {
238	df_iterator_default_set<const BasicBlock *> Visited;
239	Visited.insert(N: &Avoid);
240	if (IsForward) {
241	auto Range = depth_first_ext(G: &Start, S&: Visited);
242	Reachable.insert(Start: Range.begin(), End: Range.end());
243	} else {
244	auto Range = inverse_depth_first_ext(G: &Start, S&: Visited);
245	Reachable.insert(Start: Range.begin(), End: Range.end());
246	}
247	}
248
249	namespace llvm {
250	class DotFuncBCIInfo {
251	private:
252	const BlockCoverageInference *BCI;
253	const DenseMap<const BasicBlock , bool> Coverage;
254
255	public:
256	DotFuncBCIInfo(const BlockCoverageInference *BCI,
257	const DenseMap<const BasicBlock , bool> Coverage)
258	: BCI(BCI), Coverage(Coverage) {}
259
260	const Function &getFunction() { return BCI->F; }
261
262	bool isInstrumented(const BasicBlock BB) const* {
263	return BCI->shouldInstrumentBlock(BB: *BB);
264	}
265
266	bool isCovered(const BasicBlock BB) const* {
267	return Coverage && Coverage->lookup(Val: BB);
268	}
269
270	bool isDependent(const BasicBlock Src, const* BasicBlock Dest) const* {
271	return BCI->getDependencies(BB: *Src).count(key: Dest);
272	}
273	};
274
275	template <>
276	struct GraphTraits<DotFuncBCIInfo > : public* GraphTraits<const BasicBlock *> {
277	static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
278	return &(Info->getFunction().getEntryBlock());
279	}
280
281	// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282	using nodes_iterator = pointer_iterator<Function::const_iterator>;
283
284	static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
285	return nodes_iterator (Info->getFunction().begin());
286	}
287
288	static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
289	return nodes_iterator (Info->getFunction().end());
290	}
291
292	static size_t size(DotFuncBCIInfo *Info) {
293	return Info->getFunction().size();
294	}
295	};
296
297	template <>
298	struct DOTGraphTraits<DotFuncBCIInfo > : public* DefaultDOTGraphTraits {
299
300	DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits (IsSimple) {}
301
302	static std::string getGraphName(DotFuncBCIInfo *Info) {
303	return "BCI CFG for " + Info->getFunction().getName().str();
304	}
305
306	std::string getNodeLabel(const BasicBlock Node, DotFuncBCIInfo Info) {
307	return Node->getName().str();
308	}
309
310	std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
311	DotFuncBCIInfo *Info) {
312	const BasicBlock Dest = I;
313	if (Info->isDependent(Src, Dest))
314	return "color=red";
315	if (Info->isDependent(Src: Dest, Dest: Src))
316	return "color=blue";
317	return "";
318	}
319
320	std::string getNodeAttributes(const BasicBlock Node, DotFuncBCIInfo Info) {
321	std::string Result;
322	if (Info->isInstrumented(BB: Node))
323	Result += "style=filled,fillcolor=gray";
324	if (Info->isCovered(BB: Node))
325	Result += std::string (Result.empty() ? "" : ",") + "color=red";
326	return Result;
327	}
328	};
329
330	} // namespace llvm
331
332	void BlockCoverageInference::viewBlockCoverageGraph(
333	const DenseMap<const BasicBlock , bool> Coverage) const {
334	DotFuncBCIInfo Info(this, Coverage);
335	WriteGraph(G: &Info, Name: "BCI", ShortNames: false,
336	Title: "Block Coverage Inference for " + F.getName());
337	}
338
339	void BlockCoverageInference::dump(raw_ostream &OS) const {
340	OS << "Minimal block coverage for function \'" << F.getName()
341	<< "\' (Instrumented=*)\n";
342	for (auto &BB : F) {
343	OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
344	auto It = PredecessorDependencies.find(Val: &BB);
345	if (It != PredecessorDependencies.end() && It ->second.size())
346	OS << " PredDeps = " << getBlockNames(BBs: It ->second) << "\n";
347	It = SuccessorDependencies.find(Val: &BB);
348	if (It != SuccessorDependencies.end() && It ->second.size())
349	OS << " SuccDeps = " << getBlockNames(BBs: It ->second) << "\n";
350	}
351	OS << " Instrumented Blocks Hash = 0x"
352	<< Twine::utohexstr(Val: getInstrumentedBlocksHash()) << "\n";
353	}
354
355	std::string
356	BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
357	std::string Result;
358	raw_string_ostream OS(Result);
359	OS << "[";
360	if (!BBs.empty()) {
361	OS << BBs.front()->getName();
362	BBs = BBs.drop_front();
363	}
364	for (auto *BB : BBs)
365	OS << ", " << BB->getName();
366	OS << "]";
367	return OS.str();
368	}
369

source code of llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp