1//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Instrumentation-based profile-guided optimization
10//
11//===----------------------------------------------------------------------===//
12
13#include "CodeGenPGO.h"
14#include "CodeGenFunction.h"
15#include "CoverageMappingGen.h"
16#include "clang/AST/RecursiveASTVisitor.h"
17#include "clang/AST/StmtVisitor.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/MDBuilder.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/FileSystem.h"
23#include "llvm/Support/MD5.h"
24#include <optional>
25
26static llvm::cl::opt<bool>
27 EnableValueProfiling("enable-value-profiling",
28 llvm::cl::desc("Enable value profiling"),
29 llvm::cl::Hidden, llvm::cl::init(Val: false));
30
31extern llvm::cl::opt<bool> SystemHeadersCoverage;
32
33using namespace clang;
34using namespace CodeGen;
35
36void CodeGenPGO::setFuncName(StringRef Name,
37 llvm::GlobalValue::LinkageTypes Linkage) {
38 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
39 FuncName = llvm::getPGOFuncName(
40 RawFuncName: Name, Linkage, FileName: CGM.getCodeGenOpts().MainFileName,
41 Version: PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
42
43 // If we're generating a profile, create a variable for the name.
44 if (CGM.getCodeGenOpts().hasProfileClangInstr())
45 FuncNameVar = llvm::createPGOFuncNameVar(M&: CGM.getModule(), Linkage, PGOFuncName: FuncName);
46}
47
48void CodeGenPGO::setFuncName(llvm::Function *Fn) {
49 setFuncName(Name: Fn->getName(), Linkage: Fn->getLinkage());
50 // Create PGOFuncName meta data.
51 llvm::createPGOFuncNameMetadata(F&: *Fn, PGOFuncName: FuncName);
52}
53
54/// The version of the PGO hash algorithm.
55enum PGOHashVersion : unsigned {
56 PGO_HASH_V1,
57 PGO_HASH_V2,
58 PGO_HASH_V3,
59
60 // Keep this set to the latest hash version.
61 PGO_HASH_LATEST = PGO_HASH_V3
62};
63
64namespace {
65/// Stable hasher for PGO region counters.
66///
67/// PGOHash produces a stable hash of a given function's control flow.
68///
69/// Changing the output of this hash will invalidate all previously generated
70/// profiles -- i.e., don't do it.
71///
72/// \note When this hash does eventually change (years?), we still need to
73/// support old hashes. We'll need to pull in the version number from the
74/// profile data format and use the matching hash function.
75class PGOHash {
76 uint64_t Working;
77 unsigned Count;
78 PGOHashVersion HashVersion;
79 llvm::MD5 MD5;
80
81 static const int NumBitsPerType = 6;
82 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
83 static const unsigned TooBig = 1u << NumBitsPerType;
84
85public:
86 /// Hash values for AST nodes.
87 ///
88 /// Distinct values for AST nodes that have region counters attached.
89 ///
90 /// These values must be stable. All new members must be added at the end,
91 /// and no members should be removed. Changing the enumeration value for an
92 /// AST node will affect the hash of every function that contains that node.
93 enum HashType : unsigned char {
94 None = 0,
95 LabelStmt = 1,
96 WhileStmt,
97 DoStmt,
98 ForStmt,
99 CXXForRangeStmt,
100 ObjCForCollectionStmt,
101 SwitchStmt,
102 CaseStmt,
103 DefaultStmt,
104 IfStmt,
105 CXXTryStmt,
106 CXXCatchStmt,
107 ConditionalOperator,
108 BinaryOperatorLAnd,
109 BinaryOperatorLOr,
110 BinaryConditionalOperator,
111 // The preceding values are available with PGO_HASH_V1.
112
113 EndOfScope,
114 IfThenBranch,
115 IfElseBranch,
116 GotoStmt,
117 IndirectGotoStmt,
118 BreakStmt,
119 ContinueStmt,
120 ReturnStmt,
121 ThrowExpr,
122 UnaryOperatorLNot,
123 BinaryOperatorLT,
124 BinaryOperatorGT,
125 BinaryOperatorLE,
126 BinaryOperatorGE,
127 BinaryOperatorEQ,
128 BinaryOperatorNE,
129 // The preceding values are available since PGO_HASH_V2.
130
131 // Keep this last. It's for the static assert that follows.
132 LastHashType
133 };
134 static_assert(LastHashType <= TooBig, "Too many types in HashType");
135
136 PGOHash(PGOHashVersion HashVersion)
137 : Working(0), Count(0), HashVersion(HashVersion) {}
138 void combine(HashType Type);
139 uint64_t finalize();
140 PGOHashVersion getHashVersion() const { return HashVersion; }
141};
142const int PGOHash::NumBitsPerType;
143const unsigned PGOHash::NumTypesPerWord;
144const unsigned PGOHash::TooBig;
145
146/// Get the PGO hash version used in the given indexed profile.
147static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
148 CodeGenModule &CGM) {
149 if (PGOReader->getVersion() <= 4)
150 return PGO_HASH_V1;
151 if (PGOReader->getVersion() <= 5)
152 return PGO_HASH_V2;
153 return PGO_HASH_V3;
154}
155
156/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
157struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
158 using Base = RecursiveASTVisitor<MapRegionCounters>;
159
160 /// The next counter value to assign.
161 unsigned NextCounter;
162 /// The function hash.
163 PGOHash Hash;
164 /// The map of statements to counters.
165 llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
166 /// The next bitmap byte index to assign.
167 unsigned NextMCDCBitmapIdx;
168 MCDC::State &MCDCState;
169 /// Maximum number of supported MC/DC conditions in a boolean expression.
170 unsigned MCDCMaxCond;
171 /// The profile version.
172 uint64_t ProfileVersion;
173 /// Diagnostics Engine used to report warnings.
174 DiagnosticsEngine &Diag;
175
176 MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
177 llvm::DenseMap<const Stmt *, unsigned> &CounterMap,
178 MCDC::State &MCDCState, unsigned MCDCMaxCond,
179 DiagnosticsEngine &Diag)
180 : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
181 NextMCDCBitmapIdx(0), MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond),
182 ProfileVersion(ProfileVersion), Diag(Diag) {}
183
184 // Blocks and lambdas are handled as separate functions, so we need not
185 // traverse them in the parent context.
186 bool TraverseBlockExpr(BlockExpr *BE) { return true; }
187 bool TraverseLambdaExpr(LambdaExpr *LE) {
188 // Traverse the captures, but not the body.
189 for (auto C : zip(t: LE->captures(), u: LE->capture_inits()))
190 TraverseLambdaCapture(LE, C: &std::get<0>(t&: C), Init: std::get<1>(t&: C));
191 return true;
192 }
193 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
194
195 bool VisitDecl(const Decl *D) {
196 switch (D->getKind()) {
197 default:
198 break;
199 case Decl::Function:
200 case Decl::CXXMethod:
201 case Decl::CXXConstructor:
202 case Decl::CXXDestructor:
203 case Decl::CXXConversion:
204 case Decl::ObjCMethod:
205 case Decl::Block:
206 case Decl::Captured:
207 CounterMap[D->getBody()] = NextCounter++;
208 break;
209 }
210 return true;
211 }
212
213 /// If \p S gets a fresh counter, update the counter mappings. Return the
214 /// V1 hash of \p S.
215 PGOHash::HashType updateCounterMappings(Stmt *S) {
216 auto Type = getHashType(HashVersion: PGO_HASH_V1, S);
217 if (Type != PGOHash::None)
218 CounterMap[S] = NextCounter++;
219 return Type;
220 }
221
222 /// The following stacks are used with dataTraverseStmtPre() and
223 /// dataTraverseStmtPost() to track the depth of nested logical operators in a
224 /// boolean expression in a function. The ultimate purpose is to keep track
225 /// of the number of leaf-level conditions in the boolean expression so that a
226 /// profile bitmap can be allocated based on that number.
227 ///
228 /// The stacks are also used to find error cases and notify the user. A
229 /// standard logical operator nest for a boolean expression could be in a form
230 /// similar to this: "x = a && b && c && (d || f)"
231 unsigned NumCond = 0;
232 bool SplitNestedLogicalOp = false;
233 SmallVector<const Stmt *, 16> NonLogOpStack;
234 SmallVector<const BinaryOperator *, 16> LogOpStack;
235
236 // Hook: dataTraverseStmtPre() is invoked prior to visiting an AST Stmt node.
237 bool dataTraverseStmtPre(Stmt *S) {
238 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
239 if (MCDCMaxCond == 0)
240 return true;
241
242 /// At the top of the logical operator nest, reset the number of conditions.
243 if (LogOpStack.empty())
244 NumCond = 0;
245
246 if (const Expr *E = dyn_cast<Expr>(Val: S)) {
247 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: E->IgnoreParens());
248 if (BinOp && BinOp->isLogicalOp()) {
249 /// Check for "split-nested" logical operators. This happens when a new
250 /// boolean expression logical-op nest is encountered within an existing
251 /// boolean expression, separated by a non-logical operator. For
252 /// example, in "x = (a && b && c && foo(d && f))", the "d && f" case
253 /// starts a new boolean expression that is separated from the other
254 /// conditions by the operator foo(). Split-nested cases are not
255 /// supported by MC/DC.
256 SplitNestedLogicalOp = SplitNestedLogicalOp || !NonLogOpStack.empty();
257
258 LogOpStack.push_back(Elt: BinOp);
259 return true;
260 }
261 }
262
263 /// Keep track of non-logical operators. These are OK as long as we don't
264 /// encounter a new logical operator after seeing one.
265 if (!LogOpStack.empty())
266 NonLogOpStack.push_back(Elt: S);
267
268 return true;
269 }
270
271 // Hook: dataTraverseStmtPost() is invoked by the AST visitor after visiting
272 // an AST Stmt node. MC/DC will use it to to signal when the top of a
273 // logical operation (boolean expression) nest is encountered.
274 bool dataTraverseStmtPost(Stmt *S) {
275 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
276 if (MCDCMaxCond == 0)
277 return true;
278
279 if (const Expr *E = dyn_cast<Expr>(Val: S)) {
280 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: E->IgnoreParens());
281 if (BinOp && BinOp->isLogicalOp()) {
282 assert(LogOpStack.back() == BinOp);
283 LogOpStack.pop_back();
284
285 /// At the top of logical operator nest:
286 if (LogOpStack.empty()) {
287 /// Was the "split-nested" logical operator case encountered?
288 if (SplitNestedLogicalOp) {
289 unsigned DiagID = Diag.getCustomDiagID(
290 L: DiagnosticsEngine::Warning,
291 FormatString: "unsupported MC/DC boolean expression; "
292 "contains an operation with a nested boolean expression. "
293 "Expression will not be covered");
294 Diag.Report(Loc: S->getBeginLoc(), DiagID);
295 return false;
296 }
297
298 /// Was the maximum number of conditions encountered?
299 if (NumCond > MCDCMaxCond) {
300 unsigned DiagID = Diag.getCustomDiagID(
301 L: DiagnosticsEngine::Warning,
302 FormatString: "unsupported MC/DC boolean expression; "
303 "number of conditions (%0) exceeds max (%1). "
304 "Expression will not be covered");
305 Diag.Report(Loc: S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond;
306 return false;
307 }
308
309 // Otherwise, allocate the number of bytes required for the bitmap
310 // based on the number of conditions. Must be at least 1-byte long.
311 MCDCState.BitmapMap[BinOp] = NextMCDCBitmapIdx;
312 unsigned SizeInBits = std::max<unsigned>(a: 1L << NumCond, CHAR_BIT);
313 NextMCDCBitmapIdx += SizeInBits / CHAR_BIT;
314 }
315 return true;
316 }
317 }
318
319 if (!LogOpStack.empty())
320 NonLogOpStack.pop_back();
321
322 return true;
323 }
324
325 /// The RHS of all logical operators gets a fresh counter in order to count
326 /// how many times the RHS evaluates to true or false, depending on the
327 /// semantics of the operator. This is only valid for ">= v7" of the profile
328 /// version so that we facilitate backward compatibility. In addition, in
329 /// order to use MC/DC, count the number of total LHS and RHS conditions.
330 bool VisitBinaryOperator(BinaryOperator *S) {
331 if (S->isLogicalOp()) {
332 if (CodeGenFunction::isInstrumentedCondition(C: S->getLHS()))
333 NumCond++;
334
335 if (CodeGenFunction::isInstrumentedCondition(C: S->getRHS())) {
336 if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
337 CounterMap[S->getRHS()] = NextCounter++;
338
339 NumCond++;
340 }
341 }
342 return Base::VisitBinaryOperator(S);
343 }
344
345 /// Include \p S in the function hash.
346 bool VisitStmt(Stmt *S) {
347 auto Type = updateCounterMappings(S);
348 if (Hash.getHashVersion() != PGO_HASH_V1)
349 Type = getHashType(HashVersion: Hash.getHashVersion(), S);
350 if (Type != PGOHash::None)
351 Hash.combine(Type);
352 return true;
353 }
354
355 bool TraverseIfStmt(IfStmt *If) {
356 // If we used the V1 hash, use the default traversal.
357 if (Hash.getHashVersion() == PGO_HASH_V1)
358 return Base::TraverseIfStmt(If);
359
360 // Otherwise, keep track of which branch we're in while traversing.
361 VisitStmt(If);
362 for (Stmt *CS : If->children()) {
363 if (!CS)
364 continue;
365 if (CS == If->getThen())
366 Hash.combine(Type: PGOHash::IfThenBranch);
367 else if (CS == If->getElse())
368 Hash.combine(Type: PGOHash::IfElseBranch);
369 TraverseStmt(S: CS);
370 }
371 Hash.combine(Type: PGOHash::EndOfScope);
372 return true;
373 }
374
375// If the statement type \p N is nestable, and its nesting impacts profile
376// stability, define a custom traversal which tracks the end of the statement
377// in the hash (provided we're not using the V1 hash).
378#define DEFINE_NESTABLE_TRAVERSAL(N) \
379 bool Traverse##N(N *S) { \
380 Base::Traverse##N(S); \
381 if (Hash.getHashVersion() != PGO_HASH_V1) \
382 Hash.combine(PGOHash::EndOfScope); \
383 return true; \
384 }
385
386 DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
387 DEFINE_NESTABLE_TRAVERSAL(DoStmt)
388 DEFINE_NESTABLE_TRAVERSAL(ForStmt)
389 DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
390 DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
391 DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
392 DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
393
394 /// Get version \p HashVersion of the PGO hash for \p S.
395 PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
396 switch (S->getStmtClass()) {
397 default:
398 break;
399 case Stmt::LabelStmtClass:
400 return PGOHash::LabelStmt;
401 case Stmt::WhileStmtClass:
402 return PGOHash::WhileStmt;
403 case Stmt::DoStmtClass:
404 return PGOHash::DoStmt;
405 case Stmt::ForStmtClass:
406 return PGOHash::ForStmt;
407 case Stmt::CXXForRangeStmtClass:
408 return PGOHash::CXXForRangeStmt;
409 case Stmt::ObjCForCollectionStmtClass:
410 return PGOHash::ObjCForCollectionStmt;
411 case Stmt::SwitchStmtClass:
412 return PGOHash::SwitchStmt;
413 case Stmt::CaseStmtClass:
414 return PGOHash::CaseStmt;
415 case Stmt::DefaultStmtClass:
416 return PGOHash::DefaultStmt;
417 case Stmt::IfStmtClass:
418 return PGOHash::IfStmt;
419 case Stmt::CXXTryStmtClass:
420 return PGOHash::CXXTryStmt;
421 case Stmt::CXXCatchStmtClass:
422 return PGOHash::CXXCatchStmt;
423 case Stmt::ConditionalOperatorClass:
424 return PGOHash::ConditionalOperator;
425 case Stmt::BinaryConditionalOperatorClass:
426 return PGOHash::BinaryConditionalOperator;
427 case Stmt::BinaryOperatorClass: {
428 const BinaryOperator *BO = cast<BinaryOperator>(Val: S);
429 if (BO->getOpcode() == BO_LAnd)
430 return PGOHash::BinaryOperatorLAnd;
431 if (BO->getOpcode() == BO_LOr)
432 return PGOHash::BinaryOperatorLOr;
433 if (HashVersion >= PGO_HASH_V2) {
434 switch (BO->getOpcode()) {
435 default:
436 break;
437 case BO_LT:
438 return PGOHash::BinaryOperatorLT;
439 case BO_GT:
440 return PGOHash::BinaryOperatorGT;
441 case BO_LE:
442 return PGOHash::BinaryOperatorLE;
443 case BO_GE:
444 return PGOHash::BinaryOperatorGE;
445 case BO_EQ:
446 return PGOHash::BinaryOperatorEQ;
447 case BO_NE:
448 return PGOHash::BinaryOperatorNE;
449 }
450 }
451 break;
452 }
453 }
454
455 if (HashVersion >= PGO_HASH_V2) {
456 switch (S->getStmtClass()) {
457 default:
458 break;
459 case Stmt::GotoStmtClass:
460 return PGOHash::GotoStmt;
461 case Stmt::IndirectGotoStmtClass:
462 return PGOHash::IndirectGotoStmt;
463 case Stmt::BreakStmtClass:
464 return PGOHash::BreakStmt;
465 case Stmt::ContinueStmtClass:
466 return PGOHash::ContinueStmt;
467 case Stmt::ReturnStmtClass:
468 return PGOHash::ReturnStmt;
469 case Stmt::CXXThrowExprClass:
470 return PGOHash::ThrowExpr;
471 case Stmt::UnaryOperatorClass: {
472 const UnaryOperator *UO = cast<UnaryOperator>(Val: S);
473 if (UO->getOpcode() == UO_LNot)
474 return PGOHash::UnaryOperatorLNot;
475 break;
476 }
477 }
478 }
479
480 return PGOHash::None;
481 }
482};
483
484/// A StmtVisitor that propagates the raw counts through the AST and
485/// records the count at statements where the value may change.
486struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
487 /// PGO state.
488 CodeGenPGO &PGO;
489
490 /// A flag that is set when the current count should be recorded on the
491 /// next statement, such as at the exit of a loop.
492 bool RecordNextStmtCount;
493
494 /// The count at the current location in the traversal.
495 uint64_t CurrentCount;
496
497 /// The map of statements to count values.
498 llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
499
500 /// BreakContinueStack - Keep counts of breaks and continues inside loops.
501 struct BreakContinue {
502 uint64_t BreakCount = 0;
503 uint64_t ContinueCount = 0;
504 BreakContinue() = default;
505 };
506 SmallVector<BreakContinue, 8> BreakContinueStack;
507
508 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
509 CodeGenPGO &PGO)
510 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
511
512 void RecordStmtCount(const Stmt *S) {
513 if (RecordNextStmtCount) {
514 CountMap[S] = CurrentCount;
515 RecordNextStmtCount = false;
516 }
517 }
518
519 /// Set and return the current count.
520 uint64_t setCount(uint64_t Count) {
521 CurrentCount = Count;
522 return Count;
523 }
524
525 void VisitStmt(const Stmt *S) {
526 RecordStmtCount(S);
527 for (const Stmt *Child : S->children())
528 if (Child)
529 this->Visit(Child);
530 }
531
532 void VisitFunctionDecl(const FunctionDecl *D) {
533 // Counter tracks entry to the function body.
534 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
535 CountMap[D->getBody()] = BodyCount;
536 Visit(D->getBody());
537 }
538
539 // Skip lambda expressions. We visit these as FunctionDecls when we're
540 // generating them and aren't interested in the body when generating a
541 // parent context.
542 void VisitLambdaExpr(const LambdaExpr *LE) {}
543
544 void VisitCapturedDecl(const CapturedDecl *D) {
545 // Counter tracks entry to the capture body.
546 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
547 CountMap[D->getBody()] = BodyCount;
548 Visit(D->getBody());
549 }
550
551 void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
552 // Counter tracks entry to the method body.
553 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
554 CountMap[D->getBody()] = BodyCount;
555 Visit(D->getBody());
556 }
557
558 void VisitBlockDecl(const BlockDecl *D) {
559 // Counter tracks entry to the block body.
560 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
561 CountMap[D->getBody()] = BodyCount;
562 Visit(D->getBody());
563 }
564
565 void VisitReturnStmt(const ReturnStmt *S) {
566 RecordStmtCount(S);
567 if (S->getRetValue())
568 Visit(S->getRetValue());
569 CurrentCount = 0;
570 RecordNextStmtCount = true;
571 }
572
573 void VisitCXXThrowExpr(const CXXThrowExpr *E) {
574 RecordStmtCount(E);
575 if (E->getSubExpr())
576 Visit(E->getSubExpr());
577 CurrentCount = 0;
578 RecordNextStmtCount = true;
579 }
580
581 void VisitGotoStmt(const GotoStmt *S) {
582 RecordStmtCount(S);
583 CurrentCount = 0;
584 RecordNextStmtCount = true;
585 }
586
587 void VisitLabelStmt(const LabelStmt *S) {
588 RecordNextStmtCount = false;
589 // Counter tracks the block following the label.
590 uint64_t BlockCount = setCount(PGO.getRegionCount(S));
591 CountMap[S] = BlockCount;
592 Visit(S->getSubStmt());
593 }
594
595 void VisitBreakStmt(const BreakStmt *S) {
596 RecordStmtCount(S);
597 assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
598 BreakContinueStack.back().BreakCount += CurrentCount;
599 CurrentCount = 0;
600 RecordNextStmtCount = true;
601 }
602
603 void VisitContinueStmt(const ContinueStmt *S) {
604 RecordStmtCount(S);
605 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
606 BreakContinueStack.back().ContinueCount += CurrentCount;
607 CurrentCount = 0;
608 RecordNextStmtCount = true;
609 }
610
611 void VisitWhileStmt(const WhileStmt *S) {
612 RecordStmtCount(S);
613 uint64_t ParentCount = CurrentCount;
614
615 BreakContinueStack.push_back(Elt: BreakContinue());
616 // Visit the body region first so the break/continue adjustments can be
617 // included when visiting the condition.
618 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
619 CountMap[S->getBody()] = CurrentCount;
620 Visit(S->getBody());
621 uint64_t BackedgeCount = CurrentCount;
622
623 // ...then go back and propagate counts through the condition. The count
624 // at the start of the condition is the sum of the incoming edges,
625 // the backedge from the end of the loop body, and the edges from
626 // continue statements.
627 BreakContinue BC = BreakContinueStack.pop_back_val();
628 uint64_t CondCount =
629 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
630 CountMap[S->getCond()] = CondCount;
631 Visit(S->getCond());
632 setCount(BC.BreakCount + CondCount - BodyCount);
633 RecordNextStmtCount = true;
634 }
635
636 void VisitDoStmt(const DoStmt *S) {
637 RecordStmtCount(S);
638 uint64_t LoopCount = PGO.getRegionCount(S);
639
640 BreakContinueStack.push_back(Elt: BreakContinue());
641 // The count doesn't include the fallthrough from the parent scope. Add it.
642 uint64_t BodyCount = setCount(LoopCount + CurrentCount);
643 CountMap[S->getBody()] = BodyCount;
644 Visit(S->getBody());
645 uint64_t BackedgeCount = CurrentCount;
646
647 BreakContinue BC = BreakContinueStack.pop_back_val();
648 // The count at the start of the condition is equal to the count at the
649 // end of the body, plus any continues.
650 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
651 CountMap[S->getCond()] = CondCount;
652 Visit(S->getCond());
653 setCount(BC.BreakCount + CondCount - LoopCount);
654 RecordNextStmtCount = true;
655 }
656
657 void VisitForStmt(const ForStmt *S) {
658 RecordStmtCount(S);
659 if (S->getInit())
660 Visit(S->getInit());
661
662 uint64_t ParentCount = CurrentCount;
663
664 BreakContinueStack.push_back(Elt: BreakContinue());
665 // Visit the body region first. (This is basically the same as a while
666 // loop; see further comments in VisitWhileStmt.)
667 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
668 CountMap[S->getBody()] = BodyCount;
669 Visit(S->getBody());
670 uint64_t BackedgeCount = CurrentCount;
671 BreakContinue BC = BreakContinueStack.pop_back_val();
672
673 // The increment is essentially part of the body but it needs to include
674 // the count for all the continue statements.
675 if (S->getInc()) {
676 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
677 CountMap[S->getInc()] = IncCount;
678 Visit(S->getInc());
679 }
680
681 // ...then go back and propagate counts through the condition.
682 uint64_t CondCount =
683 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
684 if (S->getCond()) {
685 CountMap[S->getCond()] = CondCount;
686 Visit(S->getCond());
687 }
688 setCount(BC.BreakCount + CondCount - BodyCount);
689 RecordNextStmtCount = true;
690 }
691
692 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
693 RecordStmtCount(S);
694 if (S->getInit())
695 Visit(S->getInit());
696 Visit(S->getLoopVarStmt());
697 Visit(S->getRangeStmt());
698 Visit(S->getBeginStmt());
699 Visit(S->getEndStmt());
700
701 uint64_t ParentCount = CurrentCount;
702 BreakContinueStack.push_back(Elt: BreakContinue());
703 // Visit the body region first. (This is basically the same as a while
704 // loop; see further comments in VisitWhileStmt.)
705 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
706 CountMap[S->getBody()] = BodyCount;
707 Visit(S->getBody());
708 uint64_t BackedgeCount = CurrentCount;
709 BreakContinue BC = BreakContinueStack.pop_back_val();
710
711 // The increment is essentially part of the body but it needs to include
712 // the count for all the continue statements.
713 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
714 CountMap[S->getInc()] = IncCount;
715 Visit(S->getInc());
716
717 // ...then go back and propagate counts through the condition.
718 uint64_t CondCount =
719 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
720 CountMap[S->getCond()] = CondCount;
721 Visit(S->getCond());
722 setCount(BC.BreakCount + CondCount - BodyCount);
723 RecordNextStmtCount = true;
724 }
725
726 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
727 RecordStmtCount(S);
728 Visit(S->getElement());
729 uint64_t ParentCount = CurrentCount;
730 BreakContinueStack.push_back(Elt: BreakContinue());
731 // Counter tracks the body of the loop.
732 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
733 CountMap[S->getBody()] = BodyCount;
734 Visit(S->getBody());
735 uint64_t BackedgeCount = CurrentCount;
736 BreakContinue BC = BreakContinueStack.pop_back_val();
737
738 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
739 BodyCount);
740 RecordNextStmtCount = true;
741 }
742
743 void VisitSwitchStmt(const SwitchStmt *S) {
744 RecordStmtCount(S);
745 if (S->getInit())
746 Visit(S->getInit());
747 Visit(S->getCond());
748 CurrentCount = 0;
749 BreakContinueStack.push_back(Elt: BreakContinue());
750 Visit(S->getBody());
751 // If the switch is inside a loop, add the continue counts.
752 BreakContinue BC = BreakContinueStack.pop_back_val();
753 if (!BreakContinueStack.empty())
754 BreakContinueStack.back().ContinueCount += BC.ContinueCount;
755 // Counter tracks the exit block of the switch.
756 setCount(PGO.getRegionCount(S));
757 RecordNextStmtCount = true;
758 }
759
760 void VisitSwitchCase(const SwitchCase *S) {
761 RecordNextStmtCount = false;
762 // Counter for this particular case. This counts only jumps from the
763 // switch header and does not include fallthrough from the case before
764 // this one.
765 uint64_t CaseCount = PGO.getRegionCount(S);
766 setCount(CurrentCount + CaseCount);
767 // We need the count without fallthrough in the mapping, so it's more useful
768 // for branch probabilities.
769 CountMap[S] = CaseCount;
770 RecordNextStmtCount = true;
771 Visit(S->getSubStmt());
772 }
773
774 void VisitIfStmt(const IfStmt *S) {
775 RecordStmtCount(S);
776
777 if (S->isConsteval()) {
778 const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
779 if (Stm)
780 Visit(Stm);
781 return;
782 }
783
784 uint64_t ParentCount = CurrentCount;
785 if (S->getInit())
786 Visit(S->getInit());
787 Visit(S->getCond());
788
789 // Counter tracks the "then" part of an if statement. The count for
790 // the "else" part, if it exists, will be calculated from this counter.
791 uint64_t ThenCount = setCount(PGO.getRegionCount(S));
792 CountMap[S->getThen()] = ThenCount;
793 Visit(S->getThen());
794 uint64_t OutCount = CurrentCount;
795
796 uint64_t ElseCount = ParentCount - ThenCount;
797 if (S->getElse()) {
798 setCount(ElseCount);
799 CountMap[S->getElse()] = ElseCount;
800 Visit(S->getElse());
801 OutCount += CurrentCount;
802 } else
803 OutCount += ElseCount;
804 setCount(OutCount);
805 RecordNextStmtCount = true;
806 }
807
808 void VisitCXXTryStmt(const CXXTryStmt *S) {
809 RecordStmtCount(S);
810 Visit(S->getTryBlock());
811 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
812 Visit(S->getHandler(i: I));
813 // Counter tracks the continuation block of the try statement.
814 setCount(PGO.getRegionCount(S));
815 RecordNextStmtCount = true;
816 }
817
818 void VisitCXXCatchStmt(const CXXCatchStmt *S) {
819 RecordNextStmtCount = false;
820 // Counter tracks the catch statement's handler block.
821 uint64_t CatchCount = setCount(PGO.getRegionCount(S));
822 CountMap[S] = CatchCount;
823 Visit(S->getHandlerBlock());
824 }
825
826 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
827 RecordStmtCount(E);
828 uint64_t ParentCount = CurrentCount;
829 Visit(E->getCond());
830
831 // Counter tracks the "true" part of a conditional operator. The
832 // count in the "false" part will be calculated from this counter.
833 uint64_t TrueCount = setCount(PGO.getRegionCount(E));
834 CountMap[E->getTrueExpr()] = TrueCount;
835 Visit(E->getTrueExpr());
836 uint64_t OutCount = CurrentCount;
837
838 uint64_t FalseCount = setCount(ParentCount - TrueCount);
839 CountMap[E->getFalseExpr()] = FalseCount;
840 Visit(E->getFalseExpr());
841 OutCount += CurrentCount;
842
843 setCount(OutCount);
844 RecordNextStmtCount = true;
845 }
846
847 void VisitBinLAnd(const BinaryOperator *E) {
848 RecordStmtCount(E);
849 uint64_t ParentCount = CurrentCount;
850 Visit(E->getLHS());
851 // Counter tracks the right hand side of a logical and operator.
852 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
853 CountMap[E->getRHS()] = RHSCount;
854 Visit(E->getRHS());
855 setCount(ParentCount + RHSCount - CurrentCount);
856 RecordNextStmtCount = true;
857 }
858
859 void VisitBinLOr(const BinaryOperator *E) {
860 RecordStmtCount(E);
861 uint64_t ParentCount = CurrentCount;
862 Visit(E->getLHS());
863 // Counter tracks the right hand side of a logical or operator.
864 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
865 CountMap[E->getRHS()] = RHSCount;
866 Visit(E->getRHS());
867 setCount(ParentCount + RHSCount - CurrentCount);
868 RecordNextStmtCount = true;
869 }
870};
871} // end anonymous namespace
872
873void PGOHash::combine(HashType Type) {
874 // Check that we never combine 0 and only have six bits.
875 assert(Type && "Hash is invalid: unexpected type 0");
876 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
877
878 // Pass through MD5 if enough work has built up.
879 if (Count && Count % NumTypesPerWord == 0) {
880 using namespace llvm::support;
881 uint64_t Swapped =
882 endian::byte_swap<uint64_t, llvm::endianness::little>(value: Working);
883 MD5.update(Data: llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
884 Working = 0;
885 }
886
887 // Accumulate the current type.
888 ++Count;
889 Working = Working << NumBitsPerType | Type;
890}
891
892uint64_t PGOHash::finalize() {
893 // Use Working as the hash directly if we never used MD5.
894 if (Count <= NumTypesPerWord)
895 // No need to byte swap here, since none of the math was endian-dependent.
896 // This number will be byte-swapped as required on endianness transitions,
897 // so we will see the same value on the other side.
898 return Working;
899
900 // Check for remaining work in Working.
901 if (Working) {
902 // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
903 // is buggy because it converts a uint64_t into an array of uint8_t.
904 if (HashVersion < PGO_HASH_V3) {
905 MD5.update(Data: {(uint8_t)Working});
906 } else {
907 using namespace llvm::support;
908 uint64_t Swapped =
909 endian::byte_swap<uint64_t, llvm::endianness::little>(value: Working);
910 MD5.update(Data: llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
911 }
912 }
913
914 // Finalize the MD5 and return the hash.
915 llvm::MD5::MD5Result Result;
916 MD5.final(Result);
917 return Result.low();
918}
919
920void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
921 const Decl *D = GD.getDecl();
922 if (!D->hasBody())
923 return;
924
925 // Skip CUDA/HIP kernel launch stub functions.
926 if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
927 D->hasAttr<CUDAGlobalAttr>())
928 return;
929
930 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
931 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
932 if (!InstrumentRegions && !PGOReader)
933 return;
934 if (D->isImplicit())
935 return;
936 // Constructors and destructors may be represented by several functions in IR.
937 // If so, instrument only base variant, others are implemented by delegation
938 // to the base one, it would be counted twice otherwise.
939 if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
940 if (const auto *CCD = dyn_cast<CXXConstructorDecl>(Val: D))
941 if (GD.getCtorType() != Ctor_Base &&
942 CodeGenFunction::IsConstructorDelegationValid(Ctor: CCD))
943 return;
944 }
945 if (isa<CXXDestructorDecl>(Val: D) && GD.getDtorType() != Dtor_Base)
946 return;
947
948 CGM.ClearUnusedCoverageMapping(D);
949 if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
950 return;
951 if (Fn->hasFnAttribute(llvm::Attribute::SkipProfile))
952 return;
953
954 setFuncName(Fn);
955
956 mapRegionCounters(D);
957 if (CGM.getCodeGenOpts().CoverageMapping)
958 emitCounterRegionMapping(D);
959 if (PGOReader) {
960 SourceManager &SM = CGM.getContext().getSourceManager();
961 loadRegionCounts(PGOReader, IsInMainFile: SM.isInMainFile(Loc: D->getLocation()));
962 computeRegionCounts(D);
963 applyFunctionAttributes(PGOReader, Fn);
964 }
965}
966
967void CodeGenPGO::mapRegionCounters(const Decl *D) {
968 // Use the latest hash version when inserting instrumentation, but use the
969 // version in the indexed profile if we're reading PGO data.
970 PGOHashVersion HashVersion = PGO_HASH_LATEST;
971 uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
972 if (auto *PGOReader = CGM.getPGOReader()) {
973 HashVersion = getPGOHashVersion(PGOReader, CGM);
974 ProfileVersion = PGOReader->getVersion();
975 }
976
977 // If MC/DC is enabled, set the MaxConditions to a preset value. Otherwise,
978 // set it to zero. This value impacts the number of conditions accepted in a
979 // given boolean expression, which impacts the size of the bitmap used to
980 // track test vector execution for that boolean expression. Because the
981 // bitmap scales exponentially (2^n) based on the number of conditions seen,
982 // the maximum value is hard-coded at 6 conditions, which is more than enough
983 // for most embedded applications. Setting a maximum value prevents the
984 // bitmap footprint from growing too large without the user's knowledge. In
985 // the future, this value could be adjusted with a command-line option.
986 unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 6 : 0;
987
988 RegionCounterMap.reset(p: new llvm::DenseMap<const Stmt *, unsigned>);
989 RegionMCDCState.reset(p: new MCDC::State);
990 MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap,
991 *RegionMCDCState, MCDCMaxConditions, CGM.getDiags());
992 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D))
993 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
994 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Val: D))
995 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
996 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(Val: D))
997 Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
998 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(Val: D))
999 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
1000 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
1001 NumRegionCounters = Walker.NextCounter;
1002 RegionMCDCState->BitmapBytes = Walker.NextMCDCBitmapIdx;
1003 FunctionHash = Walker.Hash.finalize();
1004}
1005
1006bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
1007 if (!D->getBody())
1008 return true;
1009
1010 // Skip host-only functions in the CUDA device compilation and device-only
1011 // functions in the host compilation. Just roughly filter them out based on
1012 // the function attributes. If there are effectively host-only or device-only
1013 // ones, their coverage mapping may still be generated.
1014 if (CGM.getLangOpts().CUDA &&
1015 ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
1016 !D->hasAttr<CUDAGlobalAttr>()) ||
1017 (!CGM.getLangOpts().CUDAIsDevice &&
1018 (D->hasAttr<CUDAGlobalAttr>() ||
1019 (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
1020 return true;
1021
1022 // Don't map the functions in system headers.
1023 const auto &SM = CGM.getContext().getSourceManager();
1024 auto Loc = D->getBody()->getBeginLoc();
1025 return !SystemHeadersCoverage && SM.isInSystemHeader(Loc);
1026}
1027
1028void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
1029 if (skipRegionMappingForDecl(D))
1030 return;
1031
1032 std::string CoverageMapping;
1033 llvm::raw_string_ostream OS(CoverageMapping);
1034 RegionMCDCState->CondIDMap.clear();
1035 CoverageMappingGen MappingGen(
1036 *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(),
1037 CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCState.get());
1038 MappingGen.emitCounterMapping(D, OS);
1039 OS.flush();
1040
1041 if (CoverageMapping.empty())
1042 return;
1043
1044 CGM.getCoverageMapping()->addFunctionMappingRecord(
1045 FunctionName: FuncNameVar, FunctionNameValue: FuncName, FunctionHash, CoverageMapping);
1046}
1047
1048void
1049CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
1050 llvm::GlobalValue::LinkageTypes Linkage) {
1051 if (skipRegionMappingForDecl(D))
1052 return;
1053
1054 std::string CoverageMapping;
1055 llvm::raw_string_ostream OS(CoverageMapping);
1056 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
1057 CGM.getContext().getSourceManager(),
1058 CGM.getLangOpts());
1059 MappingGen.emitEmptyMapping(D, OS);
1060 OS.flush();
1061
1062 if (CoverageMapping.empty())
1063 return;
1064
1065 setFuncName(Name, Linkage);
1066 CGM.getCoverageMapping()->addFunctionMappingRecord(
1067 FunctionName: FuncNameVar, FunctionNameValue: FuncName, FunctionHash, CoverageMapping, IsUsed: false);
1068}
1069
1070void CodeGenPGO::computeRegionCounts(const Decl *D) {
1071 StmtCountMap.reset(p: new llvm::DenseMap<const Stmt *, uint64_t>);
1072 ComputeRegionCounts Walker(*StmtCountMap, *this);
1073 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D))
1074 Walker.VisitFunctionDecl(D: FD);
1075 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Val: D))
1076 Walker.VisitObjCMethodDecl(D: MD);
1077 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(Val: D))
1078 Walker.VisitBlockDecl(D: BD);
1079 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(Val: D))
1080 Walker.VisitCapturedDecl(D: const_cast<CapturedDecl *>(CD));
1081}
1082
1083void
1084CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
1085 llvm::Function *Fn) {
1086 if (!haveRegionCounts())
1087 return;
1088
1089 uint64_t FunctionCount = getRegionCount(S: nullptr);
1090 Fn->setEntryCount(Count: FunctionCount);
1091}
1092
1093void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
1094 llvm::Value *StepV) {
1095 if (!RegionCounterMap || !Builder.GetInsertBlock())
1096 return;
1097
1098 unsigned Counter = (*RegionCounterMap)[S];
1099
1100 llvm::Value *Args[] = {FuncNameVar,
1101 Builder.getInt64(C: FunctionHash),
1102 Builder.getInt32(C: NumRegionCounters),
1103 Builder.getInt32(C: Counter), StepV};
1104 if (!StepV)
1105 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
1106 ArrayRef(Args, 4));
1107 else
1108 Builder.CreateCall(
1109 CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
1110 ArrayRef(Args));
1111}
1112
1113bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) {
1114 return (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1115 CGM.getCodeGenOpts().MCDCCoverage && Builder.GetInsertBlock());
1116}
1117
1118void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) {
1119 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1120 return;
1121
1122 auto *I8PtrTy = llvm::PointerType::getUnqual(C&: CGM.getLLVMContext());
1123
1124 // Emit intrinsic representing MCDC bitmap parameters at function entry.
1125 // This is used by the instrumentation pass, but it isn't actually lowered to
1126 // anything.
1127 llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(C: FuncNameVar, Ty: I8PtrTy),
1128 Builder.getInt64(C: FunctionHash),
1129 Builder.getInt32(C: RegionMCDCState->BitmapBytes)};
1130 Builder.CreateCall(
1131 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args);
1132}
1133
1134void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder,
1135 const Expr *S,
1136 Address MCDCCondBitmapAddr) {
1137 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1138 return;
1139
1140 S = S->IgnoreParens();
1141
1142 auto ExprMCDCBitmapMapIterator = RegionMCDCState->BitmapMap.find(S);
1143 if (ExprMCDCBitmapMapIterator == RegionMCDCState->BitmapMap.end())
1144 return;
1145
1146 // Extract the ID of the global bitmap associated with this expression.
1147 unsigned MCDCTestVectorBitmapID = ExprMCDCBitmapMapIterator->second;
1148 auto *I8PtrTy = llvm::PointerType::getUnqual(C&: CGM.getLLVMContext());
1149
1150 // Emit intrinsic responsible for updating the global bitmap corresponding to
1151 // a boolean expression. The index being set is based on the value loaded
1152 // from a pointer to a dedicated temporary value on the stack that is itself
1153 // updated via emitMCDCCondBitmapReset() and emitMCDCCondBitmapUpdate(). The
1154 // index represents an executed test vector.
1155 llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(C: FuncNameVar, Ty: I8PtrTy),
1156 Builder.getInt64(C: FunctionHash),
1157 Builder.getInt32(C: RegionMCDCState->BitmapBytes),
1158 Builder.getInt32(C: MCDCTestVectorBitmapID),
1159 MCDCCondBitmapAddr.getPointer()};
1160 Builder.CreateCall(
1161 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_tvbitmap_update), Args);
1162}
1163
1164void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S,
1165 Address MCDCCondBitmapAddr) {
1166 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1167 return;
1168
1169 S = S->IgnoreParens();
1170
1171 if (!RegionMCDCState->BitmapMap.contains(S))
1172 return;
1173
1174 // Emit intrinsic that resets a dedicated temporary value on the stack to 0.
1175 Builder.CreateStore(Val: Builder.getInt32(C: 0), Addr: MCDCCondBitmapAddr);
1176}
1177
1178void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S,
1179 Address MCDCCondBitmapAddr,
1180 llvm::Value *Val) {
1181 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1182 return;
1183
1184 // Even though, for simplicity, parentheses and unary logical-NOT operators
1185 // are considered part of their underlying condition for both MC/DC and
1186 // branch coverage, the condition IDs themselves are assigned and tracked
1187 // using the underlying condition itself. This is done solely for
1188 // consistency since parentheses and logical-NOTs are ignored when checking
1189 // whether the condition is actually an instrumentable condition. This can
1190 // also make debugging a bit easier.
1191 S = CodeGenFunction::stripCond(C: S);
1192
1193 auto ExprMCDCConditionIDMapIterator = RegionMCDCState->CondIDMap.find(S);
1194 if (ExprMCDCConditionIDMapIterator == RegionMCDCState->CondIDMap.end())
1195 return;
1196
1197 // Extract the ID of the condition we are setting in the bitmap.
1198 unsigned CondID = ExprMCDCConditionIDMapIterator->second;
1199 assert(CondID > 0 && "Condition has no ID!");
1200
1201 auto *I8PtrTy = llvm::PointerType::getUnqual(C&: CGM.getLLVMContext());
1202
1203 // Emit intrinsic that updates a dedicated temporary value on the stack after
1204 // a condition is evaluated. After the set of conditions has been updated,
1205 // the resulting value is used to update the boolean expression's bitmap.
1206 llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(C: FuncNameVar, Ty: I8PtrTy),
1207 Builder.getInt64(C: FunctionHash),
1208 Builder.getInt32(C: CondID - 1),
1209 MCDCCondBitmapAddr.getPointer(), Val};
1210 Builder.CreateCall(
1211 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update),
1212 Args);
1213}
1214
1215void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
1216 if (CGM.getCodeGenOpts().hasProfileClangInstr())
1217 M.addModuleFlag(Behavior: llvm::Module::Warning, Key: "EnableValueProfiling",
1218 Val: uint32_t(EnableValueProfiling));
1219}
1220
1221// This method either inserts a call to the profile run-time during
1222// instrumentation or puts profile data into metadata for PGO use.
1223void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
1224 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
1225
1226 if (!EnableValueProfiling)
1227 return;
1228
1229 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
1230 return;
1231
1232 if (isa<llvm::Constant>(Val: ValuePtr))
1233 return;
1234
1235 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
1236 if (InstrumentValueSites && RegionCounterMap) {
1237 auto BuilderInsertPoint = Builder.saveIP();
1238 Builder.SetInsertPoint(ValueSite);
1239 llvm::Value *Args[5] = {
1240 FuncNameVar,
1241 Builder.getInt64(C: FunctionHash),
1242 Builder.CreatePtrToInt(V: ValuePtr, DestTy: Builder.getInt64Ty()),
1243 Builder.getInt32(C: ValueKind),
1244 Builder.getInt32(C: NumValueSites[ValueKind]++)
1245 };
1246 Builder.CreateCall(
1247 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
1248 Builder.restoreIP(IP: BuilderInsertPoint);
1249 return;
1250 }
1251
1252 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1253 if (PGOReader && haveRegionCounts()) {
1254 // We record the top most called three functions at each call site.
1255 // Profile metadata contains "VP" string identifying this metadata
1256 // as value profiling data, then a uint32_t value for the value profiling
1257 // kind, a uint64_t value for the total number of times the call is
1258 // executed, followed by the function hash and execution count (uint64_t)
1259 // pairs for each function.
1260 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1261 return;
1262
1263 llvm::annotateValueSite(M&: CGM.getModule(), Inst&: *ValueSite, InstrProfR: *ProfRecord,
1264 ValueKind: (llvm::InstrProfValueKind)ValueKind,
1265 SiteIndx: NumValueSites[ValueKind]);
1266
1267 NumValueSites[ValueKind]++;
1268 }
1269}
1270
1271void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1272 bool IsInMainFile) {
1273 CGM.getPGOStats().addVisited(MainFile: IsInMainFile);
1274 RegionCounts.clear();
1275 llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1276 PGOReader->getInstrProfRecord(FuncName, FuncHash: FunctionHash);
1277 if (auto E = RecordExpected.takeError()) {
1278 auto IPE = std::get<0>(in: llvm::InstrProfError::take(E: std::move(E)));
1279 if (IPE == llvm::instrprof_error::unknown_function)
1280 CGM.getPGOStats().addMissing(MainFile: IsInMainFile);
1281 else if (IPE == llvm::instrprof_error::hash_mismatch)
1282 CGM.getPGOStats().addMismatched(MainFile: IsInMainFile);
1283 else if (IPE == llvm::instrprof_error::malformed)
1284 // TODO: Consider a more specific warning for this case.
1285 CGM.getPGOStats().addMismatched(MainFile: IsInMainFile);
1286 return;
1287 }
1288 ProfRecord =
1289 std::make_unique<llvm::InstrProfRecord>(args: std::move(RecordExpected.get()));
1290 RegionCounts = ProfRecord->Counts;
1291}
1292
1293/// Calculate what to divide by to scale weights.
1294///
1295/// Given the maximum weight, calculate a divisor that will scale all the
1296/// weights to strictly less than UINT32_MAX.
1297static uint64_t calculateWeightScale(uint64_t MaxWeight) {
1298 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
1299}
1300
1301/// Scale an individual branch weight (and add 1).
1302///
1303/// Scale a 64-bit weight down to 32-bits using \c Scale.
1304///
1305/// According to Laplace's Rule of Succession, it is better to compute the
1306/// weight based on the count plus 1, so universally add 1 to the value.
1307///
1308/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
1309/// greater than \c Weight.
1310static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
1311 assert(Scale && "scale by 0?");
1312 uint64_t Scaled = Weight / Scale + 1;
1313 assert(Scaled <= UINT32_MAX && "overflow 32-bits");
1314 return Scaled;
1315}
1316
1317llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1318 uint64_t FalseCount) const {
1319 // Check for empty weights.
1320 if (!TrueCount && !FalseCount)
1321 return nullptr;
1322
1323 // Calculate how to scale down to 32-bits.
1324 uint64_t Scale = calculateWeightScale(MaxWeight: std::max(a: TrueCount, b: FalseCount));
1325
1326 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1327 return MDHelper.createBranchWeights(TrueWeight: scaleBranchWeight(Weight: TrueCount, Scale),
1328 FalseWeight: scaleBranchWeight(Weight: FalseCount, Scale));
1329}
1330
1331llvm::MDNode *
1332CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
1333 // We need at least two elements to create meaningful weights.
1334 if (Weights.size() < 2)
1335 return nullptr;
1336
1337 // Check for empty weights.
1338 uint64_t MaxWeight = *std::max_element(first: Weights.begin(), last: Weights.end());
1339 if (MaxWeight == 0)
1340 return nullptr;
1341
1342 // Calculate how to scale down to 32-bits.
1343 uint64_t Scale = calculateWeightScale(MaxWeight);
1344
1345 SmallVector<uint32_t, 16> ScaledWeights;
1346 ScaledWeights.reserve(N: Weights.size());
1347 for (uint64_t W : Weights)
1348 ScaledWeights.push_back(Elt: scaleBranchWeight(Weight: W, Scale));
1349
1350 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1351 return MDHelper.createBranchWeights(Weights: ScaledWeights);
1352}
1353
1354llvm::MDNode *
1355CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1356 uint64_t LoopCount) const {
1357 if (!PGO.haveRegionCounts())
1358 return nullptr;
1359 std::optional<uint64_t> CondCount = PGO.getStmtCount(S: Cond);
1360 if (!CondCount || *CondCount == 0)
1361 return nullptr;
1362 return createProfileWeights(TrueCount: LoopCount,
1363 FalseCount: std::max(a: *CondCount, b: LoopCount) - LoopCount);
1364}
1365

source code of clang/lib/CodeGen/CodeGenPGO.cpp