1 | //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SampleProfileMatcher used for stale |
10 | // profile matching. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/IPO/SampleProfileMatcher.h" |
15 | #include "llvm/IR/IntrinsicInst.h" |
16 | #include "llvm/IR/MDBuilder.h" |
17 | |
18 | using namespace llvm; |
19 | using namespace sampleprof; |
20 | |
21 | #define DEBUG_TYPE "sample-profile-matcher" |
22 | |
23 | extern cl::opt<bool> SalvageStaleProfile; |
24 | extern cl::opt<bool> PersistProfileStaleness; |
25 | extern cl::opt<bool> ReportProfileStaleness; |
26 | |
27 | void SampleProfileMatcher::findIRAnchors( |
28 | const Function &F, std::map<LineLocation, StringRef> &IRAnchors) { |
29 | // For inlined code, recover the original callsite and callee by finding the |
30 | // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the |
31 | // top-level frame is "main:1", the callsite is "1" and the callee is "foo". |
32 | auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { |
33 | assert((DIL && DIL->getInlinedAt()) && "No inlined callsite" ); |
34 | const DILocation *PrevDIL = nullptr; |
35 | do { |
36 | PrevDIL = DIL; |
37 | DIL = DIL->getInlinedAt(); |
38 | } while (DIL->getInlinedAt()); |
39 | |
40 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL); |
41 | StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); |
42 | return std::make_pair(x&: Callsite, y&: CalleeName); |
43 | }; |
44 | |
45 | auto GetCanonicalCalleeName = [](const CallBase *CB) { |
46 | StringRef CalleeName = UnknownIndirectCallee; |
47 | if (Function *Callee = CB->getCalledFunction()) |
48 | CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName()); |
49 | return CalleeName; |
50 | }; |
51 | |
52 | // Extract profile matching anchors in the IR. |
53 | for (auto &BB : F) { |
54 | for (auto &I : BB) { |
55 | DILocation *DIL = I.getDebugLoc(); |
56 | if (!DIL) |
57 | continue; |
58 | |
59 | if (FunctionSamples::ProfileIsProbeBased) { |
60 | if (auto Probe = extractProbe(Inst: I)) { |
61 | // Flatten inlined IR for the matching. |
62 | if (DIL->getInlinedAt()) { |
63 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
64 | } else { |
65 | // Use empty StringRef for basic block probe. |
66 | StringRef CalleeName; |
67 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) { |
68 | // Skip the probe inst whose callee name is "llvm.pseudoprobe". |
69 | if (!isa<IntrinsicInst>(Val: &I)) |
70 | CalleeName = GetCanonicalCalleeName(CB); |
71 | } |
72 | IRAnchors.emplace(args: LineLocation(Probe->Id, 0), args&: CalleeName); |
73 | } |
74 | } |
75 | } else { |
76 | // TODO: For line-number based profile(AutoFDO), currently only support |
77 | // find callsite anchors. In future, we need to parse all the non-call |
78 | // instructions to extract the line locations for profile matching. |
79 | if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I)) |
80 | continue; |
81 | |
82 | if (DIL->getInlinedAt()) { |
83 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
84 | } else { |
85 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL); |
86 | StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I)); |
87 | IRAnchors.emplace(args&: Callsite, args&: CalleeName); |
88 | } |
89 | } |
90 | } |
91 | } |
92 | } |
93 | |
94 | void SampleProfileMatcher::findProfileAnchors( |
95 | const FunctionSamples &FS, |
96 | std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) { |
97 | auto isInvalidLineOffset = [](uint32_t LineOffset) { |
98 | return LineOffset & 0x8000; |
99 | }; |
100 | |
101 | for (const auto &I : FS.getBodySamples()) { |
102 | const LineLocation &Loc = I.first; |
103 | if (isInvalidLineOffset(Loc.LineOffset)) |
104 | continue; |
105 | for (const auto &I : I.second.getCallTargets()) { |
106 | auto Ret = |
107 | ProfileAnchors.try_emplace(k: Loc, args: std::unordered_set<FunctionId>()); |
108 | Ret.first->second.insert(x: I.first); |
109 | } |
110 | } |
111 | |
112 | for (const auto &I : FS.getCallsiteSamples()) { |
113 | const LineLocation &Loc = I.first; |
114 | if (isInvalidLineOffset(Loc.LineOffset)) |
115 | continue; |
116 | const auto &CalleeMap = I.second; |
117 | for (const auto &I : CalleeMap) { |
118 | auto Ret = |
119 | ProfileAnchors.try_emplace(k: Loc, args: std::unordered_set<FunctionId>()); |
120 | Ret.first->second.insert(x: I.first); |
121 | } |
122 | } |
123 | } |
124 | |
125 | // Call target name anchor based profile fuzzy matching. |
126 | // Input: |
127 | // For IR locations, the anchor is the callee name of direct callsite; For |
128 | // profile locations, it's the call target name for BodySamples or inlinee's |
129 | // profile name for CallsiteSamples. |
130 | // Matching heuristic: |
131 | // First match all the anchors in lexical order, then split the non-anchor |
132 | // locations between the two anchors evenly, first half are matched based on the |
133 | // start anchor, second half are matched based on the end anchor. |
134 | // For example, given: |
135 | // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] |
136 | // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] |
137 | // The matching gives: |
138 | // [1, 2(foo), 3, 5, 6(bar), 7] |
139 | // | | | | | | |
140 | // [1, 2, 3(foo), 4, 7, 8(bar), 9] |
141 | // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. |
142 | void SampleProfileMatcher::runStaleProfileMatching( |
143 | const Function &F, const std::map<LineLocation, StringRef> &IRAnchors, |
144 | const std::map<LineLocation, std::unordered_set<FunctionId>> |
145 | &ProfileAnchors, |
146 | LocToLocMap &IRToProfileLocationMap) { |
147 | LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() |
148 | << "\n" ); |
149 | assert(IRToProfileLocationMap.empty() && |
150 | "Run stale profile matching only once per function" ); |
151 | |
152 | std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap; |
153 | for (const auto &I : ProfileAnchors) { |
154 | const auto &Loc = I.first; |
155 | const auto &Callees = I.second; |
156 | // Filter out possible indirect calls, use direct callee name as anchor. |
157 | if (Callees.size() == 1) { |
158 | FunctionId CalleeName = *Callees.begin(); |
159 | const auto &Candidates = CalleeToCallsitesMap.try_emplace( |
160 | k: CalleeName, args: std::set<LineLocation>()); |
161 | Candidates.first->second.insert(x: Loc); |
162 | } |
163 | } |
164 | |
165 | auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { |
166 | // Skip the unchanged location mapping to save memory. |
167 | if (From != To) |
168 | IRToProfileLocationMap.insert(x: {From, To}); |
169 | }; |
170 | |
171 | // Use function's beginning location as the initial anchor. |
172 | int32_t LocationDelta = 0; |
173 | SmallVector<LineLocation> LastMatchedNonAnchors; |
174 | |
175 | for (const auto &IR : IRAnchors) { |
176 | const auto &Loc = IR.first; |
177 | auto CalleeName = IR.second; |
178 | bool IsMatchedAnchor = false; |
179 | // Match the anchor location in lexical order. |
180 | if (!CalleeName.empty()) { |
181 | auto CandidateAnchors = |
182 | CalleeToCallsitesMap.find(x: getRepInFormat(Name: CalleeName)); |
183 | if (CandidateAnchors != CalleeToCallsitesMap.end() && |
184 | !CandidateAnchors->second.empty()) { |
185 | auto CI = CandidateAnchors->second.begin(); |
186 | const auto Candidate = *CI; |
187 | CandidateAnchors->second.erase(position: CI); |
188 | InsertMatching(Loc, Candidate); |
189 | LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName |
190 | << " is matched from " << Loc << " to " << Candidate |
191 | << "\n" ); |
192 | LocationDelta = Candidate.LineOffset - Loc.LineOffset; |
193 | |
194 | // Match backwards for non-anchor locations. |
195 | // The locations in LastMatchedNonAnchors have been matched forwards |
196 | // based on the previous anchor, spilt it evenly and overwrite the |
197 | // second half based on the current anchor. |
198 | for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; |
199 | I < LastMatchedNonAnchors.size(); I++) { |
200 | const auto &L = LastMatchedNonAnchors[I]; |
201 | uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; |
202 | LineLocation Candidate(CandidateLineOffset, L.Discriminator); |
203 | InsertMatching(L, Candidate); |
204 | LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L |
205 | << " to " << Candidate << "\n" ); |
206 | } |
207 | |
208 | IsMatchedAnchor = true; |
209 | LastMatchedNonAnchors.clear(); |
210 | } |
211 | } |
212 | |
213 | // Match forwards for non-anchor locations. |
214 | if (!IsMatchedAnchor) { |
215 | uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; |
216 | LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); |
217 | InsertMatching(Loc, Candidate); |
218 | LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " |
219 | << Candidate << "\n" ); |
220 | LastMatchedNonAnchors.emplace_back(Args: Loc); |
221 | } |
222 | } |
223 | } |
224 | |
225 | void SampleProfileMatcher::runOnFunction(Function &F) { |
226 | // We need to use flattened function samples for matching. |
227 | // Unlike IR, which includes all callsites from the source code, the callsites |
228 | // in profile only show up when they are hit by samples, i,e. the profile |
229 | // callsites in one context may differ from those in another context. To get |
230 | // the maximum number of callsites, we merge the function profiles from all |
231 | // contexts, aka, the flattened profile to find profile anchors. |
232 | const auto *FSFlattened = getFlattenedSamplesFor(F); |
233 | if (!FSFlattened) |
234 | return; |
235 | |
236 | // Anchors for IR. It's a map from IR location to callee name, callee name is |
237 | // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) |
238 | // for unknown indrect callee name. |
239 | std::map<LineLocation, StringRef> IRAnchors; |
240 | findIRAnchors(F, IRAnchors); |
241 | // Anchors for profile. It's a map from callsite location to a set of callee |
242 | // name. |
243 | std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors; |
244 | findProfileAnchors(FS: *FSFlattened, ProfileAnchors); |
245 | |
246 | // Compute the callsite match states for profile staleness report. |
247 | if (ReportProfileStaleness || PersistProfileStaleness) |
248 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr); |
249 | |
250 | // For probe-based profiles, run matching only when the current profile is not |
251 | // valid. |
252 | if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased || |
253 | !ProbeManager->profileIsValid(F, Samples: *FSFlattened))) { |
254 | // For imported functions, the checksum metadata(pseudo_probe_desc) are |
255 | // dropped, so we leverage function attribute(profile-checksum-mismatch) to |
256 | // transfer the info: add the attribute during pre-link phase and check it |
257 | // during post-link phase(see "profileIsValid"). |
258 | if (FunctionSamples::ProfileIsProbeBased && |
259 | LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) |
260 | F.addFnAttr(Kind: "profile-checksum-mismatch" ); |
261 | |
262 | // The matching result will be saved to IRToProfileLocationMap, create a |
263 | // new map for each function. |
264 | auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); |
265 | runStaleProfileMatching(F, IRAnchors, ProfileAnchors, |
266 | IRToProfileLocationMap); |
267 | // Find and update callsite match states after matching. |
268 | if (ReportProfileStaleness || PersistProfileStaleness) |
269 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, |
270 | IRToProfileLocationMap: &IRToProfileLocationMap); |
271 | } |
272 | } |
273 | |
274 | void SampleProfileMatcher::recordCallsiteMatchStates( |
275 | const Function &F, const std::map<LineLocation, StringRef> &IRAnchors, |
276 | const std::map<LineLocation, std::unordered_set<FunctionId>> |
277 | &ProfileAnchors, |
278 | const LocToLocMap *IRToProfileLocationMap) { |
279 | bool IsPostMatch = IRToProfileLocationMap != nullptr; |
280 | auto &CallsiteMatchStates = |
281 | FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())]; |
282 | |
283 | auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { |
284 | // IRToProfileLocationMap is null in pre-match phrase. |
285 | if (!IRToProfileLocationMap) |
286 | return IRLoc; |
287 | const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc); |
288 | if (ProfileLoc != IRToProfileLocationMap->end()) |
289 | return ProfileLoc->second; |
290 | else |
291 | return IRLoc; |
292 | }; |
293 | |
294 | for (const auto &I : IRAnchors) { |
295 | // After fuzzy profile matching, use the matching result to remap the |
296 | // current IR callsite. |
297 | const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); |
298 | const auto &IRCalleeName = I.second; |
299 | const auto &It = ProfileAnchors.find(x: ProfileLoc); |
300 | if (It == ProfileAnchors.end()) |
301 | continue; |
302 | const auto &Callees = It->second; |
303 | |
304 | bool IsCallsiteMatched = false; |
305 | // Since indirect call does not have CalleeName, check conservatively if |
306 | // callsite in the profile is a callsite location. This is to reduce num of |
307 | // false positive since otherwise all the indirect call samples will be |
308 | // reported as mismatching. |
309 | if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee) |
310 | IsCallsiteMatched = true; |
311 | else if (Callees.size() == 1 && Callees.count(x: getRepInFormat(Name: IRCalleeName))) |
312 | IsCallsiteMatched = true; |
313 | |
314 | if (IsCallsiteMatched) { |
315 | auto It = CallsiteMatchStates.find(x: ProfileLoc); |
316 | if (It == CallsiteMatchStates.end()) |
317 | CallsiteMatchStates.emplace(args: ProfileLoc, args: MatchState::InitialMatch); |
318 | else if (IsPostMatch) { |
319 | if (It->second == MatchState::InitialMatch) |
320 | It->second = MatchState::UnchangedMatch; |
321 | else if (It->second == MatchState::InitialMismatch) |
322 | It->second = MatchState::RecoveredMismatch; |
323 | } |
324 | } |
325 | } |
326 | |
327 | // Check if there are any callsites in the profile that does not match to any |
328 | // IR callsites. |
329 | for (const auto &I : ProfileAnchors) { |
330 | const auto &Loc = I.first; |
331 | [[maybe_unused]] const auto &Callees = I.second; |
332 | assert(!Callees.empty() && "Callees should not be empty" ); |
333 | auto It = CallsiteMatchStates.find(x: Loc); |
334 | if (It == CallsiteMatchStates.end()) |
335 | CallsiteMatchStates.emplace(args: Loc, args: MatchState::InitialMismatch); |
336 | else if (IsPostMatch) { |
337 | // Update the state if it's not matched(UnchangedMatch or |
338 | // RecoveredMismatch). |
339 | if (It->second == MatchState::InitialMismatch) |
340 | It->second = MatchState::UnchangedMismatch; |
341 | else if (It->second == MatchState::InitialMatch) |
342 | It->second = MatchState::RemovedMatch; |
343 | } |
344 | } |
345 | } |
346 | |
347 | void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, |
348 | bool IsTopLevel) { |
349 | const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID()); |
350 | // Skip the function that is external or renamed. |
351 | if (!FuncDesc) |
352 | return; |
353 | |
354 | if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) { |
355 | if (IsTopLevel) |
356 | NumStaleProfileFunc++; |
357 | // Given currently all probe ids are after block probe ids, once the |
358 | // checksum is mismatched, it's likely all the callites are mismatched and |
359 | // dropped. We conservatively count all the samples as mismatched and stop |
360 | // counting the inlinees' profiles. |
361 | MismatchedFunctionSamples += FS.getTotalSamples(); |
362 | return; |
363 | } |
364 | |
365 | // Even the current-level function checksum is matched, it's possible that the |
366 | // nested inlinees' checksums are mismatched that affect the inlinee's sample |
367 | // loading, we need to go deeper to check the inlinees' function samples. |
368 | // Similarly, count all the samples as mismatched if the inlinee's checksum is |
369 | // mismatched using this recursive function. |
370 | for (const auto &I : FS.getCallsiteSamples()) |
371 | for (const auto &CS : I.second) |
372 | countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false); |
373 | } |
374 | |
375 | void SampleProfileMatcher::countMismatchedCallsiteSamples( |
376 | const FunctionSamples &FS) { |
377 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
378 | // Skip it if no mismatched callsite or this is an external function. |
379 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
380 | return; |
381 | const auto &CallsiteMatchStates = It->second; |
382 | |
383 | auto findMatchState = [&](const LineLocation &Loc) { |
384 | auto It = CallsiteMatchStates.find(x: Loc); |
385 | if (It == CallsiteMatchStates.end()) |
386 | return MatchState::Unknown; |
387 | return It->second; |
388 | }; |
389 | |
390 | auto AttributeMismatchedSamples = [&](const enum MatchState &State, |
391 | uint64_t Samples) { |
392 | if (isMismatchState(State)) |
393 | MismatchedCallsiteSamples += Samples; |
394 | else if (State == MatchState::RecoveredMismatch) |
395 | RecoveredCallsiteSamples += Samples; |
396 | }; |
397 | |
398 | // The non-inlined callsites are saved in the body samples of function |
399 | // profile, go through it to count the non-inlined callsite samples. |
400 | for (const auto &I : FS.getBodySamples()) |
401 | AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); |
402 | |
403 | // Count the inlined callsite samples. |
404 | for (const auto &I : FS.getCallsiteSamples()) { |
405 | auto State = findMatchState(I.first); |
406 | uint64_t CallsiteSamples = 0; |
407 | for (const auto &CS : I.second) |
408 | CallsiteSamples += CS.second.getTotalSamples(); |
409 | AttributeMismatchedSamples(State, CallsiteSamples); |
410 | |
411 | if (isMismatchState(State)) |
412 | continue; |
413 | |
414 | // When the current level of inlined call site matches the profiled call |
415 | // site, we need to go deeper along the inline tree to count mismatches from |
416 | // lower level inlinees. |
417 | for (const auto &CS : I.second) |
418 | countMismatchedCallsiteSamples(FS: CS.second); |
419 | } |
420 | } |
421 | |
422 | void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { |
423 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
424 | // Skip it if no mismatched callsite or this is an external function. |
425 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
426 | return; |
427 | const auto &MatchStates = It->second; |
428 | [[maybe_unused]] bool OnInitialState = |
429 | isInitialState(State: MatchStates.begin()->second); |
430 | for (const auto &I : MatchStates) { |
431 | TotalProfiledCallsites++; |
432 | assert( |
433 | (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && |
434 | "Profile matching state is inconsistent" ); |
435 | |
436 | if (isMismatchState(State: I.second)) |
437 | NumMismatchedCallsites++; |
438 | else if (I.second == MatchState::RecoveredMismatch) |
439 | NumRecoveredCallsites++; |
440 | } |
441 | } |
442 | |
443 | void SampleProfileMatcher::computeAndReportProfileStaleness() { |
444 | if (!ReportProfileStaleness && !PersistProfileStaleness) |
445 | return; |
446 | |
447 | // Count profile mismatches for profile staleness report. |
448 | for (const auto &F : M) { |
449 | if (skipProfileForFunction(F)) |
450 | continue; |
451 | // As the stats will be merged by linker, skip reporting the metrics for |
452 | // imported functions to avoid repeated counting. |
453 | if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage())) |
454 | continue; |
455 | const auto *FS = Reader.getSamplesFor(F); |
456 | if (!FS) |
457 | continue; |
458 | TotalProfiledFunc++; |
459 | TotalFunctionSamples += FS->getTotalSamples(); |
460 | |
461 | // Checksum mismatch is only used in pseudo-probe mode. |
462 | if (FunctionSamples::ProfileIsProbeBased) |
463 | countMismatchedFuncSamples(FS: *FS, IsTopLevel: true); |
464 | |
465 | // Count mismatches and samples for calliste. |
466 | countMismatchCallsites(FS: *FS); |
467 | countMismatchedCallsiteSamples(FS: *FS); |
468 | } |
469 | |
470 | if (ReportProfileStaleness) { |
471 | if (FunctionSamples::ProfileIsProbeBased) { |
472 | errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc |
473 | << ") of functions' profile are invalid and (" |
474 | << MismatchedFunctionSamples << "/" << TotalFunctionSamples |
475 | << ") of samples are discarded due to function hash mismatch.\n" ; |
476 | } |
477 | errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" |
478 | << TotalProfiledCallsites |
479 | << ") of callsites' profile are invalid and (" |
480 | << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" |
481 | << TotalFunctionSamples |
482 | << ") of samples are discarded due to callsite location mismatch.\n" ; |
483 | errs() << "(" << NumRecoveredCallsites << "/" |
484 | << (NumRecoveredCallsites + NumMismatchedCallsites) |
485 | << ") of callsites and (" << RecoveredCallsiteSamples << "/" |
486 | << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) |
487 | << ") of samples are recovered by stale profile matching.\n" ; |
488 | } |
489 | |
490 | if (PersistProfileStaleness) { |
491 | LLVMContext &Ctx = M.getContext(); |
492 | MDBuilder MDB(Ctx); |
493 | |
494 | SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; |
495 | if (FunctionSamples::ProfileIsProbeBased) { |
496 | ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc" , Args&: NumStaleProfileFunc); |
497 | ProfStatsVec.emplace_back(Args: "TotalProfiledFunc" , Args&: TotalProfiledFunc); |
498 | ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples" , |
499 | Args&: MismatchedFunctionSamples); |
500 | ProfStatsVec.emplace_back(Args: "TotalFunctionSamples" , Args&: TotalFunctionSamples); |
501 | } |
502 | |
503 | ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites" , Args&: NumMismatchedCallsites); |
504 | ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites" , Args&: NumRecoveredCallsites); |
505 | ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites" , Args&: TotalProfiledCallsites); |
506 | ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples" , |
507 | Args&: MismatchedCallsiteSamples); |
508 | ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples" , |
509 | Args&: RecoveredCallsiteSamples); |
510 | |
511 | auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec); |
512 | auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats" ); |
513 | NMD->addOperand(M: MD); |
514 | } |
515 | } |
516 | |
517 | void SampleProfileMatcher::runOnModule() { |
518 | ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles, |
519 | ProfileIsCS: FunctionSamples::ProfileIsCS); |
520 | for (auto &F : M) { |
521 | if (skipProfileForFunction(F)) |
522 | continue; |
523 | runOnFunction(F); |
524 | } |
525 | if (SalvageStaleProfile) |
526 | distributeIRToProfileLocationMap(); |
527 | |
528 | computeAndReportProfileStaleness(); |
529 | } |
530 | |
531 | void SampleProfileMatcher::distributeIRToProfileLocationMap( |
532 | FunctionSamples &FS) { |
533 | const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName()); |
534 | if (ProfileMappings != FuncMappings.end()) { |
535 | FS.setIRToProfileLocationMap(&(ProfileMappings->second)); |
536 | } |
537 | |
538 | for (auto &Callees : |
539 | const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { |
540 | for (auto &FS : Callees.second) { |
541 | distributeIRToProfileLocationMap(FS&: FS.second); |
542 | } |
543 | } |
544 | } |
545 | |
546 | // Use a central place to distribute the matching results. Outlined and inlined |
547 | // profile with the function name will be set to the same pointer. |
548 | void SampleProfileMatcher::distributeIRToProfileLocationMap() { |
549 | for (auto &I : Reader.getProfiles()) { |
550 | distributeIRToProfileLocationMap(FS&: I.second); |
551 | } |
552 | } |
553 | |