1 | //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "PerfReader.h" |
9 | #include "ProfileGenerator.h" |
10 | #include "llvm/ADT/SmallString.h" |
11 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
12 | #include "llvm/Support/FileSystem.h" |
13 | #include "llvm/Support/Process.h" |
14 | #include "llvm/Support/ToolOutputFile.h" |
15 | |
16 | #define DEBUG_TYPE "perf-reader" |
17 | |
18 | cl::opt<bool> SkipSymbolization("skip-symbolization" , |
19 | cl::desc("Dump the unsymbolized profile to the " |
20 | "output file. It will show unwinder " |
21 | "output for CS profile generation." )); |
22 | |
23 | static cl::opt<bool> ShowMmapEvents("show-mmap-events" , |
24 | cl::desc("Print binary load events." )); |
25 | |
26 | static cl::opt<bool> |
27 | UseOffset("use-offset" , cl::init(Val: true), |
28 | cl::desc("Work with `--skip-symbolization` or " |
29 | "`--unsymbolized-profile` to write/read the " |
30 | "offset instead of virtual address." )); |
31 | |
32 | static cl::opt<bool> UseLoadableSegmentAsBase( |
33 | "use-first-loadable-segment-as-base" , |
34 | cl::desc("Use first loadable segment address as base address " |
35 | "for offsets in unsymbolized profile. By default " |
36 | "first executable segment address is used" )); |
37 | |
38 | static cl::opt<bool> |
39 | IgnoreStackSamples("ignore-stack-samples" , |
40 | cl::desc("Ignore call stack samples for hybrid samples " |
41 | "and produce context-insensitive profile." )); |
42 | cl::opt<bool> ShowDetailedWarning("show-detailed-warning" , |
43 | cl::desc("Show detailed warning message." )); |
44 | |
45 | extern cl::opt<std::string> PerfTraceFilename; |
46 | extern cl::opt<bool> ShowDisassemblyOnly; |
47 | extern cl::opt<bool> ShowSourceLocations; |
48 | extern cl::opt<std::string> OutputFilename; |
49 | |
50 | namespace llvm { |
51 | namespace sampleprof { |
52 | |
53 | void VirtualUnwinder::unwindCall(UnwindState &State) { |
54 | uint64_t Source = State.getCurrentLBRSource(); |
55 | auto *ParentFrame = State.getParentFrame(); |
56 | // The 2nd frame after leaf could be missing if stack sample is |
57 | // taken when IP is within prolog/epilog, as frame chain isn't |
58 | // setup yet. Fill in the missing frame in that case. |
59 | // TODO: Currently we just assume all the addr that can't match the |
60 | // 2nd frame is in prolog/epilog. In the future, we will switch to |
61 | // pro/epi tracker(Dwarf CFI) for the precise check. |
62 | if (ParentFrame == State.getDummyRootPtr() || |
63 | ParentFrame->Address != Source) { |
64 | State.switchToFrame(Address: Source); |
65 | if (ParentFrame != State.getDummyRootPtr()) { |
66 | if (Source == ExternalAddr) |
67 | NumMismatchedExtCallBranch++; |
68 | else |
69 | NumMismatchedProEpiBranch++; |
70 | } |
71 | } else { |
72 | State.popFrame(); |
73 | } |
74 | State.InstPtr.update(Addr: Source); |
75 | } |
76 | |
77 | void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { |
78 | InstructionPointer &IP = State.InstPtr; |
79 | uint64_t Target = State.getCurrentLBRTarget(); |
80 | uint64_t End = IP.Address; |
81 | |
82 | if (End == ExternalAddr && Target == ExternalAddr) { |
83 | // Filter out the case when leaf external frame matches the external LBR |
84 | // target, this is a valid state, it happens that the code run into external |
85 | // address then return back. The call frame under the external frame |
86 | // remains valid and can be unwound later, just skip recording this range. |
87 | NumPairedExtAddr++; |
88 | return; |
89 | } |
90 | |
91 | if (End == ExternalAddr || Target == ExternalAddr) { |
92 | // Range is invalid if only one point is external address. This means LBR |
93 | // traces contains a standalone external address failing to pair another |
94 | // one, likely due to interrupt jmp or broken perf script. Set the |
95 | // state to invalid. |
96 | NumUnpairedExtAddr++; |
97 | State.setInvalid(); |
98 | return; |
99 | } |
100 | |
101 | if (!isValidFallThroughRange(Start: Target, End, Binary)) { |
102 | // Skip unwinding the rest of LBR trace when a bogus range is seen. |
103 | State.setInvalid(); |
104 | return; |
105 | } |
106 | |
107 | if (Binary->usePseudoProbes()) { |
108 | // We don't need to top frame probe since it should be extracted |
109 | // from the range. |
110 | // The outcome of the virtual unwinding with pseudo probes is a |
111 | // map from a context key to the address range being unwound. |
112 | // This means basically linear unwinding is not needed for pseudo |
113 | // probes. The range will be simply recorded here and will be |
114 | // converted to a list of pseudo probes to report in ProfileGenerator. |
115 | State.getParentFrame()->recordRangeCount(Start: Target, End, Count: Repeat); |
116 | } else { |
117 | // Unwind linear execution part. |
118 | // Split and record the range by different inline context. For example: |
119 | // [0x01] ... main:1 # Target |
120 | // [0x02] ... main:2 |
121 | // [0x03] ... main:3 @ foo:1 |
122 | // [0x04] ... main:3 @ foo:2 |
123 | // [0x05] ... main:3 @ foo:3 |
124 | // [0x06] ... main:4 |
125 | // [0x07] ... main:5 # End |
126 | // It will be recorded: |
127 | // [main:*] : [0x06, 0x07], [0x01, 0x02] |
128 | // [main:3 @ foo:*] : [0x03, 0x05] |
129 | while (IP.Address > Target) { |
130 | uint64_t PrevIP = IP.Address; |
131 | IP.backward(); |
132 | // Break into segments for implicit call/return due to inlining |
133 | bool SameInlinee = Binary->inlineContextEqual(Add1: PrevIP, Add2: IP.Address); |
134 | if (!SameInlinee) { |
135 | State.switchToFrame(Address: PrevIP); |
136 | State.CurrentLeafFrame->recordRangeCount(Start: PrevIP, End, Count: Repeat); |
137 | End = IP.Address; |
138 | } |
139 | } |
140 | assert(IP.Address == Target && "The last one must be the target address." ); |
141 | // Record the remaining range, [0x01, 0x02] in the example |
142 | State.switchToFrame(Address: IP.Address); |
143 | State.CurrentLeafFrame->recordRangeCount(Start: IP.Address, End, Count: Repeat); |
144 | } |
145 | } |
146 | |
147 | void VirtualUnwinder::unwindReturn(UnwindState &State) { |
148 | // Add extra frame as we unwind through the return |
149 | const LBREntry &LBR = State.getCurrentLBR(); |
150 | uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr: LBR.Target); |
151 | State.switchToFrame(Address: CallAddr); |
152 | State.pushFrame(Address: LBR.Source); |
153 | State.InstPtr.update(Addr: LBR.Source); |
154 | } |
155 | |
156 | void VirtualUnwinder::unwindBranch(UnwindState &State) { |
157 | // TODO: Tolerate tail call for now, as we may see tail call from libraries. |
158 | // This is only for intra function branches, excluding tail calls. |
159 | uint64_t Source = State.getCurrentLBRSource(); |
160 | State.switchToFrame(Address: Source); |
161 | State.InstPtr.update(Addr: Source); |
162 | } |
163 | |
164 | std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { |
165 | std::shared_ptr<StringBasedCtxKey> KeyStr = |
166 | std::make_shared<StringBasedCtxKey>(); |
167 | KeyStr->Context = Binary->getExpandedContext(Stack, WasLeafInlined&: KeyStr->WasLeafInlined); |
168 | return KeyStr; |
169 | } |
170 | |
171 | std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() { |
172 | std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>(); |
173 | KeyStr->Context = Stack; |
174 | CSProfileGenerator::compressRecursionContext<uint64_t>(Context&: KeyStr->Context); |
175 | CSProfileGenerator::trimContext<uint64_t>(S&: KeyStr->Context); |
176 | return KeyStr; |
177 | } |
178 | |
179 | template <typename T> |
180 | void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, |
181 | T &Stack) { |
182 | if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) |
183 | return; |
184 | |
185 | std::shared_ptr<ContextKey> Key = Stack.getContextKey(); |
186 | if (Key == nullptr) |
187 | return; |
188 | auto Ret = CtxCounterMap->emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
189 | SampleCounter &SCounter = Ret.first->second; |
190 | for (auto &I : Cur->RangeSamples) |
191 | SCounter.recordRangeCount(Start: std::get<0>(t&: I), End: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
192 | |
193 | for (auto &I : Cur->BranchSamples) |
194 | SCounter.recordBranchCount(Source: std::get<0>(t&: I), Target: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
195 | } |
196 | |
197 | template <typename T> |
198 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
199 | UnwindState::ProfiledFrame *Cur, T &Stack) { |
200 | if (!Cur->isDummyRoot()) { |
201 | // Truncate the context for external frame since this isn't a real call |
202 | // context the compiler will see. |
203 | if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) { |
204 | // Process truncated context |
205 | // Start a new traversal ignoring its bottom context |
206 | T EmptyStack(Binary); |
207 | collectSamplesFromFrame(Cur, EmptyStack); |
208 | for (const auto &Item : Cur->Children) { |
209 | collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); |
210 | } |
211 | |
212 | // Keep note of untracked call site and deduplicate them |
213 | // for warning later. |
214 | if (!Cur->isLeafFrame()) |
215 | UntrackedCallsites.insert(x: Cur->Address); |
216 | |
217 | return; |
218 | } |
219 | } |
220 | |
221 | collectSamplesFromFrame(Cur, Stack); |
222 | // Process children frame |
223 | for (const auto &Item : Cur->Children) { |
224 | collectSamplesFromFrameTrie(Item.second.get(), Stack); |
225 | } |
226 | // Recover the call stack |
227 | Stack.popFrame(); |
228 | } |
229 | |
230 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
231 | UnwindState::ProfiledFrame *Cur) { |
232 | if (Binary->usePseudoProbes()) { |
233 | AddressStack Stack(Binary); |
234 | collectSamplesFromFrameTrie<AddressStack>(Cur, Stack); |
235 | } else { |
236 | FrameStack Stack(Binary); |
237 | collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); |
238 | } |
239 | } |
240 | |
241 | void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, |
242 | UnwindState &State, uint64_t Repeat) { |
243 | if (Branch.Target == ExternalAddr) |
244 | return; |
245 | |
246 | // Record external-to-internal pattern on the trie root, it later can be |
247 | // used for generating head samples. |
248 | if (Branch.Source == ExternalAddr) { |
249 | State.getDummyRootPtr()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
250 | Count: Repeat); |
251 | return; |
252 | } |
253 | |
254 | if (Binary->usePseudoProbes()) { |
255 | // Same as recordRangeCount, We don't need to top frame probe since we will |
256 | // extract it from branch's source address |
257 | State.getParentFrame()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
258 | Count: Repeat); |
259 | } else { |
260 | State.CurrentLeafFrame->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
261 | Count: Repeat); |
262 | } |
263 | } |
264 | |
265 | bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { |
266 | // Capture initial state as starting point for unwinding. |
267 | UnwindState State(Sample, Binary); |
268 | |
269 | // Sanity check - making sure leaf of LBR aligns with leaf of stack sample |
270 | // Stack sample sometimes can be unreliable, so filter out bogus ones. |
271 | if (!State.validateInitialState()) |
272 | return false; |
273 | |
274 | NumTotalBranches += State.LBRStack.size(); |
275 | // Now process the LBR samples in parrallel with stack sample |
276 | // Note that we do not reverse the LBR entry order so we can |
277 | // unwind the sample stack as we walk through LBR entries. |
278 | while (State.hasNextLBR()) { |
279 | State.checkStateConsistency(); |
280 | |
281 | // Do not attempt linear unwind for the leaf range as it's incomplete. |
282 | if (!State.IsLastLBR()) { |
283 | // Unwind implicit calls/returns from inlining, along the linear path, |
284 | // break into smaller sub section each with its own calling context. |
285 | unwindLinear(State, Repeat); |
286 | } |
287 | |
288 | // Save the LBR branch before it gets unwound. |
289 | const LBREntry &Branch = State.getCurrentLBR(); |
290 | if (isCallState(State)) { |
291 | // Unwind calls - we know we encountered call if LBR overlaps with |
292 | // transition between leaf the 2nd frame. Note that for calls that |
293 | // were not in the original stack sample, we should have added the |
294 | // extra frame when processing the return paired with this call. |
295 | unwindCall(State); |
296 | } else if (isReturnState(State)) { |
297 | // Unwind returns - check whether the IP is indeed at a return |
298 | // instruction |
299 | unwindReturn(State); |
300 | } else if (isValidState(State)) { |
301 | // Unwind branches |
302 | unwindBranch(State); |
303 | } else { |
304 | // Skip unwinding the rest of LBR trace. Reset the stack and update the |
305 | // state so that the rest of the trace can still be processed as if they |
306 | // do not have stack samples. |
307 | State.clearCallStack(); |
308 | State.InstPtr.update(Addr: State.getCurrentLBRSource()); |
309 | State.pushFrame(Address: State.InstPtr.Address); |
310 | } |
311 | |
312 | State.advanceLBR(); |
313 | // Record `branch` with calling context after unwinding. |
314 | recordBranchCount(Branch, State, Repeat); |
315 | } |
316 | // As samples are aggregated on trie, record them into counter map |
317 | collectSamplesFromFrameTrie(Cur: State.getDummyRootPtr()); |
318 | |
319 | return true; |
320 | } |
321 | |
322 | std::unique_ptr<PerfReaderBase> |
323 | PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, |
324 | std::optional<uint32_t> PIDFilter) { |
325 | std::unique_ptr<PerfReaderBase> PerfReader; |
326 | |
327 | if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { |
328 | PerfReader.reset( |
329 | p: new UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); |
330 | return PerfReader; |
331 | } |
332 | |
333 | // For perf data input, we need to convert them into perf script first. |
334 | if (PerfInput.Format == PerfFormat::PerfData) |
335 | PerfInput = |
336 | PerfScriptReader::convertPerfDataToTrace(Binary, File&: PerfInput, PIDFilter); |
337 | |
338 | assert((PerfInput.Format == PerfFormat::PerfScript) && |
339 | "Should be a perfscript!" ); |
340 | |
341 | PerfInput.Content = |
342 | PerfScriptReader::checkPerfScriptType(FileName: PerfInput.InputFile); |
343 | if (PerfInput.Content == PerfContent::LBRStack) { |
344 | PerfReader.reset( |
345 | p: new HybridPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
346 | } else if (PerfInput.Content == PerfContent::LBR) { |
347 | PerfReader.reset(p: new LBRPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
348 | } else { |
349 | exitWithError(Message: "Unsupported perfscript!" ); |
350 | } |
351 | |
352 | return PerfReader; |
353 | } |
354 | |
355 | PerfInputFile |
356 | PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, |
357 | PerfInputFile &File, |
358 | std::optional<uint32_t> PIDFilter) { |
359 | StringRef PerfData = File.InputFile; |
360 | // Run perf script to retrieve PIDs matching binary we're interested in. |
361 | auto PerfExecutable = sys::Process::FindInEnvPath(EnvName: "PATH" , FileName: "perf" ); |
362 | if (!PerfExecutable) { |
363 | exitWithError(Message: "Perf not found." ); |
364 | } |
365 | std::string PerfPath = *PerfExecutable; |
366 | |
367 | SmallString<128> PerfTraceFile; |
368 | sys::fs::createUniquePath(Model: "perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp" , |
369 | ResultPath&: PerfTraceFile, /*MakeAbsolute=*/true); |
370 | std::string ErrorFile = std::string(PerfTraceFile) + ".err" ; |
371 | StringRef ScriptMMapArgs[] = {PerfPath, "script" , "--show-mmap-events" , |
372 | "-F" , "comm,pid" , "-i" , |
373 | PerfData}; |
374 | std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin |
375 | StringRef(PerfTraceFile), // Stdout |
376 | StringRef(ErrorFile)}; // Stderr |
377 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptMMapArgs, Env: std::nullopt, Redirects); |
378 | |
379 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: PerfTraceFile); |
380 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: ErrorFile); |
381 | |
382 | // Collect the PIDs |
383 | TraceStream TraceIt(PerfTraceFile); |
384 | std::string PIDs; |
385 | std::unordered_set<uint32_t> PIDSet; |
386 | while (!TraceIt.isAtEoF()) { |
387 | MMapEvent MMap; |
388 | if (isMMap2Event(Line: TraceIt.getCurrentLine()) && |
389 | extractMMap2EventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) { |
390 | auto It = PIDSet.emplace(args&: MMap.PID); |
391 | if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) { |
392 | if (!PIDs.empty()) { |
393 | PIDs.append(s: "," ); |
394 | } |
395 | PIDs.append(str: utostr(X: MMap.PID)); |
396 | } |
397 | } |
398 | TraceIt.advance(); |
399 | } |
400 | |
401 | if (PIDs.empty()) { |
402 | exitWithError(Message: "No relevant mmap event is found in perf data." ); |
403 | } |
404 | |
405 | // Run perf script again to retrieve events for PIDs collected above |
406 | StringRef ScriptSampleArgs[] = {PerfPath, "script" , "--show-mmap-events" , |
407 | "-F" , "ip,brstack" , "--pid" , |
408 | PIDs, "-i" , PerfData}; |
409 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptSampleArgs, Env: std::nullopt, Redirects); |
410 | |
411 | return {.InputFile: std::string(PerfTraceFile), .Format: PerfFormat::PerfScript, |
412 | .Content: PerfContent::UnknownContent}; |
413 | } |
414 | |
415 | static StringRef filename(StringRef Path, bool UseBackSlash) { |
416 | llvm::sys::path::Style PathStyle = |
417 | UseBackSlash ? llvm::sys::path::Style::windows_backslash |
418 | : llvm::sys::path::Style::native; |
419 | StringRef FileName = llvm::sys::path::filename(path: Path, style: PathStyle); |
420 | |
421 | // In case this file use \r\n as newline. |
422 | if (UseBackSlash && FileName.back() == '\r') |
423 | return FileName.drop_back(); |
424 | |
425 | return FileName; |
426 | } |
427 | |
428 | void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { |
429 | // Drop the event which doesn't belong to user-provided binary |
430 | StringRef BinaryName = filename(Path: Event.BinaryPath, UseBackSlash: Binary->isCOFF()); |
431 | if (Binary->getName() != BinaryName) |
432 | return; |
433 | |
434 | // Drop the event if process does not match pid filter |
435 | if (PIDFilter && Event.PID != *PIDFilter) |
436 | return; |
437 | |
438 | // Drop the event if its image is loaded at the same address |
439 | if (Event.Address == Binary->getBaseAddress()) { |
440 | Binary->setIsLoadedByMMap(true); |
441 | return; |
442 | } |
443 | |
444 | if (Event.Offset == Binary->getTextSegmentOffset()) { |
445 | // A binary image could be unloaded and then reloaded at different |
446 | // place, so update binary load address. |
447 | // Only update for the first executable segment and assume all other |
448 | // segments are loaded at consecutive memory addresses, which is the case on |
449 | // X64. |
450 | Binary->setBaseAddress(Event.Address); |
451 | Binary->setIsLoadedByMMap(true); |
452 | } else { |
453 | // Verify segments are loaded consecutively. |
454 | const auto &Offsets = Binary->getTextSegmentOffsets(); |
455 | auto It = llvm::lower_bound(Range: Offsets, Value: Event.Offset); |
456 | if (It != Offsets.end() && *It == Event.Offset) { |
457 | // The event is for loading a separate executable segment. |
458 | auto I = std::distance(first: Offsets.begin(), last: It); |
459 | const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); |
460 | if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != |
461 | Event.Address - Binary->getBaseAddress()) |
462 | exitWithError(Message: "Executable segments not loaded consecutively" ); |
463 | } else { |
464 | if (It == Offsets.begin()) |
465 | exitWithError(Message: "File offset not found" ); |
466 | else { |
467 | // Find the segment the event falls in. A large segment could be loaded |
468 | // via multiple mmap calls with consecutive memory addresses. |
469 | --It; |
470 | assert(*It < Event.Offset); |
471 | if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) |
472 | exitWithError(Message: "Segment not loaded by consecutive mmaps" ); |
473 | } |
474 | } |
475 | } |
476 | } |
477 | |
478 | static std::string getContextKeyStr(ContextKey *K, |
479 | const ProfiledBinary *Binary) { |
480 | if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(Val: K)) { |
481 | return SampleContext::getContextString(Context: CtxKey->Context); |
482 | } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: K)) { |
483 | std::ostringstream OContextStr; |
484 | for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { |
485 | if (OContextStr.str().size()) |
486 | OContextStr << " @ " ; |
487 | uint64_t Address = CtxKey->Context[I]; |
488 | if (UseOffset) { |
489 | if (UseLoadableSegmentAsBase) |
490 | Address -= Binary->getFirstLoadableAddress(); |
491 | else |
492 | Address -= Binary->getPreferredBaseAddress(); |
493 | } |
494 | OContextStr << "0x" |
495 | << utohexstr(X: Address, |
496 | /*LowerCase=*/true); |
497 | } |
498 | return OContextStr.str(); |
499 | } else { |
500 | llvm_unreachable("unexpected key type" ); |
501 | } |
502 | } |
503 | |
504 | void HybridPerfReader::unwindSamples() { |
505 | VirtualUnwinder Unwinder(&SampleCounters, Binary); |
506 | for (const auto &Item : AggregatedSamples) { |
507 | const PerfSample *Sample = Item.first.getPtr(); |
508 | Unwinder.unwind(Sample, Repeat: Item.second); |
509 | } |
510 | |
511 | // Warn about untracked frames due to missing probes. |
512 | if (ShowDetailedWarning) { |
513 | for (auto Address : Unwinder.getUntrackedCallsites()) |
514 | WithColor::warning() << "Profile context truncated due to missing probe " |
515 | << "for call instruction at " |
516 | << format(Fmt: "0x%" PRIx64, Vals: Address) << "\n" ; |
517 | } |
518 | |
519 | emitWarningSummary(Num: Unwinder.getUntrackedCallsites().size(), |
520 | Total: SampleCounters.size(), |
521 | Msg: "of profiled contexts are truncated due to missing probe " |
522 | "for call instruction." ); |
523 | |
524 | emitWarningSummary( |
525 | Num: Unwinder.NumMismatchedExtCallBranch, Total: Unwinder.NumTotalBranches, |
526 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
527 | "stack, likely due to unwinding error of external frame." ); |
528 | |
529 | emitWarningSummary(Num: Unwinder.NumPairedExtAddr * 2, Total: Unwinder.NumTotalBranches, |
530 | Msg: "of branches containing paired external address." ); |
531 | |
532 | emitWarningSummary(Num: Unwinder.NumUnpairedExtAddr, Total: Unwinder.NumTotalBranches, |
533 | Msg: "of branches containing external address but doesn't have " |
534 | "another external address to pair, likely due to " |
535 | "interrupt jmp or broken perf script." ); |
536 | |
537 | emitWarningSummary( |
538 | Num: Unwinder.NumMismatchedProEpiBranch, Total: Unwinder.NumTotalBranches, |
539 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
540 | "stack, likely due to frame in prolog/epilog." ); |
541 | |
542 | emitWarningSummary(Num: Unwinder.NumMissingExternalFrame, |
543 | Total: Unwinder.NumExtCallBranch, |
544 | Msg: "of artificial call branches but doesn't have an external " |
545 | "frame to match." ); |
546 | } |
547 | |
548 | bool PerfScriptReader::(TraceStream &TraceIt, |
549 | SmallVectorImpl<LBREntry> &LBRStack) { |
550 | // The raw format of LBR stack is like: |
551 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
552 | // ... 0x4005c8/0x4005dc/P/-/-/0 |
553 | // It's in FIFO order and seperated by whitespace. |
554 | SmallVector<StringRef, 32> Records; |
555 | TraceIt.getCurrentLine().split(A&: Records, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
556 | auto WarnInvalidLBR = [](TraceStream &TraceIt) { |
557 | WithColor::warning() << "Invalid address in LBR record at line " |
558 | << TraceIt.getLineNumber() << ": " |
559 | << TraceIt.getCurrentLine() << "\n" ; |
560 | }; |
561 | |
562 | // Skip the leading instruction pointer. |
563 | size_t Index = 0; |
564 | uint64_t LeadingAddr; |
565 | if (!Records.empty() && !Records[0].contains(C: '/')) { |
566 | if (Records[0].getAsInteger(Radix: 16, Result&: LeadingAddr)) { |
567 | WarnInvalidLBR(TraceIt); |
568 | TraceIt.advance(); |
569 | return false; |
570 | } |
571 | Index = 1; |
572 | } |
573 | |
574 | // Now extract LBR samples - note that we do not reverse the |
575 | // LBR entry order so we can unwind the sample stack as we walk |
576 | // through LBR entries. |
577 | while (Index < Records.size()) { |
578 | auto &Token = Records[Index++]; |
579 | if (Token.size() == 0) |
580 | continue; |
581 | |
582 | SmallVector<StringRef, 8> Addresses; |
583 | Token.split(A&: Addresses, Separator: "/" ); |
584 | uint64_t Src; |
585 | uint64_t Dst; |
586 | |
587 | // Stop at broken LBR records. |
588 | if (Addresses.size() < 2 || Addresses[0].substr(Start: 2).getAsInteger(Radix: 16, Result&: Src) || |
589 | Addresses[1].substr(Start: 2).getAsInteger(Radix: 16, Result&: Dst)) { |
590 | WarnInvalidLBR(TraceIt); |
591 | break; |
592 | } |
593 | |
594 | // Canonicalize to use preferred load address as base address. |
595 | Src = Binary->canonicalizeVirtualAddress(Address: Src); |
596 | Dst = Binary->canonicalizeVirtualAddress(Address: Dst); |
597 | bool SrcIsInternal = Binary->addressIsCode(Address: Src); |
598 | bool DstIsInternal = Binary->addressIsCode(Address: Dst); |
599 | if (!SrcIsInternal) |
600 | Src = ExternalAddr; |
601 | if (!DstIsInternal) |
602 | Dst = ExternalAddr; |
603 | // Filter external-to-external case to reduce LBR trace size. |
604 | if (!SrcIsInternal && !DstIsInternal) |
605 | continue; |
606 | |
607 | LBRStack.emplace_back(Args: LBREntry(Src, Dst)); |
608 | } |
609 | TraceIt.advance(); |
610 | return !LBRStack.empty(); |
611 | } |
612 | |
613 | bool PerfScriptReader::(TraceStream &TraceIt, |
614 | SmallVectorImpl<uint64_t> &CallStack) { |
615 | // The raw format of call stack is like: |
616 | // 4005dc # leaf frame |
617 | // 400634 |
618 | // 400684 # root frame |
619 | // It's in bottom-up order with each frame in one line. |
620 | |
621 | // Extract stack frames from sample |
622 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
623 | StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); |
624 | uint64_t FrameAddr = 0; |
625 | if (FrameStr.getAsInteger(Radix: 16, Result&: FrameAddr)) { |
626 | // We might parse a non-perf sample line like empty line and comments, |
627 | // skip it |
628 | TraceIt.advance(); |
629 | return false; |
630 | } |
631 | TraceIt.advance(); |
632 | |
633 | FrameAddr = Binary->canonicalizeVirtualAddress(Address: FrameAddr); |
634 | // Currently intermixed frame from different binaries is not supported. |
635 | if (!Binary->addressIsCode(Address: FrameAddr)) { |
636 | if (CallStack.empty()) |
637 | NumLeafExternalFrame++; |
638 | // Push a special value(ExternalAddr) for the external frames so that |
639 | // unwinder can still work on this with artificial Call/Return branch. |
640 | // After unwinding, the context will be truncated for external frame. |
641 | // Also deduplicate the consecutive external addresses. |
642 | if (CallStack.empty() || CallStack.back() != ExternalAddr) |
643 | CallStack.emplace_back(Args: ExternalAddr); |
644 | continue; |
645 | } |
646 | |
647 | // We need to translate return address to call address for non-leaf frames. |
648 | if (!CallStack.empty()) { |
649 | auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); |
650 | if (!CallAddr) { |
651 | // Stop at an invalid return address caused by bad unwinding. This could |
652 | // happen to frame-pointer-based unwinding and the callee functions that |
653 | // do not have the frame pointer chain set up. |
654 | InvalidReturnAddresses.insert(x: FrameAddr); |
655 | break; |
656 | } |
657 | FrameAddr = CallAddr; |
658 | } |
659 | |
660 | CallStack.emplace_back(Args&: FrameAddr); |
661 | } |
662 | |
663 | // Strip out the bottom external addr. |
664 | if (CallStack.size() > 1 && CallStack.back() == ExternalAddr) |
665 | CallStack.pop_back(); |
666 | |
667 | // Skip other unrelated line, find the next valid LBR line |
668 | // Note that even for empty call stack, we should skip the address at the |
669 | // bottom, otherwise the following pass may generate a truncated callstack |
670 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
671 | TraceIt.advance(); |
672 | } |
673 | // Filter out broken stack sample. We may not have complete frame info |
674 | // if sample end up in prolog/epilog, the result is dangling context not |
675 | // connected to entry point. This should be relatively rare thus not much |
676 | // impact on overall profile quality. However we do want to filter them |
677 | // out to reduce the number of different calling contexts. One instance |
678 | // of such case - when sample landed in prolog/epilog, somehow stack |
679 | // walking will be broken in an unexpected way that higher frames will be |
680 | // missing. |
681 | return !CallStack.empty() && |
682 | !Binary->addressInPrologEpilog(Address: CallStack.front()); |
683 | } |
684 | |
685 | void PerfScriptReader::warnIfMissingMMap() { |
686 | if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { |
687 | WithColor::warning() << "No relevant mmap event is matched for " |
688 | << Binary->getName() |
689 | << ", will use preferred address (" |
690 | << format(Fmt: "0x%" PRIx64, |
691 | Vals: Binary->getPreferredBaseAddress()) |
692 | << ") as the base loading address!\n" ; |
693 | // Avoid redundant warning, only warn at the first unmatched sample. |
694 | Binary->setMissingMMapWarned(true); |
695 | } |
696 | } |
697 | |
698 | void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
699 | // The raw hybird sample started with call stack in FILO order and followed |
700 | // intermediately by LBR sample |
701 | // e.g. |
702 | // 4005dc # call stack leaf |
703 | // 400634 |
704 | // 400684 # call stack root |
705 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
706 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
707 | // |
708 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
709 | #ifndef NDEBUG |
710 | Sample->Linenum = TraceIt.getLineNumber(); |
711 | #endif |
712 | // Parsing call stack and populate into PerfSample.CallStack |
713 | if (!extractCallstack(TraceIt, CallStack&: Sample->CallStack)) { |
714 | // Skip the next LBR line matched current call stack |
715 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) |
716 | TraceIt.advance(); |
717 | return; |
718 | } |
719 | |
720 | warnIfMissingMMap(); |
721 | |
722 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
723 | // Parsing LBR stack and populate into PerfSample.LBRStack |
724 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
725 | if (IgnoreStackSamples) { |
726 | Sample->CallStack.clear(); |
727 | } else { |
728 | // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR |
729 | // ranges |
730 | Sample->CallStack.front() = Sample->LBRStack[0].Target; |
731 | } |
732 | // Record samples by aggregation |
733 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
734 | } |
735 | } else { |
736 | // LBR sample is encoded in single line after stack sample |
737 | exitWithError(Message: "'Hybrid perf sample is corrupted, No LBR sample line" ); |
738 | } |
739 | } |
740 | |
741 | void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) { |
742 | std::error_code EC; |
743 | raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); |
744 | if (EC) |
745 | exitWithError(EC, Whence: Filename); |
746 | writeUnsymbolizedProfile(OS); |
747 | } |
748 | |
749 | // Use ordered map to make the output deterministic |
750 | using OrderedCounterForPrint = std::map<std::string, SampleCounter *>; |
751 | |
752 | void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { |
753 | OrderedCounterForPrint OrderedCounters; |
754 | for (auto &CI : SampleCounters) { |
755 | OrderedCounters[getContextKeyStr(K: CI.first.getPtr(), Binary)] = &CI.second; |
756 | } |
757 | |
758 | auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator, |
759 | uint32_t Indent) { |
760 | OS.indent(NumSpaces: Indent); |
761 | OS << Counter.size() << "\n" ; |
762 | for (auto &I : Counter) { |
763 | uint64_t Start = I.first.first; |
764 | uint64_t End = I.first.second; |
765 | |
766 | if (UseOffset) { |
767 | if (UseLoadableSegmentAsBase) { |
768 | Start -= Binary->getFirstLoadableAddress(); |
769 | End -= Binary->getFirstLoadableAddress(); |
770 | } else { |
771 | Start -= Binary->getPreferredBaseAddress(); |
772 | End -= Binary->getPreferredBaseAddress(); |
773 | } |
774 | } |
775 | |
776 | OS.indent(NumSpaces: Indent); |
777 | OS << Twine::utohexstr(Val: Start) << Separator << Twine::utohexstr(Val: End) << ":" |
778 | << I.second << "\n" ; |
779 | } |
780 | }; |
781 | |
782 | for (auto &CI : OrderedCounters) { |
783 | uint32_t Indent = 0; |
784 | if (ProfileIsCS) { |
785 | // Context string key |
786 | OS << "[" << CI.first << "]\n" ; |
787 | Indent = 2; |
788 | } |
789 | |
790 | SampleCounter &Counter = *CI.second; |
791 | SCounterPrinter(Counter.RangeCounter, "-" , Indent); |
792 | SCounterPrinter(Counter.BranchCounter, "->" , Indent); |
793 | } |
794 | } |
795 | |
796 | // Format of input: |
797 | // number of entries in RangeCounter |
798 | // from_1-to_1:count_1 |
799 | // from_2-to_2:count_2 |
800 | // ...... |
801 | // from_n-to_n:count_n |
802 | // number of entries in BranchCounter |
803 | // src_1->dst_1:count_1 |
804 | // src_2->dst_2:count_2 |
805 | // ...... |
806 | // src_n->dst_n:count_n |
807 | void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, |
808 | SampleCounter &SCounters) { |
809 | auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) { |
810 | std::string Msg = TraceIt.isAtEoF() |
811 | ? "Invalid raw profile!" |
812 | : "Invalid raw profile at line " + |
813 | Twine(TraceIt.getLineNumber()).str() + ": " + |
814 | TraceIt.getCurrentLine().str(); |
815 | exitWithError(Message: Msg); |
816 | }; |
817 | auto ReadNumber = [&](uint64_t &Num) { |
818 | if (TraceIt.isAtEoF()) |
819 | exitWithErrorForTraceLine(TraceIt); |
820 | if (TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 10, Result&: Num)) |
821 | exitWithErrorForTraceLine(TraceIt); |
822 | TraceIt.advance(); |
823 | }; |
824 | |
825 | auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) { |
826 | uint64_t Num = 0; |
827 | ReadNumber(Num); |
828 | while (Num--) { |
829 | if (TraceIt.isAtEoF()) |
830 | exitWithErrorForTraceLine(TraceIt); |
831 | StringRef Line = TraceIt.getCurrentLine().ltrim(); |
832 | |
833 | uint64_t Count = 0; |
834 | auto LineSplit = Line.split(Separator: ":" ); |
835 | if (LineSplit.second.empty() || LineSplit.second.getAsInteger(Radix: 10, Result&: Count)) |
836 | exitWithErrorForTraceLine(TraceIt); |
837 | |
838 | uint64_t Source = 0; |
839 | uint64_t Target = 0; |
840 | auto Range = LineSplit.first.split(Separator); |
841 | if (Range.second.empty() || Range.first.getAsInteger(Radix: 16, Result&: Source) || |
842 | Range.second.getAsInteger(Radix: 16, Result&: Target)) |
843 | exitWithErrorForTraceLine(TraceIt); |
844 | |
845 | if (UseOffset) { |
846 | if (UseLoadableSegmentAsBase) { |
847 | Source += Binary->getFirstLoadableAddress(); |
848 | Target += Binary->getFirstLoadableAddress(); |
849 | } else { |
850 | Source += Binary->getPreferredBaseAddress(); |
851 | Target += Binary->getPreferredBaseAddress(); |
852 | } |
853 | } |
854 | |
855 | Counter[{Source, Target}] += Count; |
856 | TraceIt.advance(); |
857 | } |
858 | }; |
859 | |
860 | ReadCounter(SCounters.RangeCounter, "-" ); |
861 | ReadCounter(SCounters.BranchCounter, "->" ); |
862 | } |
863 | |
864 | void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { |
865 | TraceStream TraceIt(FileName); |
866 | while (!TraceIt.isAtEoF()) { |
867 | std::shared_ptr<StringBasedCtxKey> Key = |
868 | std::make_shared<StringBasedCtxKey>(); |
869 | StringRef Line = TraceIt.getCurrentLine(); |
870 | // Read context stack for CS profile. |
871 | if (Line.starts_with(Prefix: "[" )) { |
872 | ProfileIsCS = true; |
873 | auto I = ContextStrSet.insert(x: Line.str()); |
874 | SampleContext::createCtxVectorFromStr(ContextStr: *I.first, Context&: Key->Context); |
875 | TraceIt.advance(); |
876 | } |
877 | auto Ret = |
878 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
879 | readSampleCounters(TraceIt, SCounters&: Ret.first->second); |
880 | } |
881 | } |
882 | |
883 | void UnsymbolizedProfileReader::parsePerfTraces() { |
884 | readUnsymbolizedProfile(FileName: PerfTraceFile); |
885 | } |
886 | |
887 | void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, |
888 | uint64_t Repeat) { |
889 | SampleCounter &Counter = SampleCounters.begin()->second; |
890 | uint64_t EndAddress = 0; |
891 | for (const LBREntry &LBR : Sample->LBRStack) { |
892 | uint64_t SourceAddress = LBR.Source; |
893 | uint64_t TargetAddress = LBR.Target; |
894 | |
895 | // Record the branch if its SourceAddress is external. It can be the case an |
896 | // external source call an internal function, later this branch will be used |
897 | // to generate the function's head sample. |
898 | if (Binary->addressIsCode(Address: TargetAddress)) { |
899 | Counter.recordBranchCount(Source: SourceAddress, Target: TargetAddress, Repeat); |
900 | } |
901 | |
902 | // If this not the first LBR, update the range count between TO of current |
903 | // LBR and FROM of next LBR. |
904 | uint64_t StartAddress = TargetAddress; |
905 | if (Binary->addressIsCode(Address: StartAddress) && |
906 | Binary->addressIsCode(Address: EndAddress) && |
907 | isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) |
908 | Counter.recordRangeCount(Start: StartAddress, End: EndAddress, Repeat); |
909 | EndAddress = SourceAddress; |
910 | } |
911 | } |
912 | |
913 | void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
914 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
915 | // Parsing LBR stack and populate into PerfSample.LBRStack |
916 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
917 | warnIfMissingMMap(); |
918 | // Record LBR only samples by aggregation |
919 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
920 | } |
921 | } |
922 | |
923 | void PerfScriptReader::generateUnsymbolizedProfile() { |
924 | // There is no context for LBR only sample, so initialize one entry with |
925 | // fake "empty" context key. |
926 | assert(SampleCounters.empty() && |
927 | "Sample counter map should be empty before raw profile generation" ); |
928 | std::shared_ptr<StringBasedCtxKey> Key = |
929 | std::make_shared<StringBasedCtxKey>(); |
930 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
931 | for (const auto &Item : AggregatedSamples) { |
932 | const PerfSample *Sample = Item.first.getPtr(); |
933 | computeCounterFromLBR(Sample, Repeat: Item.second); |
934 | } |
935 | } |
936 | |
937 | uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) { |
938 | // The aggregated count is optional, so do not skip the line and return 1 if |
939 | // it's unmatched |
940 | uint64_t Count = 1; |
941 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: Count)) |
942 | TraceIt.advance(); |
943 | return Count; |
944 | } |
945 | |
946 | void PerfScriptReader::parseSample(TraceStream &TraceIt) { |
947 | NumTotalSample++; |
948 | uint64_t Count = parseAggregatedCount(TraceIt); |
949 | assert(Count >= 1 && "Aggregated count should be >= 1!" ); |
950 | parseSample(TraceIt, Count); |
951 | } |
952 | |
953 | bool PerfScriptReader::(ProfiledBinary *Binary, |
954 | StringRef Line, |
955 | MMapEvent &MMap) { |
956 | // Parse a line like: |
957 | // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 |
958 | // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so |
959 | constexpr static const char *const Pattern = |
960 | "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " |
961 | "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " |
962 | "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)" ; |
963 | // Field 0 - whole line |
964 | // Field 1 - PID |
965 | // Field 2 - base address |
966 | // Field 3 - mmapped size |
967 | // Field 4 - page offset |
968 | // Field 5 - binary path |
969 | enum EventIndex { |
970 | WHOLE_LINE = 0, |
971 | PID = 1, |
972 | MMAPPED_ADDRESS = 2, |
973 | MMAPPED_SIZE = 3, |
974 | PAGE_OFFSET = 4, |
975 | BINARY_PATH = 5 |
976 | }; |
977 | |
978 | Regex RegMmap2(Pattern); |
979 | SmallVector<StringRef, 6> Fields; |
980 | bool R = RegMmap2.match(String: Line, Matches: &Fields); |
981 | if (!R) { |
982 | std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n" ; |
983 | WithColor::warning() << WarningMsg; |
984 | } |
985 | Fields[PID].getAsInteger(Radix: 10, Result&: MMap.PID); |
986 | Fields[MMAPPED_ADDRESS].getAsInteger(Radix: 0, Result&: MMap.Address); |
987 | Fields[MMAPPED_SIZE].getAsInteger(Radix: 0, Result&: MMap.Size); |
988 | Fields[PAGE_OFFSET].getAsInteger(Radix: 0, Result&: MMap.Offset); |
989 | MMap.BinaryPath = Fields[BINARY_PATH]; |
990 | if (ShowMmapEvents) { |
991 | outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " |
992 | << format(Fmt: "0x%" PRIx64 ":" , Vals: MMap.Address) << " \n" ; |
993 | } |
994 | |
995 | StringRef BinaryName = filename(Path: MMap.BinaryPath, UseBackSlash: Binary->isCOFF()); |
996 | return Binary->getName() == BinaryName; |
997 | } |
998 | |
999 | void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) { |
1000 | MMapEvent MMap; |
1001 | if (extractMMap2EventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) |
1002 | updateBinaryAddress(Event: MMap); |
1003 | TraceIt.advance(); |
1004 | } |
1005 | |
1006 | void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { |
1007 | if (isMMap2Event(Line: TraceIt.getCurrentLine())) |
1008 | parseMMap2Event(TraceIt); |
1009 | else |
1010 | parseSample(TraceIt); |
1011 | } |
1012 | |
1013 | void PerfScriptReader::parseAndAggregateTrace() { |
1014 | // Trace line iterator |
1015 | TraceStream TraceIt(PerfTraceFile); |
1016 | while (!TraceIt.isAtEoF()) |
1017 | parseEventOrSample(TraceIt); |
1018 | } |
1019 | |
1020 | // A LBR sample is like: |
1021 | // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... |
1022 | // A heuristic for fast detection by checking whether a |
1023 | // leading " 0x" and the '/' exist. |
1024 | bool PerfScriptReader::isLBRSample(StringRef Line) { |
1025 | // Skip the leading instruction pointer |
1026 | SmallVector<StringRef, 32> Records; |
1027 | Line.trim().split(A&: Records, Separator: " " , MaxSplit: 2, KeepEmpty: false); |
1028 | if (Records.size() < 2) |
1029 | return false; |
1030 | if (Records[1].starts_with(Prefix: "0x" ) && Records[1].contains(C: '/')) |
1031 | return true; |
1032 | return false; |
1033 | } |
1034 | |
1035 | bool PerfScriptReader::isMMap2Event(StringRef Line) { |
1036 | // Short cut to avoid string find is possible. |
1037 | if (Line.empty() || Line.size() < 50) |
1038 | return false; |
1039 | |
1040 | if (std::isdigit(Line[0])) |
1041 | return false; |
1042 | |
1043 | // PERF_RECORD_MMAP2 does not appear at the beginning of the line |
1044 | // for ` perf script --show-mmap-events -i ...` |
1045 | return Line.contains(Other: "PERF_RECORD_MMAP2" ); |
1046 | } |
1047 | |
1048 | // The raw hybird sample is like |
1049 | // e.g. |
1050 | // 4005dc # call stack leaf |
1051 | // 400634 |
1052 | // 400684 # call stack root |
1053 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
1054 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
1055 | // Determine the perfscript contains hybrid samples(call stack + LBRs) by |
1056 | // checking whether there is a non-empty call stack immediately followed by |
1057 | // a LBR sample |
1058 | PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { |
1059 | TraceStream TraceIt(FileName); |
1060 | uint64_t FrameAddr = 0; |
1061 | while (!TraceIt.isAtEoF()) { |
1062 | // Skip the aggregated count |
1063 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: FrameAddr)) |
1064 | TraceIt.advance(); |
1065 | |
1066 | // Detect sample with call stack |
1067 | int32_t Count = 0; |
1068 | while (!TraceIt.isAtEoF() && |
1069 | !TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 16, Result&: FrameAddr)) { |
1070 | Count++; |
1071 | TraceIt.advance(); |
1072 | } |
1073 | if (!TraceIt.isAtEoF()) { |
1074 | if (isLBRSample(Line: TraceIt.getCurrentLine())) { |
1075 | if (Count > 0) |
1076 | return PerfContent::LBRStack; |
1077 | else |
1078 | return PerfContent::LBR; |
1079 | } |
1080 | TraceIt.advance(); |
1081 | } |
1082 | } |
1083 | |
1084 | exitWithError(Message: "Invalid perf script input!" ); |
1085 | return PerfContent::UnknownContent; |
1086 | } |
1087 | |
1088 | void HybridPerfReader::generateUnsymbolizedProfile() { |
1089 | ProfileIsCS = !IgnoreStackSamples; |
1090 | if (ProfileIsCS) |
1091 | unwindSamples(); |
1092 | else |
1093 | PerfScriptReader::generateUnsymbolizedProfile(); |
1094 | } |
1095 | |
1096 | void PerfScriptReader::warnTruncatedStack() { |
1097 | if (ShowDetailedWarning) { |
1098 | for (auto Address : InvalidReturnAddresses) { |
1099 | WithColor::warning() |
1100 | << "Truncated stack sample due to invalid return address at " |
1101 | << format(Fmt: "0x%" PRIx64, Vals: Address) |
1102 | << ", likely caused by frame pointer omission\n" ; |
1103 | } |
1104 | } |
1105 | emitWarningSummary( |
1106 | Num: InvalidReturnAddresses.size(), Total: AggregatedSamples.size(), |
1107 | Msg: "of truncated stack samples due to invalid return address, " |
1108 | "likely caused by frame pointer omission." ); |
1109 | } |
1110 | |
1111 | void PerfScriptReader::warnInvalidRange() { |
1112 | std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t, |
1113 | pair_hash<uint64_t, uint64_t>> |
1114 | Ranges; |
1115 | |
1116 | for (const auto &Item : AggregatedSamples) { |
1117 | const PerfSample *Sample = Item.first.getPtr(); |
1118 | uint64_t Count = Item.second; |
1119 | uint64_t EndAddress = 0; |
1120 | for (const LBREntry &LBR : Sample->LBRStack) { |
1121 | uint64_t SourceAddress = LBR.Source; |
1122 | uint64_t StartAddress = LBR.Target; |
1123 | if (EndAddress != 0) |
1124 | Ranges[{StartAddress, EndAddress}] += Count; |
1125 | EndAddress = SourceAddress; |
1126 | } |
1127 | } |
1128 | |
1129 | if (Ranges.empty()) { |
1130 | WithColor::warning() << "No samples in perf script!\n" ; |
1131 | return; |
1132 | } |
1133 | |
1134 | auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress, |
1135 | StringRef Msg) { |
1136 | if (!ShowDetailedWarning) |
1137 | return; |
1138 | WithColor::warning() << "[" << format(Fmt: "%8" PRIx64, Vals: StartAddress) << "," |
1139 | << format(Fmt: "%8" PRIx64, Vals: EndAddress) << "]: " << Msg |
1140 | << "\n" ; |
1141 | }; |
1142 | |
1143 | const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " |
1144 | "likely due to profile and binary mismatch." ; |
1145 | const char *DanglingRangeMsg = "Range does not belong to any functions, " |
1146 | "likely from PLT, .init or .fini section." ; |
1147 | const char *RangeCrossFuncMsg = |
1148 | "Fall through range should not cross function boundaries, likely due to " |
1149 | "profile and binary mismatch." ; |
1150 | const char *BogusRangeMsg = "Range start is after or too far from range end." ; |
1151 | |
1152 | uint64_t TotalRangeNum = 0; |
1153 | uint64_t InstNotBoundary = 0; |
1154 | uint64_t UnmatchedRange = 0; |
1155 | uint64_t RangeCrossFunc = 0; |
1156 | uint64_t BogusRange = 0; |
1157 | |
1158 | for (auto &I : Ranges) { |
1159 | uint64_t StartAddress = I.first.first; |
1160 | uint64_t EndAddress = I.first.second; |
1161 | TotalRangeNum += I.second; |
1162 | |
1163 | if (!Binary->addressIsCode(Address: StartAddress) && |
1164 | !Binary->addressIsCode(Address: EndAddress)) |
1165 | continue; |
1166 | |
1167 | if (!Binary->addressIsCode(Address: StartAddress) || |
1168 | !Binary->addressIsTransfer(Address: EndAddress)) { |
1169 | InstNotBoundary += I.second; |
1170 | WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); |
1171 | } |
1172 | |
1173 | auto *FRange = Binary->findFuncRange(Address: StartAddress); |
1174 | if (!FRange) { |
1175 | UnmatchedRange += I.second; |
1176 | WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); |
1177 | continue; |
1178 | } |
1179 | |
1180 | if (EndAddress >= FRange->EndAddress) { |
1181 | RangeCrossFunc += I.second; |
1182 | WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); |
1183 | } |
1184 | |
1185 | if (Binary->addressIsCode(Address: StartAddress) && |
1186 | Binary->addressIsCode(Address: EndAddress) && |
1187 | !isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) { |
1188 | BogusRange += I.second; |
1189 | WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg); |
1190 | } |
1191 | } |
1192 | |
1193 | emitWarningSummary( |
1194 | Num: InstNotBoundary, Total: TotalRangeNum, |
1195 | Msg: "of samples are from ranges that are not on instruction boundary." ); |
1196 | emitWarningSummary( |
1197 | Num: UnmatchedRange, Total: TotalRangeNum, |
1198 | Msg: "of samples are from ranges that do not belong to any functions." ); |
1199 | emitWarningSummary( |
1200 | Num: RangeCrossFunc, Total: TotalRangeNum, |
1201 | Msg: "of samples are from ranges that do cross function boundaries." ); |
1202 | emitWarningSummary( |
1203 | Num: BogusRange, Total: TotalRangeNum, |
1204 | Msg: "of samples are from ranges that have range start after or too far from " |
1205 | "range end acrossing the unconditinal jmp." ); |
1206 | } |
1207 | |
1208 | void PerfScriptReader::parsePerfTraces() { |
1209 | // Parse perf traces and do aggregation. |
1210 | parseAndAggregateTrace(); |
1211 | |
1212 | emitWarningSummary(Num: NumLeafExternalFrame, Total: NumTotalSample, |
1213 | Msg: "of samples have leaf external frame in call stack." ); |
1214 | emitWarningSummary(Num: NumLeadingOutgoingLBR, Total: NumTotalSample, |
1215 | Msg: "of samples have leading external LBR." ); |
1216 | |
1217 | // Generate unsymbolized profile. |
1218 | warnTruncatedStack(); |
1219 | warnInvalidRange(); |
1220 | generateUnsymbolizedProfile(); |
1221 | AggregatedSamples.clear(); |
1222 | |
1223 | if (SkipSymbolization) |
1224 | writeUnsymbolizedProfile(Filename: OutputFilename); |
1225 | } |
1226 | |
// Out-of-class definition of the static member
// PerfScriptReader::TempFileCleanups.
// NOTE(review): presumably holds the cleanup handlers for temporary files
// created by the reader -- confirm against its uses elsewhere in this file.
SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups;
1228 | |
1229 | } // end namespace sampleprof |
1230 | } // end namespace llvm |
1231 | |