1 | //===- MemProfiler.cpp - memory allocation and access profiler ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of MemProfiler. Memory accesses are instrumented |
10 | // to increment the access count held in a shadow memory location, or |
11 | // alternatively to call into the runtime. Memory intrinsic calls (memmove, |
12 | // memcpy, memset) are changed to call the memory profiling runtime version |
13 | // instead. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/Transforms/Instrumentation/MemProfiler.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/Analysis/MemoryBuiltins.h" |
22 | #include "llvm/Analysis/MemoryProfileInfo.h" |
23 | #include "llvm/Analysis/ValueTracking.h" |
24 | #include "llvm/IR/Constant.h" |
25 | #include "llvm/IR/DataLayout.h" |
26 | #include "llvm/IR/DiagnosticInfo.h" |
27 | #include "llvm/IR/Function.h" |
28 | #include "llvm/IR/GlobalValue.h" |
29 | #include "llvm/IR/IRBuilder.h" |
30 | #include "llvm/IR/Instruction.h" |
31 | #include "llvm/IR/IntrinsicInst.h" |
32 | #include "llvm/IR/Module.h" |
33 | #include "llvm/IR/Type.h" |
34 | #include "llvm/IR/Value.h" |
35 | #include "llvm/ProfileData/InstrProf.h" |
36 | #include "llvm/ProfileData/InstrProfReader.h" |
37 | #include "llvm/Support/BLAKE3.h" |
38 | #include "llvm/Support/CommandLine.h" |
39 | #include "llvm/Support/Debug.h" |
40 | #include "llvm/Support/HashBuilder.h" |
41 | #include "llvm/Support/VirtualFileSystem.h" |
42 | #include "llvm/TargetParser/Triple.h" |
43 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
44 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
45 | #include <map> |
46 | #include <set> |
47 | |
48 | using namespace llvm; |
49 | using namespace llvm::memprof; |
50 | |
51 | #define DEBUG_TYPE "memprof" |
52 | |
namespace llvm {
// Profile-use warning flags shared with other profile passes (defined
// elsewhere in LLVM); they control diagnostics emitted in readMemprof().
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

// Version of the profiler ABI; embedded into the version-mismatch-check
// symbol name when ClInsertVersionCheck is enabled.
constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
// Runtime entry point called from the generated module constructor.
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

// Global whose load yields the dynamic shadow base address (see
// insertDynamicShadowAtFunctionEntry).
constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

// Name of the global created to hold the profile output filename.
constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
79 | |
// Command-line flags.

// Emit a call to __memprof_version_mismatch_check_v<N> from the module ctor
// so a compiler/runtime ABI skew fails loudly at startup.
static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

// When set, each access calls into the runtime instead of incrementing the
// shadow counter inline (see MemProfiler::instrumentAddress).
static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow to change the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

// Stack accesses are skipped by default; see MemProfiler::instrumentMop.
static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

// ClDebugMin/ClDebugMax restrict instrumentation to a range of candidate
// instructions for bisection-style debugging (both must be >= 0 to apply).
static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
148 | |
149 | namespace { |
150 | |
151 | /// This struct defines the shadow mapping using the rule: |
152 | /// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset. |
153 | struct ShadowMapping { |
154 | ShadowMapping() { |
155 | Scale = ClMappingScale; |
156 | Granularity = ClMappingGranularity; |
157 | Mask = ~(Granularity - 1); |
158 | } |
159 | |
160 | int Scale; |
161 | int Granularity; |
162 | uint64_t Mask; // Computed as ~(Granularity-1) |
163 | }; |
164 | |
165 | static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { |
166 | return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority |
167 | : MemProfCtorAndDtorPriority; |
168 | } |
169 | |
170 | struct InterestingMemoryAccess { |
171 | Value *Addr = nullptr; |
172 | bool IsWrite; |
173 | Type *AccessTy; |
174 | Value *MaybeMask = nullptr; |
175 | }; |
176 | |
/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return a InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  /// Instrument one interesting access (scalar or masked vector).
  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  /// Emit the shadow-counter increment (or runtime callback) for one address.
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  /// Instrument each potentially-active lane of a masked load/store.
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  /// Replace a mem* intrinsic with a call to the profiling runtime version.
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  /// Map an address (as integer) to its shadow location.
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  /// Per-function driver; returns true if \p F was modified.
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  /// Declare/look up the runtime callback functions in \p M.
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;   // Pointer size in bits for the target.
  Type *IntptrTy; // Integer type of pointer width.
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite (0 = load, 1 = store).
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  // Load of the dynamic shadow base for the current function; set by
  // insertDynamicShadowAtFunctionEntry and consulted in memToShadow.
  Value *DynamicShadowOffset = nullptr;
};
221 | |
/// Module-level companion of MemProfiler: registers the module constructor
/// (runtime init + version check) and the profile filename variable.
class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  /// Performs the module-level setup; always reports a modification.
  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};
233 | |
234 | } // end anonymous namespace |
235 | |
236 | MemProfilerPass::MemProfilerPass() = default; |
237 | |
238 | PreservedAnalyses MemProfilerPass::run(Function &F, |
239 | AnalysisManager<Function> &AM) { |
240 | Module &M = *F.getParent(); |
241 | MemProfiler Profiler(M); |
242 | if (Profiler.instrumentFunction(F)) |
243 | return PreservedAnalyses::none(); |
244 | return PreservedAnalyses::all(); |
245 | } |
246 | |
247 | ModuleMemProfilerPass::ModuleMemProfilerPass() = default; |
248 | |
249 | PreservedAnalyses ModuleMemProfilerPass::run(Module &M, |
250 | AnalysisManager<Module> &AM) { |
251 | ModuleMemProfiler Profiler(M); |
252 | if (Profiler.instrumentModule(M)) |
253 | return PreservedAnalyses::none(); |
254 | return PreservedAnalyses::all(); |
255 | } |
256 | |
257 | Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { |
258 | // (Shadow & mask) >> scale |
259 | Shadow = IRB.CreateAnd(LHS: Shadow, RHS: Mapping.Mask); |
260 | Shadow = IRB.CreateLShr(LHS: Shadow, RHS: Mapping.Scale); |
261 | // (Shadow >> scale) | offset |
262 | assert(DynamicShadowOffset); |
263 | return IRB.CreateAdd(LHS: Shadow, RHS: DynamicShadowOffset); |
264 | } |
265 | |
266 | // Instrument memset/memmove/memcpy |
267 | void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { |
268 | IRBuilder<> IRB(MI); |
269 | if (isa<MemTransferInst>(Val: MI)) { |
270 | IRB.CreateCall(Callee: isa<MemMoveInst>(Val: MI) ? MemProfMemmove : MemProfMemcpy, |
271 | Args: {MI->getOperand(i_nocapture: 0), MI->getOperand(i_nocapture: 1), |
272 | IRB.CreateIntCast(V: MI->getOperand(i_nocapture: 2), DestTy: IntptrTy, isSigned: false)}); |
273 | } else if (isa<MemSetInst>(Val: MI)) { |
274 | IRB.CreateCall( |
275 | Callee: MemProfMemset, |
276 | Args: {MI->getOperand(i_nocapture: 0), |
277 | IRB.CreateIntCast(V: MI->getOperand(i_nocapture: 1), DestTy: IRB.getInt32Ty(), isSigned: false), |
278 | IRB.CreateIntCast(V: MI->getOperand(i_nocapture: 2), DestTy: IntptrTy, isSigned: false)}); |
279 | } |
280 | MI->eraseFromParent(); |
281 | } |
282 | |
// Classify \p I. Returns a populated InterestingMemoryAccess for loads,
// stores, atomics, and masked load/store intrinsics that should be
// instrumented, or std::nullopt for everything else (including accesses the
// flags disable and addresses we deliberately skip).
std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    // Atomics read and write; they are accounted as writes here.
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      // With OpOffset applied, operand 0 is the pointer and operand 2 the
      // mask for both intrinsics.
      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  // NOTE(review): this local intentionally shadows the member `PtrTy`.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}
377 | |
// Instrument a masked vector load/store by handling each element separately.
// Constant-false lanes are skipped entirely; for a non-constant mask each
// lane's instrumentation is guarded by a runtime branch on that mask element.
void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress.
        // with InsertBefore == I
      }
    } else {
      // Non-constant mask: split the block and only count this lane's access
      // when the extracted mask bit is set at runtime.
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    // Address of element Idx within the vector.
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}
409 | |
410 | void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, |
411 | InterestingMemoryAccess &Access) { |
412 | // Skip instrumentation of stack accesses unless requested. |
413 | if (!ClStack && isa<AllocaInst>(Val: getUnderlyingObject(V: Access.Addr))) { |
414 | if (Access.IsWrite) |
415 | ++NumSkippedStackWrites; |
416 | else |
417 | ++NumSkippedStackReads; |
418 | return; |
419 | } |
420 | |
421 | if (Access.IsWrite) |
422 | NumInstrumentedWrites++; |
423 | else |
424 | NumInstrumentedReads++; |
425 | |
426 | if (Access.MaybeMask) { |
427 | instrumentMaskedLoadOrStore(DL, Mask: Access.MaybeMask, I, Addr: Access.Addr, |
428 | AccessTy: Access.AccessTy, IsWrite: Access.IsWrite); |
429 | } else { |
430 | // Since the access counts will be accumulated across the entire allocation, |
431 | // we only update the shadow access count for the first location and thus |
432 | // don't need to worry about alignment and type size. |
433 | instrumentAddress(OrigIns: I, InsertBefore: I, Addr: Access.Addr, IsWrite: Access.IsWrite); |
434 | } |
435 | } |
436 | |
437 | void MemProfiler::instrumentAddress(Instruction *OrigIns, |
438 | Instruction *InsertBefore, Value *Addr, |
439 | bool IsWrite) { |
440 | IRBuilder<> IRB(InsertBefore); |
441 | Value *AddrLong = IRB.CreatePointerCast(V: Addr, DestTy: IntptrTy); |
442 | |
443 | if (ClUseCalls) { |
444 | IRB.CreateCall(Callee: MemProfMemoryAccessCallback[IsWrite], Args: AddrLong); |
445 | return; |
446 | } |
447 | |
448 | // Create an inline sequence to compute shadow location, and increment the |
449 | // value by one. |
450 | Type *ShadowTy = Type::getInt64Ty(C&: *C); |
451 | Type *ShadowPtrTy = PointerType::get(ElementType: ShadowTy, AddressSpace: 0); |
452 | Value *ShadowPtr = memToShadow(Shadow: AddrLong, IRB); |
453 | Value *ShadowAddr = IRB.CreateIntToPtr(V: ShadowPtr, DestTy: ShadowPtrTy); |
454 | Value *ShadowValue = IRB.CreateLoad(Ty: ShadowTy, Ptr: ShadowAddr); |
455 | Value *Inc = ConstantInt::get(Ty: Type::getInt64Ty(C&: *C), V: 1); |
456 | ShadowValue = IRB.CreateAdd(LHS: ShadowValue, RHS: Inc); |
457 | IRB.CreateStore(Val: ShadowValue, Ptr: ShadowAddr); |
458 | } |
459 | |
// Create the variable for the profile file name.
// Reads the "MemProfProfileFilename" module flag and, when present, emits the
// __memprof_profile_filename global initialized with that (NUL-terminated)
// string. Weak linkage — upgraded to external + comdat where supported — lets
// multiple TUs carrying the variable deduplicate at link time.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}
479 | |
// Module-level setup: create memprof.module_ctor (calling __memprof_init and,
// optionally, the versioned mismatch-check symbol), register it as a global
// constructor at the target-appropriate priority, and emit the profile
// filename variable. Always returns true since the module is modified.
bool ModuleMemProfiler::instrumentModule(Module &M) {
  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  return true;
}
498 | |
// Declare (or look up) the runtime callbacks referenced by the instrumented
// code: __memprof_load/__memprof_store and the mem* replacements. Safe to
// call repeatedly; getOrInsertFunction reuses existing declarations.
void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";

    // (count, value) constructor: a single IntptrTy parameter, not {1, ...}.
    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] =
        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr,
                              FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}
518 | |
519 | bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { |
520 | // For each NSObject descendant having a +load method, this method is invoked |
521 | // by the ObjC runtime before any of the static constructors is called. |
522 | // Therefore we need to instrument such methods with a call to __memprof_init |
523 | // at the beginning in order to initialize our runtime before any access to |
524 | // the shadow memory. |
525 | // We cannot just ignore these methods, because they may call other |
526 | // instrumented functions. |
527 | if (F.getName().contains(Other: " load]" )) { |
528 | FunctionCallee MemProfInitFunction = |
529 | declareSanitizerInitFunction(M&: *F.getParent(), InitName: MemProfInitName, InitArgTypes: {}); |
530 | IRBuilder<> IRB(&F.front(), F.front().begin()); |
531 | IRB.CreateCall(Callee: MemProfInitFunction, Args: {}); |
532 | return true; |
533 | } |
534 | return false; |
535 | } |
536 | |
// Load __memprof_shadow_memory_dynamic_address at the start of the entry
// block and cache the load in DynamicShadowOffset for memToShadow(). Always
// returns true because the load instruction is inserted unconditionally.
bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  // In non-PIC builds the global can be resolved locally.
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}
546 | |
// Per-function driver: collect every interesting access and memory intrinsic
// in \p F, then instrument them. Returns true if F was modified in any way.
bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  // Never instrument the profiling runtime's own functions.
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  // The shadow base is only needed when something will be instrumented.
  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    // Honor the -memprof-debug-min/max candidate range when both are set.
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getParent()->getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    // Counts candidates considered, including ones skipped by the range.
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}
608 | |
609 | static void addCallsiteMetadata(Instruction &I, |
610 | std::vector<uint64_t> &InlinedCallStack, |
611 | LLVMContext &Ctx) { |
612 | I.setMetadata(KindID: LLVMContext::MD_callsite, |
613 | Node: buildCallstackMetadata(CallStack: InlinedCallStack, Ctx)); |
614 | } |
615 | |
616 | static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, |
617 | uint32_t Column) { |
618 | llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> |
619 | HashBuilder; |
620 | HashBuilder.add(Args: Function, Args: LineOffset, Args: Column); |
621 | llvm::BLAKE3Result<8> Hash = HashBuilder.final(); |
622 | uint64_t Id; |
623 | std::memcpy(dest: &Id, src: Hash.data(), n: sizeof(Hash)); |
624 | return Id; |
625 | } |
626 | |
// Overload: compute the stack id directly from a profile Frame.
static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}
630 | |
// Add one profiled allocation context to \p AllocTrie: convert its call stack
// to stack ids and classify the allocation from the profiled access-density,
// allocation-count, and lifetime statistics.
static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  AllocTrie.addCallStack(AllocType, StackIds);
}
641 | |
// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  // Walk both sequences in lockstep; the profile stack may be longer than the
  // inlined stack, so a full match of InlinedCallStack (a prefix of the
  // profile stack) is sufficient.
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}
663 | |
664 | static void readMemprof(Module &M, Function &F, |
665 | IndexedInstrProfReader *MemProfReader, |
666 | const TargetLibraryInfo &TLI) { |
667 | auto &Ctx = M.getContext(); |
668 | // Previously we used getIRPGOFuncName() here. If F is local linkage, |
669 | // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But |
670 | // llvm-profdata uses FuncName in dwarf to create GUID which doesn't |
671 | // contain FileName's prefix. It caused local linkage function can't |
672 | // find MemProfRecord. So we use getName() now. |
673 | // 'unique-internal-linkage-names' can make MemProf work better for local |
674 | // linkage function. |
675 | auto FuncName = F.getName(); |
676 | auto FuncGUID = Function::getGUID(GlobalName: FuncName); |
677 | std::optional<memprof::MemProfRecord> MemProfRec; |
678 | auto Err = MemProfReader->getMemProfRecord(FuncNameHash: FuncGUID).moveInto(Value&: MemProfRec); |
679 | if (Err) { |
680 | handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) { |
681 | auto Err = IPE.get(); |
682 | bool SkipWarning = false; |
683 | LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName |
684 | << ": " ); |
685 | if (Err == instrprof_error::unknown_function) { |
686 | NumOfMemProfMissing++; |
687 | SkipWarning = !PGOWarnMissing; |
688 | LLVM_DEBUG(dbgs() << "unknown function" ); |
689 | } else if (Err == instrprof_error::hash_mismatch) { |
690 | SkipWarning = |
691 | NoPGOWarnMismatch || |
692 | (NoPGOWarnMismatchComdatWeak && |
693 | (F.hasComdat() || |
694 | F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
695 | LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")" ); |
696 | } |
697 | |
698 | if (SkipWarning) |
699 | return; |
700 | |
701 | std::string Msg = (IPE.message() + Twine(" " ) + F.getName().str() + |
702 | Twine(" Hash = " ) + std::to_string(val: FuncGUID)) |
703 | .str(); |
704 | |
705 | Ctx.diagnose( |
706 | DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
707 | }); |
708 | return; |
709 | } |
710 | |
711 | // Detect if there are non-zero column numbers in the profile. If not, |
712 | // treat all column numbers as 0 when matching (i.e. ignore any non-zero |
713 | // columns in the IR). The profiled binary might have been built with |
714 | // column numbers disabled, for example. |
715 | bool ProfileHasColumns = false; |
716 | |
717 | // Build maps of the location hash to all profile data with that leaf location |
718 | // (allocation info and the callsites). |
719 | std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; |
720 | // For the callsites we need to record the index of the associated frame in |
721 | // the frame array (see comments below where the map entries are added). |
722 | std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> |
723 | LocHashToCallSites; |
724 | for (auto &AI : MemProfRec->AllocSites) { |
725 | // Associate the allocation info with the leaf frame. The later matching |
726 | // code will match any inlined call sequences in the IR with a longer prefix |
727 | // of call stack frames. |
728 | uint64_t StackId = computeStackId(Frame: AI.CallStack[0]); |
729 | LocHashToAllocInfo[StackId].insert(x: &AI); |
730 | ProfileHasColumns |= AI.CallStack[0].Column; |
731 | } |
732 | for (auto &CS : MemProfRec->CallSites) { |
733 | // Need to record all frames from leaf up to and including this function, |
734 | // as any of these may or may not have been inlined at this point. |
735 | unsigned Idx = 0; |
736 | for (auto &StackFrame : CS) { |
737 | uint64_t StackId = computeStackId(Frame: StackFrame); |
738 | LocHashToCallSites[StackId].insert(x: std::make_pair(x: &CS, y: Idx++)); |
739 | ProfileHasColumns |= StackFrame.Column; |
740 | // Once we find this function, we can stop recording. |
741 | if (StackFrame.Function == FuncGUID) |
742 | break; |
743 | } |
744 | assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); |
745 | } |
746 | |
747 | auto GetOffset = [](const DILocation *DIL) { |
748 | return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & |
749 | 0xffff; |
750 | }; |
751 | |
  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
754 | for (auto &BB : F) { |
755 | for (auto &I : BB) { |
756 | if (I.isDebugOrPseudoInst()) |
757 | continue; |
758 | // We are only interested in calls (allocation or interior call stack |
759 | // context calls). |
760 | auto *CI = dyn_cast<CallBase>(Val: &I); |
761 | if (!CI) |
762 | continue; |
763 | auto *CalledFunction = CI->getCalledFunction(); |
764 | if (CalledFunction && CalledFunction->isIntrinsic()) |
765 | continue; |
766 | // List of call stack ids computed from the location hashes on debug |
767 | // locations (leaf to inlined at root). |
768 | std::vector<uint64_t> InlinedCallStack; |
769 | // Was the leaf location found in one of the profile maps? |
770 | bool LeafFound = false; |
771 | // If leaf was found in a map, iterators pointing to its location in both |
772 | // of the maps. It might exist in neither, one, or both (the latter case |
773 | // can happen because we don't currently have discriminators to |
774 | // distinguish the case when a single line/col maps to both an allocation |
775 | // and another callsite). |
776 | std::map<uint64_t, std::set<const AllocationInfo *>>::iterator |
777 | AllocInfoIter; |
778 | std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, |
779 | unsigned>>>::iterator CallSitesIter; |
780 | for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; |
781 | DIL = DIL->getInlinedAt()) { |
782 | // Use C++ linkage name if possible. Need to compile with |
783 | // -fdebug-info-for-profiling to get linkage name. |
784 | StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); |
785 | if (Name.empty()) |
786 | Name = DIL->getScope()->getSubprogram()->getName(); |
787 | auto CalleeGUID = Function::getGUID(GlobalName: Name); |
788 | auto StackId = computeStackId(Function: CalleeGUID, LineOffset: GetOffset(DIL), |
789 | Column: ProfileHasColumns ? DIL->getColumn() : 0); |
790 | // Check if we have found the profile's leaf frame. If yes, collect |
791 | // the rest of the call's inlined context starting here. If not, see if |
792 | // we find a match further up the inlined context (in case the profile |
793 | // was missing debug frames at the leaf). |
794 | if (!LeafFound) { |
795 | AllocInfoIter = LocHashToAllocInfo.find(x: StackId); |
796 | CallSitesIter = LocHashToCallSites.find(x: StackId); |
797 | if (AllocInfoIter != LocHashToAllocInfo.end() || |
798 | CallSitesIter != LocHashToCallSites.end()) |
799 | LeafFound = true; |
800 | } |
801 | if (LeafFound) |
802 | InlinedCallStack.push_back(x: StackId); |
803 | } |
804 | // If leaf not in either of the maps, skip inst. |
805 | if (!LeafFound) |
806 | continue; |
807 | |
808 | // First add !memprof metadata from allocation info, if we found the |
809 | // instruction's leaf location in that map, and if the rest of the |
810 | // instruction's locations match the prefix Frame locations on an |
811 | // allocation context with the same leaf. |
812 | if (AllocInfoIter != LocHashToAllocInfo.end()) { |
813 | // Only consider allocations via new, to reduce unnecessary metadata, |
814 | // since those are the only allocations that will be targeted initially. |
815 | if (!isNewLikeFn(V: CI, TLI: &TLI)) |
816 | continue; |
817 | // We may match this instruction's location list to multiple MIB |
818 | // contexts. Add them to a Trie specialized for trimming the contexts to |
819 | // the minimal needed to disambiguate contexts with unique behavior. |
820 | CallStackTrie AllocTrie; |
821 | for (auto *AllocInfo : AllocInfoIter->second) { |
822 | // Check the full inlined call stack against this one. |
823 | // If we found and thus matched all frames on the call, include |
824 | // this MIB. |
825 | if (stackFrameIncludesInlinedCallStack(ProfileCallStack: AllocInfo->CallStack, |
826 | InlinedCallStack)) |
827 | addCallStack(AllocTrie, AllocInfo); |
828 | } |
829 | // We might not have matched any to the full inlined call stack. |
830 | // But if we did, create and attach metadata, or a function attribute if |
831 | // all contexts have identical profiled behavior. |
832 | if (!AllocTrie.empty()) { |
833 | // MemprofMDAttached will be false if a function attribute was |
834 | // attached. |
835 | bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); |
836 | assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); |
837 | if (MemprofMDAttached) { |
            // Add callsite metadata for the instruction's location list so that
            // it is simpler later on to identify which part of the MIB contexts
            // are from this particular instruction (including during inlining,
            // when the callsite metadata will be updated appropriately).
842 | // FIXME: can this be changed to strip out the matching stack |
843 | // context ids from the MIB contexts and not add any callsite |
844 | // metadata here to save space? |
845 | addCallsiteMetadata(I, InlinedCallStack, Ctx); |
846 | } |
847 | } |
848 | continue; |
849 | } |
850 | |
851 | // Otherwise, add callsite metadata. If we reach here then we found the |
852 | // instruction's leaf location in the callsites map and not the allocation |
853 | // map. |
854 | assert(CallSitesIter != LocHashToCallSites.end()); |
855 | for (auto CallStackIdx : CallSitesIter->second) { |
856 | // If we found and thus matched all frames on the call, create and |
857 | // attach call stack metadata. |
858 | if (stackFrameIncludesInlinedCallStack( |
859 | ProfileCallStack: *CallStackIdx.first, InlinedCallStack, StartIndex: CallStackIdx.second)) { |
860 | addCallsiteMetadata(I, InlinedCallStack, Ctx); |
861 | // Only need to find one with a matching call stack and add a single |
862 | // callsite metadata. |
863 | break; |
864 | } |
865 | } |
866 | } |
867 | } |
868 | } |
869 | |
870 | MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, |
871 | IntrusiveRefCntPtr<vfs::FileSystem> FS) |
872 | : MemoryProfileFileName(MemoryProfileFile), FS(FS) { |
873 | if (!FS) |
874 | this->FS = vfs::getRealFileSystem(); |
875 | } |
876 | |
877 | PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { |
878 | LLVM_DEBUG(dbgs() << "Read in memory profile:" ); |
879 | auto &Ctx = M.getContext(); |
880 | auto ReaderOrErr = IndexedInstrProfReader::create(Path: MemoryProfileFileName, FS&: *FS); |
881 | if (Error E = ReaderOrErr.takeError()) { |
882 | handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) { |
883 | Ctx.diagnose( |
884 | DI: DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); |
885 | }); |
886 | return PreservedAnalyses::all(); |
887 | } |
888 | |
889 | std::unique_ptr<IndexedInstrProfReader> MemProfReader = |
890 | std::move(ReaderOrErr.get()); |
891 | if (!MemProfReader) { |
892 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
893 | MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader" ))); |
894 | return PreservedAnalyses::all(); |
895 | } |
896 | |
897 | if (!MemProfReader->hasMemoryProfile()) { |
898 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), |
899 | "Not a memory profile" )); |
900 | return PreservedAnalyses::all(); |
901 | } |
902 | |
903 | auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
904 | |
905 | for (auto &F : M) { |
906 | if (F.isDeclaration()) |
907 | continue; |
908 | |
909 | const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
910 | readMemprof(M, F, MemProfReader: MemProfReader.get(), TLI); |
911 | } |
912 | |
913 | return PreservedAnalyses::none(); |
914 | } |
915 | |