1 | //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass builds a ModuleSummaryIndex object for the module, to be written |
10 | // to bitcode or LLVM assembly. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/MapVector.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SetVector.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
24 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
25 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
26 | #include "llvm/Analysis/LoopInfo.h" |
27 | #include "llvm/Analysis/MemoryProfileInfo.h" |
28 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
29 | #include "llvm/Analysis/StackSafetyAnalysis.h" |
30 | #include "llvm/Analysis/TypeMetadataUtils.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/BasicBlock.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/Dominators.h" |
36 | #include "llvm/IR/Function.h" |
37 | #include "llvm/IR/GlobalAlias.h" |
38 | #include "llvm/IR/GlobalValue.h" |
39 | #include "llvm/IR/GlobalVariable.h" |
40 | #include "llvm/IR/Instructions.h" |
41 | #include "llvm/IR/IntrinsicInst.h" |
42 | #include "llvm/IR/Metadata.h" |
43 | #include "llvm/IR/Module.h" |
44 | #include "llvm/IR/ModuleSummaryIndex.h" |
45 | #include "llvm/IR/Use.h" |
46 | #include "llvm/IR/User.h" |
47 | #include "llvm/InitializePasses.h" |
48 | #include "llvm/Object/ModuleSymbolTable.h" |
49 | #include "llvm/Object/SymbolicFile.h" |
50 | #include "llvm/Pass.h" |
51 | #include "llvm/Support/Casting.h" |
52 | #include "llvm/Support/CommandLine.h" |
53 | #include "llvm/Support/FileSystem.h" |
54 | #include <algorithm> |
55 | #include <cassert> |
56 | #include <cstdint> |
57 | #include <vector> |
58 | |
59 | using namespace llvm; |
60 | using namespace llvm::memprof; |
61 | |
62 | #define DEBUG_TYPE "module-summary-analysis" |
63 | |
64 | // Option to force edges cold which will block importing when the |
65 | // -import-cold-multiplier is set to 0. Useful for debugging. |
// This variable is the external storage for the FSEC option declared below;
// it defaults to FSHT_None (no forcing).
66 | namespace llvm { |
67 | FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold = |
68 | FunctionSummary::FSHT_None; |
69 | } // namespace llvm |
70 | |
// Hidden command-line option "force-summary-edges-cold". Its parsed value is
// written into llvm::ForceSummaryEdgesCold through cl::location. Accepted
// values are "none", "all-non-critical" and "all".
71 | static cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC( |
72 | "force-summary-edges-cold" , cl::Hidden, cl::location(L&: ForceSummaryEdgesCold), |
73 | cl::desc("Force all edges in the function summary to cold" ), |
74 | cl::values(clEnumValN(FunctionSummary::FSHT_None, "none" , "None." ), |
75 | clEnumValN(FunctionSummary::FSHT_AllNonCritical, |
76 | "all-non-critical" , "All non-critical edges." ), |
77 | clEnumValN(FunctionSummary::FSHT_All, "all" , "All edges." ))); |
78 | |
// Hidden command-line option "module-summary-dot-file": the name of a file
// into which a dot graph of the new summary is emitted.
79 | static cl::opt<std::string> ModuleSummaryDotFile( |
80 | "module-summary-dot-file" , cl::Hidden, cl::value_desc("filename" ), |
81 | cl::desc("File to emit dot graph of new summary into" )); |
82 | |
// Boolean option defined in another translation unit. computeFunctionSummary
// consults it, together with PSI->hasPartialSampleProfile(), before adding the
// function's basic block count to the index.
83 | extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize; |
84 | |
85 | // Walk through the operands of a given User via worklist iteration and populate |
86 | // the set of GlobalValue references encountered. Invoked either on an |
87 | // Instruction or a GlobalVariable (which walks its initializer). |
88 | // Return true if any of the operands contains blockaddress. This is important |
89 | // to know when computing summary for global var, because if global variable |
90 | // references basic block address we can't import it separately from function |
91 | // containing that basic block. For simplicity we currently don't import such |
92 | // global vars at all. When importing function we aren't interested if any |
93 | // instruction in it takes an address of any basic block, because instruction |
94 | // can only take an address of basic block located in the same function. |
95 | static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, |
96 | SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges, |
97 | SmallPtrSet<const User *, 8> &Visited) { |
98 | bool HasBlockAddress = false; |
99 | SmallVector<const User *, 32> Worklist; |
100 | if (Visited.insert(Ptr: CurUser).second) |
101 | Worklist.push_back(Elt: CurUser); |
102 | |
103 | while (!Worklist.empty()) { |
104 | const User *U = Worklist.pop_back_val(); |
105 | const auto *CB = dyn_cast<CallBase>(Val: U); |
106 | |
107 | for (const auto &OI : U->operands()) { |
108 | const User *Operand = dyn_cast<User>(Val: OI); |
109 | if (!Operand) |
110 | continue; |
111 | if (isa<BlockAddress>(Val: Operand)) { |
112 | HasBlockAddress = true; |
113 | continue; |
114 | } |
115 | if (auto *GV = dyn_cast<GlobalValue>(Val: Operand)) { |
116 | // We have a reference to a global value. This should be added to |
117 | // the reference set unless it is a callee. Callees are handled |
118 | // specially by WriteFunction and are added to a separate list. |
119 | if (!(CB && CB->isCallee(U: &OI))) |
120 | RefEdges.insert(X: Index.getOrInsertValueInfo(GV)); |
121 | continue; |
122 | } |
123 | if (Visited.insert(Ptr: Operand).second) |
124 | Worklist.push_back(Elt: Operand); |
125 | } |
126 | } |
127 | return HasBlockAddress; |
128 | } |
129 | |
130 | static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, |
131 | ProfileSummaryInfo *PSI) { |
132 | if (!PSI) |
133 | return CalleeInfo::HotnessType::Unknown; |
134 | if (PSI->isHotCount(C: ProfileCount)) |
135 | return CalleeInfo::HotnessType::Hot; |
136 | if (PSI->isColdCount(C: ProfileCount)) |
137 | return CalleeInfo::HotnessType::Cold; |
138 | return CalleeInfo::HotnessType::None; |
139 | } |
140 | |
141 | static bool isNonRenamableLocal(const GlobalValue &GV) { |
142 | return GV.hasSection() && GV.hasLocalLinkage(); |
143 | } |
144 | |
145 | /// Determine whether this call has all constant integer arguments (excluding |
146 | /// "this") and summarize it to VCalls or ConstVCalls as appropriate. |
147 | static void addVCallToSet( |
148 | DevirtCallSite Call, GlobalValue::GUID Guid, |
149 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
150 | &VCalls, |
151 | SetVector<FunctionSummary::ConstVCall, |
152 | std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) { |
153 | std::vector<uint64_t> Args; |
154 | // Start from the second argument to skip the "this" pointer. |
155 | for (auto &Arg : drop_begin(RangeOrContainer: Call.CB.args())) { |
156 | auto *CI = dyn_cast<ConstantInt>(Val&: Arg); |
157 | if (!CI || CI->getBitWidth() > 64) { |
158 | VCalls.insert(X: {.GUID: Guid, .Offset: Call.Offset}); |
159 | return; |
160 | } |
161 | Args.push_back(x: CI->getZExtValue()); |
162 | } |
163 | ConstVCalls.insert(X: {.VFunc: {.GUID: Guid, .Offset: Call.Offset}, .Args: std::move(Args)}); |
164 | } |
165 | |
166 | /// If this intrinsic call requires that we add information to the function |
167 | /// summary, do so via the non-constant reference arguments. |
168 | static void addIntrinsicToSummary( |
169 | const CallInst *CI, |
170 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests, |
171 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
172 | &TypeTestAssumeVCalls, |
173 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
174 | &TypeCheckedLoadVCalls, |
175 | SetVector<FunctionSummary::ConstVCall, |
176 | std::vector<FunctionSummary::ConstVCall>> |
177 | &TypeTestAssumeConstVCalls, |
178 | SetVector<FunctionSummary::ConstVCall, |
179 | std::vector<FunctionSummary::ConstVCall>> |
180 | &TypeCheckedLoadConstVCalls, |
181 | DominatorTree &DT) { |
182 | switch (CI->getCalledFunction()->getIntrinsicID()) { |
183 | case Intrinsic::type_test: |
184 | case Intrinsic::public_type_test: { |
185 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1)); |
186 | auto *TypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
187 | if (!TypeId) |
188 | break; |
189 | GlobalValue::GUID Guid = GlobalValue::getGUID(GlobalName: TypeId->getString()); |
190 | |
191 | // Produce a summary from type.test intrinsics. We only summarize type.test |
192 | // intrinsics that are used other than by an llvm.assume intrinsic. |
193 | // Intrinsics that are assumed are relevant only to the devirtualization |
194 | // pass, not the type test lowering pass. |
195 | bool HasNonAssumeUses = llvm::any_of(Range: CI->uses(), P: [](const Use &CIU) { |
196 | return !isa<AssumeInst>(Val: CIU.getUser()); |
197 | }); |
198 | if (HasNonAssumeUses) |
199 | TypeTests.insert(X: Guid); |
200 | |
201 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
202 | SmallVector<CallInst *, 4> Assumes; |
203 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); |
204 | for (auto &Call : DevirtCalls) |
205 | addVCallToSet(Call, Guid, VCalls&: TypeTestAssumeVCalls, |
206 | ConstVCalls&: TypeTestAssumeConstVCalls); |
207 | |
208 | break; |
209 | } |
210 | |
211 | case Intrinsic::type_checked_load_relative: |
212 | case Intrinsic::type_checked_load: { |
213 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2)); |
214 | auto *TypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
215 | if (!TypeId) |
216 | break; |
217 | GlobalValue::GUID Guid = GlobalValue::getGUID(GlobalName: TypeId->getString()); |
218 | |
219 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
220 | SmallVector<Instruction *, 4> LoadedPtrs; |
221 | SmallVector<Instruction *, 4> Preds; |
222 | bool HasNonCallUses = false; |
223 | findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, |
224 | HasNonCallUses, CI, DT); |
225 | // Any non-call uses of the result of llvm.type.checked.load will |
226 | // prevent us from optimizing away the llvm.type.test. |
227 | if (HasNonCallUses) |
228 | TypeTests.insert(X: Guid); |
229 | for (auto &Call : DevirtCalls) |
230 | addVCallToSet(Call, Guid, VCalls&: TypeCheckedLoadVCalls, |
231 | ConstVCalls&: TypeCheckedLoadConstVCalls); |
232 | |
233 | break; |
234 | } |
235 | default: |
236 | break; |
237 | } |
238 | } |
239 | |
240 | static bool isNonVolatileLoad(const Instruction *I) { |
241 | if (const auto *LI = dyn_cast<LoadInst>(Val: I)) |
242 | return !LI->isVolatile(); |
243 | |
244 | return false; |
245 | } |
246 | |
247 | static bool isNonVolatileStore(const Instruction *I) { |
248 | if (const auto *SI = dyn_cast<StoreInst>(Val: I)) |
249 | return !SI->isVolatile(); |
250 | |
251 | return false; |
252 | } |
253 | |
254 | // Returns true if the function definition must be unreachable. |
255 | // |
256 | // Note if this helper function returns true, `F` is guaranteed |
257 | // to be unreachable; if it returns false, `F` might still |
258 | // be unreachable but not covered by this helper function. |
259 | static bool mustBeUnreachableFunction(const Function &F) { |
260 | // A function must be unreachable if its entry block ends with an |
261 | // 'unreachable'. |
262 | assert(!F.isDeclaration()); |
263 | return isa<UnreachableInst>(Val: F.getEntryBlock().getTerminator()); |
264 | } |
265 | |
/// Build the FunctionSummary for \p F and add it to \p Index.
///
/// Every instruction of \p F that is not a debug or pseudo instruction is
/// counted and scanned to collect reference edges, call graph edges, type
/// test / type checked load information and, when \p IsThinLTO is set,
/// memprof allocation and callsite records.
///
/// \param M not referenced by the body.
/// \param BFI null-checked before relative block frequencies are computed; it
///        is also forwarded to PSI->getProfileCount.
/// \param PSI dereferenced unconditionally, so it must be non-null.
/// \param DT forwarded to addIntrinsicToSummary.
/// \param HasLocalsInUsedOrAsm when true, an inline asm call in \p F makes the
///        function not eligible for import.
/// \param CantBePromoted receives the GUID of \p F when
///        isNonRenamableLocal(F) holds.
/// \param IsThinLTO enables the read-only / write-only classification of
///        references and the memprof summarization.
/// \param GetSSICallback queried for \p F; a non-null result supplies the
///        parameter accesses stored in the summary.
266 | static void computeFunctionSummary( |
267 | ModuleSummaryIndex &Index, const Module &M, const Function &F, |
268 | BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, |
269 | bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted, |
270 | bool IsThinLTO, |
271 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
272 | // Summary not currently supported for anonymous functions, they should |
273 | // have been named. |
274 | assert(F.hasName()); |
275 | |
276 | unsigned NumInsts = 0; |
277 | // Map from callee ValueInfo to its CalleeInfo. Used to accumulate hotness |
278 | // and the other per-callee data across all calls to a given callee. |
279 | MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>, |
280 | std::vector<std::pair<ValueInfo, CalleeInfo>>> |
281 | CallGraphEdges; |
282 | SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges, |
283 | StoreRefEdges; |
284 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests; |
285 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
286 | TypeTestAssumeVCalls, TypeCheckedLoadVCalls; |
287 | SetVector<FunctionSummary::ConstVCall, |
288 | std::vector<FunctionSummary::ConstVCall>> |
289 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls; |
290 | ICallPromotionAnalysis ICallAnalysis; |
291 | SmallPtrSet<const User *, 8> Visited; |
292 | |
293 | // Add personality function, prefix data and prologue data to function's ref |
294 | // list. |
295 | findRefEdges(Index, CurUser: &F, RefEdges, Visited); |
// Loads and stores set aside during the instruction walk (ThinLTO only); their
// references are classified after the walk.
296 | std::vector<const Instruction *> NonVolatileLoads; |
297 | std::vector<const Instruction *> NonVolatileStores; |
298 | |
299 | std::vector<CallsiteInfo> Callsites; |
300 | std::vector<AllocInfo> Allocs; |
301 | |
302 | #ifndef NDEBUG |
303 | DenseSet<const CallBase *> CallsThatMayHaveMemprofSummary; |
304 | #endif |
305 | |
306 | bool HasInlineAsmMaybeReferencingInternal = false; |
307 | bool HasIndirBranchToBlockAddress = false; |
308 | bool HasIFuncCall = false; |
309 | bool HasUnknownCall = false; |
310 | bool MayThrow = false; |
311 | for (const BasicBlock &BB : F) { |
312 | // We don't allow inlining of function with indirect branch to blockaddress. |
313 | // If the blockaddress escapes the function, e.g., via a global variable, |
314 | // inlining may lead to an invalid cross-function reference. So we shouldn't |
315 | // import such function either. |
316 | if (BB.hasAddressTaken()) { |
317 | for (User *U : BlockAddress::get(BB: const_cast<BasicBlock *>(&BB))->users()) |
318 | if (!isa<CallBrInst>(Val: *U)) { |
319 | HasIndirBranchToBlockAddress = true; |
// Leaves only the loop over users; the walk over the block's instructions
// below still runs.
320 | break; |
321 | } |
322 | } |
323 | |
324 | for (const Instruction &I : BB) { |
325 | if (I.isDebugOrPseudoInst()) |
326 | continue; |
327 | ++NumInsts; |
328 | |
329 | // Regular LTO module doesn't participate in ThinLTO import, |
330 | // so no reference from it can be read/writeonly, since this |
331 | // would require importing variable as local copy |
332 | if (IsThinLTO) { |
333 | if (isNonVolatileLoad(I: &I)) { |
334 | // Postpone processing of non-volatile load instructions |
335 | // See comments below |
336 | Visited.insert(Ptr: &I); |
337 | NonVolatileLoads.push_back(x: &I); |
338 | continue; |
339 | } else if (isNonVolatileStore(I: &I)) { |
340 | Visited.insert(Ptr: &I); |
341 | NonVolatileStores.push_back(x: &I); |
342 | // All references from second operand of store (destination address) |
343 | // can be considered write-only if they're not referenced by any |
344 | // non-store instruction. References from first operand of store |
345 | // (stored value) can't be treated either as read- or as write-only |
346 | // so we add them to RefEdges as we do with all other instructions |
347 | // except non-volatile load. |
348 | Value *Stored = I.getOperand(i: 0); |
349 | if (auto *GV = dyn_cast<GlobalValue>(Val: Stored)) |
350 | // findRefEdges will try to examine GV operands, so instead |
351 | // of calling it we should add GV to RefEdges directly. |
352 | RefEdges.insert(X: Index.getOrInsertValueInfo(GV)); |
353 | else if (auto *U = dyn_cast<User>(Val: Stored)) |
354 | findRefEdges(Index, CurUser: U, RefEdges, Visited); |
355 | continue; |
356 | } |
357 | } |
358 | findRefEdges(Index, CurUser: &I, RefEdges, Visited); |
359 | const auto *CB = dyn_cast<CallBase>(Val: &I); |
360 | if (!CB) { |
361 | if (I.mayThrow()) |
362 | MayThrow = true; |
363 | continue; |
364 | } |
365 | |
366 | const auto *CI = dyn_cast<CallInst>(Val: &I); |
367 | // Since we don't know exactly which local values are referenced in inline |
368 | // assembly, conservatively mark the function as possibly referencing |
369 | // a local value from inline assembly to ensure we don't export a |
370 | // reference (which would require renaming and promotion of the |
371 | // referenced value). |
372 | if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) |
373 | HasInlineAsmMaybeReferencingInternal = true; |
374 | |
375 | auto *CalledValue = CB->getCalledOperand(); |
376 | auto *CalledFunction = CB->getCalledFunction(); |
377 | if (CalledValue && !CalledFunction) { |
378 | CalledValue = CalledValue->stripPointerCasts(); |
379 | // Stripping pointer casts can reveal a called function. |
380 | CalledFunction = dyn_cast<Function>(Val: CalledValue); |
381 | } |
382 | // Check if this is an alias to a function. If so, get the |
383 | // called aliasee for the checks below. |
384 | if (auto *GA = dyn_cast<GlobalAlias>(Val: CalledValue)) { |
385 | assert(!CalledFunction && "Expected null called function in callsite for alias" ); |
386 | CalledFunction = dyn_cast<Function>(Val: GA->getAliaseeObject()); |
387 | } |
388 | // Check if this is a direct call to a known function or a known |
389 | // intrinsic, or an indirect call with profile data. |
390 | if (CalledFunction) { |
391 | if (CI && CalledFunction->isIntrinsic()) { |
392 | addIntrinsicToSummary( |
393 | CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls, |
394 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT); |
395 | continue; |
396 | } |
397 | // We should have named any anonymous globals |
398 | assert(CalledFunction->hasName()); |
399 | auto ScaledCount = PSI->getProfileCount(CallInst: *CB, BFI); |
400 | auto Hotness = ScaledCount ? getHotness(ProfileCount: *ScaledCount, PSI) |
401 | : CalleeInfo::HotnessType::Unknown; |
// Any setting other than FSHT_None forces edges to known callees cold.
402 | if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) |
403 | Hotness = CalleeInfo::HotnessType::Cold; |
404 | |
405 | // Use the original CalledValue, in case it was an alias. We want |
406 | // to record the call edge to the alias in that case. Eventually |
407 | // an alias summary will be created to associate the alias and |
408 | // aliasee. |
409 | auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo( |
410 | GV: cast<GlobalValue>(Val: CalledValue))]; |
411 | ValueInfo.updateHotness(OtherHotness: Hotness); |
412 | if (CB->isTailCall()) |
413 | ValueInfo.setHasTailCall(true); |
414 | // Add the relative block frequency to CalleeInfo if there is no profile |
415 | // information. |
416 | if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { |
417 | uint64_t BBFreq = BFI->getBlockFreq(BB: &BB).getFrequency(); |
418 | uint64_t EntryFreq = BFI->getEntryFreq().getFrequency(); |
419 | ValueInfo.updateRelBlockFreq(BlockFreq: BBFreq, EntryFreq); |
420 | } |
421 | } else { |
422 | HasUnknownCall = true; |
423 | // If F is imported, a local linkage ifunc (e.g. target_clones on a |
424 | // static function) called by F will be cloned. Since summaries don't |
425 | // track ifunc, we do not know implementation functions referenced by |
426 | // the ifunc resolver need to be promoted in the exporter, and we will |
427 | // get linker errors due to cloned declarations for implementation |
428 | // functions. As a simple fix, just mark F as not eligible for import. |
429 | // Non-local ifunc is not cloned and does not have the issue. |
430 | if (auto *GI = dyn_cast_if_present<GlobalIFunc>(Val: CalledValue)) |
431 | if (GI->hasLocalLinkage()) |
432 | HasIFuncCall = true; |
433 | // Skip inline assembly calls. |
434 | if (CI && CI->isInlineAsm()) |
435 | continue; |
436 | // Skip calls whose callee is absent or is a Constant that did not |
// resolve to a Function above; only non-constant callees are handled as
// indirect calls below.
437 | if (!CalledValue || isa<Constant>(Val: CalledValue)) |
438 | continue; |
439 | |
440 | // Check if the instruction has a callees metadata. If so, add callees |
441 | // to CallGraphEdges to reflect the references from the metadata, and |
442 | // to enable importing for subsequent indirect call promotion and |
443 | // inlining. |
444 | if (auto *MD = I.getMetadata(KindID: LLVMContext::MD_callees)) { |
445 | for (const auto &Op : MD->operands()) { |
446 | Function *Callee = mdconst::extract_or_null<Function>(MD: Op); |
447 | if (Callee) |
448 | CallGraphEdges[Index.getOrInsertValueInfo(GV: Callee)]; |
449 | } |
450 | } |
451 | |
// Output parameters of getPromotionCandidatesForInstruction; their values are
// not read afterwards, only the returned candidate list is used.
452 | uint32_t NumVals, NumCandidates; |
453 | uint64_t TotalCount; |
454 | auto CandidateProfileData = |
455 | ICallAnalysis.getPromotionCandidatesForInstruction( |
456 | I: &I, NumVals, TotalCount, NumCandidates); |
457 | for (const auto &Candidate : CandidateProfileData) |
458 | CallGraphEdges[Index.getOrInsertValueInfo(GUID: Candidate.Value)] |
459 | .updateHotness(OtherHotness: getHotness(ProfileCount: Candidate.Count, PSI)); |
460 | } |
461 | |
462 | // Summarize memprof related metadata. This is only needed for ThinLTO. |
463 | if (!IsThinLTO) |
464 | continue; |
465 | |
466 | // TODO: Skip indirect calls for now. Need to handle these better, likely |
467 | // by creating multiple Callsites, one per target, then speculatively |
468 | // devirtualize while applying clone info in the ThinLTO backends. This |
469 | // will also be important because we will have a different set of clone |
470 | // versions per target. This handling needs to match that in the ThinLTO |
471 | // backend so we handle things consistently for matching of callsite |
472 | // summaries to instructions. |
473 | if (!CalledFunction) |
474 | continue; |
475 | |
476 | // Ensure we keep this analysis in sync with the handling in the ThinLTO |
477 | // backend (see MemProfContextDisambiguation::applyImport). Save this call |
478 | // so that we can skip it in checking the reverse case later. |
479 | assert(mayHaveMemprofSummary(CB)); |
480 | #ifndef NDEBUG |
481 | CallsThatMayHaveMemprofSummary.insert(V: CB); |
482 | #endif |
483 | |
484 | // Compute the list of stack ids first (so we can trim them from the stack |
485 | // ids on any MIBs). |
486 | CallStack<MDNode, MDNode::op_iterator> InstCallsite( |
487 | I.getMetadata(KindID: LLVMContext::MD_callsite)); |
488 | auto *MemProfMD = I.getMetadata(KindID: LLVMContext::MD_memprof); |
489 | if (MemProfMD) { |
490 | std::vector<MIBInfo> MIBs; |
491 | for (auto &MDOp : MemProfMD->operands()) { |
492 | auto *MIBMD = cast<const MDNode>(Val: MDOp); |
493 | MDNode *StackNode = getMIBStackNode(MIB: MIBMD); |
494 | assert(StackNode); |
495 | SmallVector<unsigned> StackIdIndices; |
496 | CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); |
497 | // Iterate from beginAfterSharedPrefix(InstCallsite): presumably this skips |
// the leading frames the context shares with this instruction's own callsite
// stack, collapsing frames inlined into the allocation call.
498 | for (auto ContextIter = |
499 | StackContext.beginAfterSharedPrefix(Other&: InstCallsite); |
500 | ContextIter != StackContext.end(); ++ContextIter) { |
501 | unsigned StackIdIdx = Index.addOrGetStackIdIndex(StackId: *ContextIter); |
502 | // If this is a direct recursion, simply skip the duplicate |
503 | // entries. If this is mutual recursion, handling is left to |
504 | // the LTO link analysis client. |
505 | if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx) |
506 | StackIdIndices.push_back(Elt: StackIdIdx); |
507 | } |
508 | MIBs.push_back( |
509 | x: MIBInfo(getMIBAllocType(MIB: MIBMD), std::move(StackIdIndices))); |
510 | } |
511 | Allocs.push_back(x: AllocInfo(std::move(MIBs))); |
512 | } else if (!InstCallsite.empty()) { |
513 | SmallVector<unsigned> StackIdIndices; |
514 | for (auto StackId : InstCallsite) |
515 | StackIdIndices.push_back(Elt: Index.addOrGetStackIdIndex(StackId)); |
516 | // Use the original CalledValue, in case it was an alias. We want |
517 | // to record the call edge to the alias in that case. Eventually |
518 | // an alias summary will be created to associate the alias and |
519 | // aliasee. |
520 | auto CalleeValueInfo = |
521 | Index.getOrInsertValueInfo(GV: cast<GlobalValue>(Val: CalledValue)); |
522 | Callsites.push_back(x: {CalleeValueInfo, StackIdIndices}); |
523 | } |
524 | } |
525 | } |
526 | |
527 | if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize) |
528 | Index.addBlockCount(C: F.size()); |
529 | |
530 | std::vector<ValueInfo> Refs; |
531 | if (IsThinLTO) { |
// Collect the references of each postponed instruction into Edges. The
// instruction was inserted into Visited when it was postponed, so it is erased
// from Cache first; otherwise findRefEdges would not expand it.
532 | auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs, |
533 | SetVector<ValueInfo, std::vector<ValueInfo>> &Edges, |
534 | SmallPtrSet<const User *, 8> &Cache) { |
535 | for (const auto *I : Instrs) { |
536 | Cache.erase(Ptr: I); |
537 | findRefEdges(Index, CurUser: I, RefEdges&: Edges, Visited&: Cache); |
538 | } |
539 | }; |
540 | |
541 | // By now we processed all instructions in a function, except |
542 | // non-volatile loads and non-volatile value stores. Let's find |
543 | // ref edges for both of instruction sets |
544 | AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited); |
545 | // We can add some values to the Visited set when processing load |
546 | // instructions which are also used by stores in NonVolatileStores. |
547 | // For example this can happen if we have following code: |
548 | // |
549 | // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) |
550 | // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) |
551 | // |
552 | // After processing loads we'll add bitcast to the Visited set, and if |
553 | // we use the same set while processing stores, we'll never see store |
554 | // to @bar and @bar will be mistakenly treated as readonly. |
555 | SmallPtrSet<const llvm::User *, 8> StoreCache; |
556 | AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache); |
557 | |
558 | // If both load and store instruction reference the same variable |
559 | // we won't be able to optimize it. Add all such reference edges |
560 | // to RefEdges set. |
561 | for (const auto &VI : StoreRefEdges) |
562 | if (LoadRefEdges.remove(X: VI)) |
563 | RefEdges.insert(X: VI); |
564 | |
565 | unsigned RefCnt = RefEdges.size(); |
566 | // All new reference edges inserted in two loops below are either |
567 | // read or write only. They will be grouped in the end of RefEdges |
568 | // vector, so we can use a single integer value to identify them. |
569 | for (const auto &VI : LoadRefEdges) |
570 | RefEdges.insert(X: VI); |
571 | |
572 | unsigned FirstWORef = RefEdges.size(); |
573 | for (const auto &VI : StoreRefEdges) |
574 | RefEdges.insert(X: VI); |
575 | |
// Resulting layout of Refs: [0, old RefCnt) unclassified, [old RefCnt,
// FirstWORef) read-only, [FirstWORef, size) write-only.
576 | Refs = RefEdges.takeVector(); |
577 | for (; RefCnt < FirstWORef; ++RefCnt) |
578 | Refs[RefCnt].setReadOnly(); |
579 | |
580 | for (; RefCnt < Refs.size(); ++RefCnt) |
581 | Refs[RefCnt].setWriteOnly(); |
582 | } else { |
583 | Refs = RefEdges.takeVector(); |
584 | } |
585 | // Explicit add hot edges to enforce importing for designated GUIDs for |
586 | // sample PGO, to enable the same inlines as the profiled optimized binary. |
587 | for (auto &I : F.getImportGUIDs()) |
588 | CallGraphEdges[Index.getOrInsertValueInfo(GUID: I)].updateHotness( |
589 | OtherHotness: ForceSummaryEdgesCold == FunctionSummary::FSHT_All |
590 | ? CalleeInfo::HotnessType::Cold |
591 | : CalleeInfo::HotnessType::Critical); |
592 | |
593 | #ifndef NDEBUG |
594 | // Make sure that all calls we decided could not have memprof summaries get a |
595 | // false value for mayHaveMemprofSummary, to ensure that this handling remains |
596 | // in sync with the ThinLTO backend handling. |
597 | if (IsThinLTO) { |
598 | for (const BasicBlock &BB : F) { |
599 | for (const Instruction &I : BB) { |
600 | const auto *CB = dyn_cast<CallBase>(Val: &I); |
601 | if (!CB) |
602 | continue; |
603 | // We already checked these above. |
604 | if (CallsThatMayHaveMemprofSummary.count(V: CB)) |
605 | continue; |
606 | assert(!mayHaveMemprofSummary(CB)); |
607 | } |
608 | } |
609 | } |
610 | #endif |
611 | |
612 | bool NonRenamableLocal = isNonRenamableLocal(GV: F); |
613 | bool NotEligibleForImport = NonRenamableLocal || |
614 | HasInlineAsmMaybeReferencingInternal || |
615 | HasIndirBranchToBlockAddress || HasIFuncCall; |
616 | GlobalValueSummary::GVFlags Flags( |
617 | F.getLinkage(), F.getVisibility(), NotEligibleForImport, |
618 | /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable()); |
619 | FunctionSummary::FFlags FunFlags{ |
620 | F.doesNotAccessMemory(), F.onlyReadsMemory() && !F.doesNotAccessMemory(), |
621 | F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(), |
622 | // FIXME: refactor this to use the same code that inliner is using. |
623 | // Don't try to import functions with noinline attribute. |
624 | F.getAttributes().hasFnAttr(Attribute::NoInline), |
625 | F.hasFnAttribute(Attribute::AlwaysInline), |
626 | F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall, |
627 | mustBeUnreachableFunction(F)}; |
628 | std::vector<FunctionSummary::ParamAccess> ParamAccesses; |
629 | if (auto *SSI = GetSSICallback(F)) |
630 | ParamAccesses = SSI->getParamAccesses(Index); |
631 | auto FuncSummary = std::make_unique<FunctionSummary>( |
632 | args&: Flags, args&: NumInsts, args&: FunFlags, /*EntryCount=*/args: 0, args: std::move(Refs), |
633 | args: CallGraphEdges.takeVector(), args: TypeTests.takeVector(), |
634 | args: TypeTestAssumeVCalls.takeVector(), args: TypeCheckedLoadVCalls.takeVector(), |
635 | args: TypeTestAssumeConstVCalls.takeVector(), |
636 | args: TypeCheckedLoadConstVCalls.takeVector(), args: std::move(ParamAccesses), |
637 | args: std::move(Callsites), args: std::move(Allocs)); |
638 | if (NonRenamableLocal) |
639 | CantBePromoted.insert(V: F.getGUID()); |
640 | Index.addGlobalValueSummary(GV: F, Summary: std::move(FuncSummary)); |
641 | } |
642 | |
643 | /// Find function pointers referenced within the given vtable initializer |
644 | /// (or subset of an initializer) \p I. The starting offset of \p I within |
645 | /// the vtable initializer is \p StartingOffset. Any discovered function |
646 | /// pointers are added to \p VTableFuncs along with their cumulative offset |
647 | /// within the initializer. |
648 | static void findFuncPointers(const Constant *I, uint64_t StartingOffset, |
649 | const Module &M, ModuleSummaryIndex &Index, |
650 | VTableFuncList &VTableFuncs) { |
651 | // First check if this is a function pointer. |
652 | if (I->getType()->isPointerTy()) { |
653 | auto C = I->stripPointerCasts(); |
654 | auto A = dyn_cast<GlobalAlias>(Val: C); |
655 | if (isa<Function>(Val: C) || (A && isa<Function>(Val: A->getAliasee()))) { |
656 | auto GV = dyn_cast<GlobalValue>(Val: C); |
657 | assert(GV); |
658 | // We can disregard __cxa_pure_virtual as a possible call target, as |
659 | // calls to pure virtuals are UB. |
660 | if (GV && GV->getName() != "__cxa_pure_virtual" ) |
661 | VTableFuncs.push_back(x: {Index.getOrInsertValueInfo(GV), StartingOffset}); |
662 | return; |
663 | } |
664 | } |
665 | |
666 | // Walk through the elements in the constant struct or array and recursively |
667 | // look for virtual function pointers. |
668 | const DataLayout &DL = M.getDataLayout(); |
669 | if (auto *C = dyn_cast<ConstantStruct>(Val: I)) { |
670 | StructType *STy = dyn_cast<StructType>(Val: C->getType()); |
671 | assert(STy); |
672 | const StructLayout *SL = DL.getStructLayout(Ty: C->getType()); |
673 | |
674 | for (auto EI : llvm::enumerate(First: STy->elements())) { |
675 | auto Offset = SL->getElementOffset(Idx: EI.index()); |
676 | unsigned Op = SL->getElementContainingOffset(FixedOffset: Offset); |
677 | findFuncPointers(I: cast<Constant>(Val: I->getOperand(i: Op)), |
678 | StartingOffset: StartingOffset + Offset, M, Index, VTableFuncs); |
679 | } |
680 | } else if (auto *C = dyn_cast<ConstantArray>(Val: I)) { |
681 | ArrayType *ATy = C->getType(); |
682 | Type *EltTy = ATy->getElementType(); |
683 | uint64_t EltSize = DL.getTypeAllocSize(Ty: EltTy); |
684 | for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { |
685 | findFuncPointers(I: cast<Constant>(Val: I->getOperand(i)), |
686 | StartingOffset: StartingOffset + i * EltSize, M, Index, VTableFuncs); |
687 | } |
688 | } |
689 | } |
690 | |
691 | // Identify the function pointers referenced by vtable definition \p V. |
692 | static void computeVTableFuncs(ModuleSummaryIndex &Index, |
693 | const GlobalVariable &V, const Module &M, |
694 | VTableFuncList &VTableFuncs) { |
695 | if (!V.isConstant()) |
696 | return; |
697 | |
698 | findFuncPointers(I: V.getInitializer(), /*StartingOffset=*/0, M, Index, |
699 | VTableFuncs); |
700 | |
701 | #ifndef NDEBUG |
702 | // Validate that the VTableFuncs list is ordered by offset. |
703 | uint64_t PrevOffset = 0; |
704 | for (auto &P : VTableFuncs) { |
705 | // The findVFuncPointers traversal should have encountered the |
706 | // functions in offset order. We need to use ">=" since PrevOffset |
707 | // starts at 0. |
708 | assert(P.VTableOffset >= PrevOffset); |
709 | PrevOffset = P.VTableOffset; |
710 | } |
711 | #endif |
712 | } |
713 | |
714 | /// Record vtable definition \p V for each type metadata it references. |
715 | static void |
716 | recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index, |
717 | const GlobalVariable &V, |
718 | SmallVectorImpl<MDNode *> &Types) { |
719 | for (MDNode *Type : Types) { |
720 | auto TypeID = Type->getOperand(I: 1).get(); |
721 | |
722 | uint64_t Offset = |
723 | cast<ConstantInt>( |
724 | Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue()) |
725 | ->getZExtValue(); |
726 | |
727 | if (auto *TypeId = dyn_cast<MDString>(Val: TypeID)) |
728 | Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId: TypeId->getString()) |
729 | .push_back(x: {Offset, Index.getOrInsertValueInfo(GV: &V)}); |
730 | } |
731 | } |
732 | |
733 | static void computeVariableSummary(ModuleSummaryIndex &Index, |
734 | const GlobalVariable &V, |
735 | DenseSet<GlobalValue::GUID> &CantBePromoted, |
736 | const Module &M, |
737 | SmallVectorImpl<MDNode *> &Types) { |
738 | SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges; |
739 | SmallPtrSet<const User *, 8> Visited; |
740 | bool HasBlockAddress = findRefEdges(Index, CurUser: &V, RefEdges, Visited); |
741 | bool NonRenamableLocal = isNonRenamableLocal(GV: V); |
742 | GlobalValueSummary::GVFlags Flags( |
743 | V.getLinkage(), V.getVisibility(), NonRenamableLocal, |
744 | /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable()); |
745 | |
746 | VTableFuncList VTableFuncs; |
747 | // If splitting is not enabled, then we compute the summary information |
748 | // necessary for index-based whole program devirtualization. |
749 | if (!Index.enableSplitLTOUnit()) { |
750 | Types.clear(); |
751 | V.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
752 | if (!Types.empty()) { |
753 | // Identify the function pointers referenced by this vtable definition. |
754 | computeVTableFuncs(Index, V, M, VTableFuncs); |
755 | |
756 | // Record this vtable definition for each type metadata it references. |
757 | recordTypeIdCompatibleVtableReferences(Index, V, Types); |
758 | } |
759 | } |
760 | |
761 | // Don't mark variables we won't be able to internalize as read/write-only. |
762 | bool CanBeInternalized = |
763 | !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && |
764 | !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); |
765 | bool Constant = V.isConstant(); |
766 | GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, |
767 | Constant ? false : CanBeInternalized, |
768 | Constant, V.getVCallVisibility()); |
769 | auto GVarSummary = std::make_unique<GlobalVarSummary>(args&: Flags, args&: VarFlags, |
770 | args: RefEdges.takeVector()); |
771 | if (NonRenamableLocal) |
772 | CantBePromoted.insert(V: V.getGUID()); |
773 | if (HasBlockAddress) |
774 | GVarSummary->setNotEligibleToImport(); |
775 | if (!VTableFuncs.empty()) |
776 | GVarSummary->setVTableFuncs(VTableFuncs); |
777 | Index.addGlobalValueSummary(GV: V, Summary: std::move(GVarSummary)); |
778 | } |
779 | |
780 | static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, |
781 | DenseSet<GlobalValue::GUID> &CantBePromoted) { |
782 | // Skip summary for indirect function aliases as summary for aliasee will not |
783 | // be emitted. |
784 | const GlobalObject *Aliasee = A.getAliaseeObject(); |
785 | if (isa<GlobalIFunc>(Val: Aliasee)) |
786 | return; |
787 | bool NonRenamableLocal = isNonRenamableLocal(GV: A); |
788 | GlobalValueSummary::GVFlags Flags( |
789 | A.getLinkage(), A.getVisibility(), NonRenamableLocal, |
790 | /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable()); |
791 | auto AS = std::make_unique<AliasSummary>(args&: Flags); |
792 | auto AliaseeVI = Index.getValueInfo(GUID: Aliasee->getGUID()); |
793 | assert(AliaseeVI && "Alias expects aliasee summary to be available" ); |
794 | assert(AliaseeVI.getSummaryList().size() == 1 && |
795 | "Expected a single entry per aliasee in per-module index" ); |
796 | AS->setAliasee(AliaseeVI, Aliasee: AliaseeVI.getSummaryList()[0].get()); |
797 | if (NonRenamableLocal) |
798 | CantBePromoted.insert(V: A.getGUID()); |
799 | Index.addGlobalValueSummary(GV: A, Summary: std::move(AS)); |
800 | } |
801 | |
802 | // Set LiveRoot flag on entries matching the given value name. |
803 | static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { |
804 | if (ValueInfo VI = Index.getValueInfo(GUID: GlobalValue::getGUID(GlobalName: Name))) |
805 | for (const auto &Summary : VI.getSummaryList()) |
806 | Summary->setLive(true); |
807 | } |
808 | |
809 | ModuleSummaryIndex llvm::buildModuleSummaryIndex( |
810 | const Module &M, |
811 | std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, |
812 | ProfileSummaryInfo *PSI, |
813 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
814 | assert(PSI); |
815 | bool EnableSplitLTOUnit = false; |
816 | bool UnifiedLTO = false; |
817 | if (auto *MD = mdconst::extract_or_null<ConstantInt>( |
818 | MD: M.getModuleFlag(Key: "EnableSplitLTOUnit" ))) |
819 | EnableSplitLTOUnit = MD->getZExtValue(); |
820 | if (auto *MD = |
821 | mdconst::extract_or_null<ConstantInt>(MD: M.getModuleFlag(Key: "UnifiedLTO" ))) |
822 | UnifiedLTO = MD->getZExtValue(); |
823 | ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO); |
824 | |
825 | // Identify the local values in the llvm.used and llvm.compiler.used sets, |
826 | // which should not be exported as they would then require renaming and |
827 | // promotion, but we may have opaque uses e.g. in inline asm. We collect them |
828 | // here because we use this information to mark functions containing inline |
829 | // assembly calls as not importable. |
830 | SmallPtrSet<GlobalValue *, 4> LocalsUsed; |
831 | SmallVector<GlobalValue *, 4> Used; |
832 | // First collect those in the llvm.used set. |
833 | collectUsedGlobalVariables(M, Vec&: Used, /*CompilerUsed=*/false); |
834 | // Next collect those in the llvm.compiler.used set. |
835 | collectUsedGlobalVariables(M, Vec&: Used, /*CompilerUsed=*/true); |
836 | DenseSet<GlobalValue::GUID> CantBePromoted; |
837 | for (auto *V : Used) { |
838 | if (V->hasLocalLinkage()) { |
839 | LocalsUsed.insert(Ptr: V); |
840 | CantBePromoted.insert(V: V->getGUID()); |
841 | } |
842 | } |
843 | |
844 | bool HasLocalInlineAsmSymbol = false; |
845 | if (!M.getModuleInlineAsm().empty()) { |
846 | // Collect the local values defined by module level asm, and set up |
847 | // summaries for these symbols so that they can be marked as NoRename, |
848 | // to prevent export of any use of them in regular IR that would require |
849 | // renaming within the module level asm. Note we don't need to create a |
850 | // summary for weak or global defs, as they don't need to be flagged as |
851 | // NoRename, and defs in module level asm can't be imported anyway. |
852 | // Also, any values used but not defined within module level asm should |
853 | // be listed on the llvm.used or llvm.compiler.used global and marked as |
854 | // referenced from there. |
855 | ModuleSymbolTable::CollectAsmSymbols( |
856 | M, AsmSymbol: [&](StringRef Name, object::BasicSymbolRef::Flags Flags) { |
857 | // Symbols not marked as Weak or Global are local definitions. |
858 | if (Flags & (object::BasicSymbolRef::SF_Weak | |
859 | object::BasicSymbolRef::SF_Global)) |
860 | return; |
861 | HasLocalInlineAsmSymbol = true; |
862 | GlobalValue *GV = M.getNamedValue(Name); |
863 | if (!GV) |
864 | return; |
865 | assert(GV->isDeclaration() && "Def in module asm already has definition" ); |
866 | GlobalValueSummary::GVFlags GVFlags( |
867 | GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility, |
868 | /* NotEligibleToImport = */ true, |
869 | /* Live = */ true, |
870 | /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable()); |
871 | CantBePromoted.insert(V: GV->getGUID()); |
872 | // Create the appropriate summary type. |
873 | if (Function *F = dyn_cast<Function>(Val: GV)) { |
874 | std::unique_ptr<FunctionSummary> Summary = |
875 | std::make_unique<FunctionSummary>( |
876 | GVFlags, /*InstCount=*/0, |
877 | FunctionSummary::FFlags{ |
878 | F->hasFnAttribute(Attribute::ReadNone), |
879 | F->hasFnAttribute(Attribute::ReadOnly), |
880 | F->hasFnAttribute(Attribute::NoRecurse), |
881 | F->returnDoesNotAlias(), |
882 | /* NoInline = */ false, |
883 | F->hasFnAttribute(Attribute::AlwaysInline), |
884 | F->hasFnAttribute(Attribute::NoUnwind), |
885 | /* MayThrow */ true, |
886 | /* HasUnknownCall */ true, |
887 | /* MustBeUnreachable */ false}, |
888 | /*EntryCount=*/0, ArrayRef<ValueInfo>{}, |
889 | ArrayRef<FunctionSummary::EdgeTy>{}, |
890 | ArrayRef<GlobalValue::GUID>{}, |
891 | ArrayRef<FunctionSummary::VFuncId>{}, |
892 | ArrayRef<FunctionSummary::VFuncId>{}, |
893 | ArrayRef<FunctionSummary::ConstVCall>{}, |
894 | ArrayRef<FunctionSummary::ConstVCall>{}, |
895 | ArrayRef<FunctionSummary::ParamAccess>{}, |
896 | ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{}); |
897 | Index.addGlobalValueSummary(GV: *GV, Summary: std::move(Summary)); |
898 | } else { |
899 | std::unique_ptr<GlobalVarSummary> Summary = |
900 | std::make_unique<GlobalVarSummary>( |
901 | args&: GVFlags, |
902 | args: GlobalVarSummary::GVarFlags( |
903 | false, false, cast<GlobalVariable>(Val: GV)->isConstant(), |
904 | GlobalObject::VCallVisibilityPublic), |
905 | args: ArrayRef<ValueInfo>{}); |
906 | Index.addGlobalValueSummary(GV: *GV, Summary: std::move(Summary)); |
907 | } |
908 | }); |
909 | } |
910 | |
911 | bool IsThinLTO = true; |
912 | if (auto *MD = |
913 | mdconst::extract_or_null<ConstantInt>(MD: M.getModuleFlag(Key: "ThinLTO" ))) |
914 | IsThinLTO = MD->getZExtValue(); |
915 | |
916 | // Compute summaries for all functions defined in module, and save in the |
917 | // index. |
918 | for (const auto &F : M) { |
919 | if (F.isDeclaration()) |
920 | continue; |
921 | |
922 | DominatorTree DT(const_cast<Function &>(F)); |
923 | BlockFrequencyInfo *BFI = nullptr; |
924 | std::unique_ptr<BlockFrequencyInfo> BFIPtr; |
925 | if (GetBFICallback) |
926 | BFI = GetBFICallback(F); |
927 | else if (F.hasProfileData()) { |
928 | LoopInfo LI{DT}; |
929 | BranchProbabilityInfo BPI{F, LI}; |
930 | BFIPtr = std::make_unique<BlockFrequencyInfo>(args: F, args&: BPI, args&: LI); |
931 | BFI = BFIPtr.get(); |
932 | } |
933 | |
934 | computeFunctionSummary(Index, M, F, BFI, PSI, DT, |
935 | HasLocalsInUsedOrAsm: !LocalsUsed.empty() || HasLocalInlineAsmSymbol, |
936 | CantBePromoted, IsThinLTO, GetSSICallback); |
937 | } |
938 | |
939 | // Compute summaries for all variables defined in module, and save in the |
940 | // index. |
941 | SmallVector<MDNode *, 2> Types; |
942 | for (const GlobalVariable &G : M.globals()) { |
943 | if (G.isDeclaration()) |
944 | continue; |
945 | computeVariableSummary(Index, V: G, CantBePromoted, M, Types); |
946 | } |
947 | |
948 | // Compute summaries for all aliases defined in module, and save in the |
949 | // index. |
950 | for (const GlobalAlias &A : M.aliases()) |
951 | computeAliasSummary(Index, A, CantBePromoted); |
952 | |
953 | // Iterate through ifuncs, set their resolvers all alive. |
954 | for (const GlobalIFunc &I : M.ifuncs()) { |
955 | I.applyAlongResolverPath(Op: [&Index](const GlobalValue &GV) { |
956 | Index.getGlobalValueSummary(GV)->setLive(true); |
957 | }); |
958 | } |
959 | |
960 | for (auto *V : LocalsUsed) { |
961 | auto *Summary = Index.getGlobalValueSummary(GV: *V); |
962 | assert(Summary && "Missing summary for global value" ); |
963 | Summary->setNotEligibleToImport(); |
964 | } |
965 | |
966 | // The linker doesn't know about these LLVM produced values, so we need |
967 | // to flag them as live in the index to ensure index-based dead value |
968 | // analysis treats them as live roots of the analysis. |
969 | setLiveRoot(Index, Name: "llvm.used" ); |
970 | setLiveRoot(Index, Name: "llvm.compiler.used" ); |
971 | setLiveRoot(Index, Name: "llvm.global_ctors" ); |
972 | setLiveRoot(Index, Name: "llvm.global_dtors" ); |
973 | setLiveRoot(Index, Name: "llvm.global.annotations" ); |
974 | |
975 | for (auto &GlobalList : Index) { |
976 | // Ignore entries for references that are undefined in the current module. |
977 | if (GlobalList.second.SummaryList.empty()) |
978 | continue; |
979 | |
980 | assert(GlobalList.second.SummaryList.size() == 1 && |
981 | "Expected module's index to have one summary per GUID" ); |
982 | auto &Summary = GlobalList.second.SummaryList[0]; |
983 | if (!IsThinLTO) { |
984 | Summary->setNotEligibleToImport(); |
985 | continue; |
986 | } |
987 | |
988 | bool AllRefsCanBeExternallyReferenced = |
989 | llvm::all_of(Range: Summary->refs(), P: [&](const ValueInfo &VI) { |
990 | return !CantBePromoted.count(V: VI.getGUID()); |
991 | }); |
992 | if (!AllRefsCanBeExternallyReferenced) { |
993 | Summary->setNotEligibleToImport(); |
994 | continue; |
995 | } |
996 | |
997 | if (auto *FuncSummary = dyn_cast<FunctionSummary>(Val: Summary.get())) { |
998 | bool AllCallsCanBeExternallyReferenced = llvm::all_of( |
999 | Range: FuncSummary->calls(), P: [&](const FunctionSummary::EdgeTy &Edge) { |
1000 | return !CantBePromoted.count(V: Edge.first.getGUID()); |
1001 | }); |
1002 | if (!AllCallsCanBeExternallyReferenced) |
1003 | Summary->setNotEligibleToImport(); |
1004 | } |
1005 | } |
1006 | |
1007 | if (!ModuleSummaryDotFile.empty()) { |
1008 | std::error_code EC; |
1009 | raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); |
1010 | if (EC) |
1011 | report_fatal_error(reason: Twine("Failed to open dot file " ) + |
1012 | ModuleSummaryDotFile + ": " + EC.message() + "\n" ); |
1013 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols: {}); |
1014 | } |
1015 | |
1016 | return Index; |
1017 | } |
1018 | |
// Out-of-line definition of the static analysis key of
// ModuleSummaryIndexAnalysis.
AnalysisKey ModuleSummaryIndexAnalysis::Key;
1020 | |
1021 | ModuleSummaryIndex |
1022 | ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { |
1023 | ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(IR&: M); |
1024 | auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
1025 | bool NeedSSI = needsParamAccessSummary(M); |
1026 | return buildModuleSummaryIndex( |
1027 | M, |
1028 | GetBFICallback: [&FAM](const Function &F) { |
1029 | return &FAM.getResult<BlockFrequencyAnalysis>( |
1030 | IR&: *const_cast<Function *>(&F)); |
1031 | }, |
1032 | PSI: &PSI, |
1033 | GetSSICallback: [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * { |
1034 | return NeedSSI ? &FAM.getResult<StackSafetyAnalysis>( |
1035 | IR&: const_cast<Function &>(F)) |
1036 | : nullptr; |
1037 | }); |
1038 | } |
1039 | |
// Static pass identifier; the constructor hands it to the ModulePass base.
char ModuleSummaryIndexWrapperPass::ID = 0;

// Register the wrapper pass under the name "module-summary-analysis" and list
// the wrapper passes it depends on. These match the passes requested in
// getAnalysisUsage.
// NOTE(review): the trailing `false, true` are presumably the CFG-only and
// is-analysis flags of the registration macro -- confirm against PassSupport.h.
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis" ,
                      "Module Summary Analysis" , false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis" ,
                    "Module Summary Analysis" , false, true)
1049 | |
// Factory: returns a newly allocated ModuleSummaryIndexWrapperPass. The
// returned pointer is a raw `new` result.
ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
  return new ModuleSummaryIndexWrapperPass();
}
1053 | |
1054 | ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() |
1055 | : ModulePass(ID) { |
1056 | initializeModuleSummaryIndexWrapperPassPass(Registry&: *PassRegistry::getPassRegistry()); |
1057 | } |
1058 | |
1059 | bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { |
1060 | auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
1061 | bool NeedSSI = needsParamAccessSummary(M); |
1062 | Index.emplace(args: buildModuleSummaryIndex( |
1063 | M, |
1064 | GetBFICallback: [this](const Function &F) { |
1065 | return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>( |
1066 | F&: *const_cast<Function *>(&F)) |
1067 | .getBFI()); |
1068 | }, |
1069 | PSI, |
1070 | GetSSICallback: [&](const Function &F) -> const StackSafetyInfo * { |
1071 | return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>( |
1072 | F&: const_cast<Function &>(F)) |
1073 | .getResult() |
1074 | : nullptr; |
1075 | })); |
1076 | return false; |
1077 | } |
1078 | |
// Drop the index built by runOnModule. Always returns false; \p M is unused.
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
  Index.reset();
  return false;
}
1083 | |
// Declare that this pass preserves all analyses and requires the block
// frequency, profile summary and stack safety wrapper passes (the ones
// queried in runOnModule).
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<BlockFrequencyInfoWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<StackSafetyInfoWrapperPass>();
}
1090 | |
// Static pass identifier; the constructor hands it to the ImmutablePass base.
char ImmutableModuleSummaryIndexWrapperPass::ID = 0;
1092 | |
1093 | ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass( |
1094 | const ModuleSummaryIndex *Index) |
1095 | : ImmutablePass(ID), Index(Index) { |
1096 | initializeImmutableModuleSummaryIndexWrapperPassPass( |
1097 | *PassRegistry::getPassRegistry()); |
1098 | } |
1099 | |
// Declare that this pass preserves all analyses; it requires none.
void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
}
1104 | |
// Factory: returns a newly allocated ImmutableModuleSummaryIndexWrapperPass
// wrapping \p Index. The returned pointer is a raw `new` result.
ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass(
    const ModuleSummaryIndex *Index) {
  return new ImmutableModuleSummaryIndexWrapperPass(Index);
}
1109 | |
// Register the immutable wrapper pass under the name "module-summary-info".
// NOTE(review): the trailing `false, true` are presumably the CFG-only and
// is-analysis flags of the registration macro -- confirm against PassSupport.h.
INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info" ,
                "Module summary info" , false, true)
1112 | |
1113 | bool llvm::mayHaveMemprofSummary(const CallBase *CB) { |
1114 | if (!CB) |
1115 | return false; |
1116 | if (CB->isDebugOrPseudoInst()) |
1117 | return false; |
1118 | auto *CI = dyn_cast<CallInst>(Val: CB); |
1119 | auto *CalledValue = CB->getCalledOperand(); |
1120 | auto *CalledFunction = CB->getCalledFunction(); |
1121 | if (CalledValue && !CalledFunction) { |
1122 | CalledValue = CalledValue->stripPointerCasts(); |
1123 | // Stripping pointer casts can reveal a called function. |
1124 | CalledFunction = dyn_cast<Function>(Val: CalledValue); |
1125 | } |
1126 | // Check if this is an alias to a function. If so, get the |
1127 | // called aliasee for the checks below. |
1128 | if (auto *GA = dyn_cast<GlobalAlias>(Val: CalledValue)) { |
1129 | assert(!CalledFunction && |
1130 | "Expected null called function in callsite for alias" ); |
1131 | CalledFunction = dyn_cast<Function>(Val: GA->getAliaseeObject()); |
1132 | } |
1133 | // Check if this is a direct call to a known function or a known |
1134 | // intrinsic, or an indirect call with profile data. |
1135 | if (CalledFunction) { |
1136 | if (CI && CalledFunction->isIntrinsic()) |
1137 | return false; |
1138 | } else { |
1139 | // TODO: For now skip indirect calls. See comments in |
1140 | // computeFunctionSummary for what is needed to handle this. |
1141 | return false; |
1142 | } |
1143 | return true; |
1144 | } |
1145 | |