1 | //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the "backend" phase of LTO, i.e. it performs |
10 | // optimization and code generation on a loaded module. It is generally used |
11 | // internally by the LTO class but can also be used independently, for example |
12 | // to implement a standalone ThinLTO backend. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/LTO/LTOBackend.h" |
17 | #include "llvm/Analysis/AliasAnalysis.h" |
18 | #include "llvm/Analysis/CGSCCPassManager.h" |
19 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
20 | #include "llvm/Analysis/TargetLibraryInfo.h" |
21 | #include "llvm/Bitcode/BitcodeReader.h" |
22 | #include "llvm/Bitcode/BitcodeWriter.h" |
23 | #include "llvm/IR/LLVMRemarkStreamer.h" |
24 | #include "llvm/IR/LegacyPassManager.h" |
25 | #include "llvm/IR/PassManager.h" |
26 | #include "llvm/IR/Verifier.h" |
27 | #include "llvm/LTO/LTO.h" |
28 | #include "llvm/MC/TargetRegistry.h" |
29 | #include "llvm/Object/ModuleSymbolTable.h" |
30 | #include "llvm/Passes/PassBuilder.h" |
31 | #include "llvm/Passes/PassPlugin.h" |
32 | #include "llvm/Passes/StandardInstrumentations.h" |
33 | #include "llvm/Support/Error.h" |
34 | #include "llvm/Support/FileSystem.h" |
35 | #include "llvm/Support/MemoryBuffer.h" |
36 | #include "llvm/Support/Path.h" |
37 | #include "llvm/Support/Program.h" |
38 | #include "llvm/Support/ThreadPool.h" |
39 | #include "llvm/Support/ToolOutputFile.h" |
40 | #include "llvm/Support/VirtualFileSystem.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Target/TargetMachine.h" |
43 | #include "llvm/TargetParser/SubtargetFeature.h" |
44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
45 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
46 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
47 | #include "llvm/Transforms/Utils/SplitModule.h" |
48 | #include <optional> |
49 | |
50 | using namespace llvm; |
51 | using namespace lto; |
52 | |
53 | #define DEBUG_TYPE "lto-backend" |
54 | |
/// Selects whether LTO embeds the module's bitcode into the object file it
/// produces and, if so, which snapshot of the module gets embedded.
enum class LTOBitcodeEmbedding {
  /// Do not embed any bitcode.
  DoNotEmbed = 0,
  /// Embed the module as it looks after all optimization passes.
  EmbedOptimized = 1,
  /// Embed the module post merge, but before optimizations.
  EmbedPostMergePreOptimized = 2,
};
60 | |
61 | static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( |
62 | "lto-embed-bitcode" , cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed), |
63 | cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none" , |
64 | "Do not embed" ), |
65 | clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized" , |
66 | "Embed after all optimization passes" ), |
67 | clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, |
68 | "post-merge-pre-opt" , |
69 | "Embed post merge, but before optimizations" )), |
70 | cl::desc("Embed LLVM bitcode in object files produced by LTO" )); |
71 | |
72 | static cl::opt<bool> ThinLTOAssumeMerged( |
73 | "thinlto-assume-merged" , cl::init(Val: false), |
74 | cl::desc("Assume the input has already undergone ThinLTO function " |
75 | "importing and the other pre-optimization pipeline changes." )); |
76 | |
77 | namespace llvm { |
78 | extern cl::opt<bool> NoPGOWarnMismatch; |
79 | } |
80 | |
81 | [[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) { |
82 | errs() << "failed to open " << Path << ": " << Msg << '\n'; |
83 | errs().flush(); |
84 | exit(status: 1); |
85 | } |
86 | |
87 | Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, |
88 | const DenseSet<StringRef> &SaveTempsArgs) { |
89 | ShouldDiscardValueNames = false; |
90 | |
91 | std::error_code EC; |
92 | if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution" )) { |
93 | ResolutionFile = |
94 | std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt" , args&: EC, |
95 | args: sys::fs::OpenFlags::OF_TextWithCRLF); |
96 | if (EC) { |
97 | ResolutionFile.reset(); |
98 | return errorCodeToError(EC); |
99 | } |
100 | } |
101 | |
102 | auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { |
103 | // Keep track of the hook provided by the linker, which also needs to run. |
104 | ModuleHookFn LinkerHook = Hook; |
105 | Hook = [=](unsigned Task, const Module &M) { |
106 | // If the linker's hook returned false, we need to pass that result |
107 | // through. |
108 | if (LinkerHook && !LinkerHook(Task, M)) |
109 | return false; |
110 | |
111 | std::string PathPrefix; |
112 | // If this is the combined module (not a ThinLTO backend compile) or the |
113 | // user hasn't requested using the input module's path, emit to a file |
114 | // named from the provided OutputFileName with the Task ID appended. |
115 | if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { |
116 | PathPrefix = OutputFileName; |
117 | if (Task != (unsigned)-1) |
118 | PathPrefix += utostr(X: Task) + "." ; |
119 | } else |
120 | PathPrefix = M.getModuleIdentifier() + "." ; |
121 | std::string Path = PathPrefix + PathSuffix + ".bc" ; |
122 | std::error_code EC; |
123 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
124 | // Because -save-temps is a debugging feature, we report the error |
125 | // directly and exit. |
126 | if (EC) |
127 | reportOpenError(Path, Msg: EC.message()); |
128 | WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false); |
129 | return true; |
130 | }; |
131 | }; |
132 | |
133 | auto SaveCombinedIndex = |
134 | [=](const ModuleSummaryIndex &Index, |
135 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
136 | std::string Path = OutputFileName + "index.bc" ; |
137 | std::error_code EC; |
138 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
139 | // Because -save-temps is a debugging feature, we report the error |
140 | // directly and exit. |
141 | if (EC) |
142 | reportOpenError(Path, Msg: EC.message()); |
143 | writeIndexToFile(Index, Out&: OS); |
144 | |
145 | Path = OutputFileName + "index.dot" ; |
146 | raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); |
147 | if (EC) |
148 | reportOpenError(Path, Msg: EC.message()); |
149 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols); |
150 | return true; |
151 | }; |
152 | |
153 | if (SaveTempsArgs.empty()) { |
154 | setHook("0.preopt" , PreOptModuleHook); |
155 | setHook("1.promote" , PostPromoteModuleHook); |
156 | setHook("2.internalize" , PostInternalizeModuleHook); |
157 | setHook("3.import" , PostImportModuleHook); |
158 | setHook("4.opt" , PostOptModuleHook); |
159 | setHook("5.precodegen" , PreCodeGenModuleHook); |
160 | CombinedIndexHook = SaveCombinedIndex; |
161 | } else { |
162 | if (SaveTempsArgs.contains(V: "preopt" )) |
163 | setHook("0.preopt" , PreOptModuleHook); |
164 | if (SaveTempsArgs.contains(V: "promote" )) |
165 | setHook("1.promote" , PostPromoteModuleHook); |
166 | if (SaveTempsArgs.contains(V: "internalize" )) |
167 | setHook("2.internalize" , PostInternalizeModuleHook); |
168 | if (SaveTempsArgs.contains(V: "import" )) |
169 | setHook("3.import" , PostImportModuleHook); |
170 | if (SaveTempsArgs.contains(V: "opt" )) |
171 | setHook("4.opt" , PostOptModuleHook); |
172 | if (SaveTempsArgs.contains(V: "precodegen" )) |
173 | setHook("5.precodegen" , PreCodeGenModuleHook); |
174 | if (SaveTempsArgs.contains(V: "combinedindex" )) |
175 | CombinedIndexHook = SaveCombinedIndex; |
176 | } |
177 | |
178 | return Error::success(); |
179 | } |
180 | |
181 | #define HANDLE_EXTENSION(Ext) \ |
182 | llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); |
183 | #include "llvm/Support/Extension.def" |
184 | |
185 | static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, |
186 | PassBuilder &PB) { |
187 | #define HANDLE_EXTENSION(Ext) \ |
188 | get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); |
189 | #include "llvm/Support/Extension.def" |
190 | |
191 | // Load requested pass plugins and let them register pass builder callbacks |
192 | for (auto &PluginFN : PassPlugins) { |
193 | auto PassPlugin = PassPlugin::Load(Filename: PluginFN); |
194 | if (!PassPlugin) { |
195 | errs() << "Failed to load passes from '" << PluginFN |
196 | << "'. Request ignored.\n" ; |
197 | continue; |
198 | } |
199 | |
200 | PassPlugin->registerPassBuilderCallbacks(PB); |
201 | } |
202 | } |
203 | |
204 | static std::unique_ptr<TargetMachine> |
205 | createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { |
206 | StringRef TheTriple = M.getTargetTriple(); |
207 | SubtargetFeatures Features; |
208 | Features.getDefaultSubtargetFeatures(Triple: Triple(TheTriple)); |
209 | for (const std::string &A : Conf.MAttrs) |
210 | Features.AddFeature(String: A); |
211 | |
212 | std::optional<Reloc::Model> RelocModel; |
213 | if (Conf.RelocModel) |
214 | RelocModel = *Conf.RelocModel; |
215 | else if (M.getModuleFlag(Key: "PIC Level" )) |
216 | RelocModel = |
217 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
218 | |
219 | std::optional<CodeModel::Model> CodeModel; |
220 | if (Conf.CodeModel) |
221 | CodeModel = *Conf.CodeModel; |
222 | else |
223 | CodeModel = M.getCodeModel(); |
224 | |
225 | std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( |
226 | TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: Conf.Options, RM: RelocModel, |
227 | CM: CodeModel, OL: Conf.CGOptLevel)); |
228 | |
229 | assert(TM && "Failed to create target machine" ); |
230 | |
231 | if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold()) |
232 | TM->setLargeDataThreshold(*LargeDataThreshold); |
233 | |
234 | return TM; |
235 | } |
236 | |
237 | static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, |
238 | unsigned OptLevel, bool IsThinLTO, |
239 | ModuleSummaryIndex *ExportSummary, |
240 | const ModuleSummaryIndex *ImportSummary) { |
241 | auto FS = vfs::getRealFileSystem(); |
242 | std::optional<PGOOptions> PGOOpt; |
243 | if (!Conf.SampleProfile.empty()) |
244 | PGOOpt = PGOOptions(Conf.SampleProfile, "" , Conf.ProfileRemapping, |
245 | /*MemoryProfile=*/"" , FS, PGOOptions::SampleUse, |
246 | PGOOptions::NoCSAction, |
247 | PGOOptions::ColdFuncOpt::Default, true); |
248 | else if (Conf.RunCSIRInstr) { |
249 | PGOOpt = PGOOptions("" , Conf.CSIRProfile, Conf.ProfileRemapping, |
250 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
251 | PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default, |
252 | Conf.AddFSDiscriminator); |
253 | } else if (!Conf.CSIRProfile.empty()) { |
254 | PGOOpt = PGOOptions(Conf.CSIRProfile, "" , Conf.ProfileRemapping, |
255 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
256 | PGOOptions::CSIRUse, PGOOptions::ColdFuncOpt::Default, |
257 | Conf.AddFSDiscriminator); |
258 | NoPGOWarnMismatch = !Conf.PGOWarnMismatch; |
259 | } else if (Conf.AddFSDiscriminator) { |
260 | PGOOpt = PGOOptions("" , "" , "" , /*MemoryProfile=*/"" , nullptr, |
261 | PGOOptions::NoAction, PGOOptions::NoCSAction, |
262 | PGOOptions::ColdFuncOpt::Default, true); |
263 | } |
264 | TM->setPGOOption(PGOOpt); |
265 | |
266 | LoopAnalysisManager LAM; |
267 | FunctionAnalysisManager FAM; |
268 | CGSCCAnalysisManager CGAM; |
269 | ModuleAnalysisManager MAM; |
270 | |
271 | PassInstrumentationCallbacks PIC; |
272 | StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager, |
273 | Conf.VerifyEach); |
274 | SI.registerCallbacks(PIC, MAM: &MAM); |
275 | PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); |
276 | |
277 | RegisterPassPlugins(PassPlugins: Conf.PassPlugins, PB); |
278 | |
279 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
280 | new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); |
281 | if (Conf.Freestanding) |
282 | TLII->disableAllFunctions(); |
283 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
284 | |
285 | // Parse a custom AA pipeline if asked to. |
286 | if (!Conf.AAPipeline.empty()) { |
287 | AAManager AA; |
288 | if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) { |
289 | report_fatal_error(reason: Twine("unable to parse AA pipeline description '" ) + |
290 | Conf.AAPipeline + "': " + toString(E: std::move(Err))); |
291 | } |
292 | // Register the AA manager first so that our version is the one used. |
293 | FAM.registerPass(PassBuilder: [&] { return std::move(AA); }); |
294 | } |
295 | |
296 | // Register all the basic analyses with the managers. |
297 | PB.registerModuleAnalyses(MAM); |
298 | PB.registerCGSCCAnalyses(CGAM); |
299 | PB.registerFunctionAnalyses(FAM); |
300 | PB.registerLoopAnalyses(LAM); |
301 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
302 | |
303 | ModulePassManager MPM; |
304 | |
305 | if (!Conf.DisableVerify) |
306 | MPM.addPass(Pass: VerifierPass()); |
307 | |
308 | OptimizationLevel OL; |
309 | |
310 | switch (OptLevel) { |
311 | default: |
312 | llvm_unreachable("Invalid optimization level" ); |
313 | case 0: |
314 | OL = OptimizationLevel::O0; |
315 | break; |
316 | case 1: |
317 | OL = OptimizationLevel::O1; |
318 | break; |
319 | case 2: |
320 | OL = OptimizationLevel::O2; |
321 | break; |
322 | case 3: |
323 | OL = OptimizationLevel::O3; |
324 | break; |
325 | } |
326 | |
327 | // Parse a custom pipeline if asked to. |
328 | if (!Conf.OptPipeline.empty()) { |
329 | if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) { |
330 | report_fatal_error(reason: Twine("unable to parse pass pipeline description '" ) + |
331 | Conf.OptPipeline + "': " + toString(E: std::move(Err))); |
332 | } |
333 | } else if (Conf.UseDefaultPipeline) { |
334 | MPM.addPass(Pass: PB.buildPerModuleDefaultPipeline(Level: OL)); |
335 | } else if (IsThinLTO) { |
336 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary)); |
337 | } else { |
338 | MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary)); |
339 | } |
340 | |
341 | if (!Conf.DisableVerify) |
342 | MPM.addPass(Pass: VerifierPass()); |
343 | |
344 | MPM.run(IR&: Mod, AM&: MAM); |
345 | } |
346 | |
347 | bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, |
348 | bool IsThinLTO, ModuleSummaryIndex *ExportSummary, |
349 | const ModuleSummaryIndex *ImportSummary, |
350 | const std::vector<uint8_t> &CmdArgs) { |
351 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { |
352 | // FIXME: the motivation for capturing post-merge bitcode and command line |
353 | // is replicating the compilation environment from bitcode, without needing |
354 | // to understand the dependencies (the functions to be imported). This |
355 | // assumes a clang - based invocation, case in which we have the command |
356 | // line. |
357 | // It's not very clear how the above motivation would map in the |
358 | // linker-based case, so we currently don't plumb the command line args in |
359 | // that case. |
360 | if (CmdArgs.empty()) |
361 | LLVM_DEBUG( |
362 | dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " |
363 | "command line arguments are not available" ); |
364 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
365 | /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, |
366 | /*Cmdline*/ CmdArgs); |
367 | } |
368 | // FIXME: Plumb the combined index into the new pass manager. |
369 | runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary, |
370 | ImportSummary); |
371 | return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); |
372 | } |
373 | |
374 | static void codegen(const Config &Conf, TargetMachine *TM, |
375 | AddStreamFn AddStream, unsigned Task, Module &Mod, |
376 | const ModuleSummaryIndex &CombinedIndex) { |
377 | if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) |
378 | return; |
379 | |
380 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) |
381 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
382 | /*EmbedBitcode*/ true, |
383 | /*EmbedCmdline*/ false, |
384 | /*CmdArgs*/ std::vector<uint8_t>()); |
385 | |
386 | std::unique_ptr<ToolOutputFile> DwoOut; |
387 | SmallString<1024> DwoFile(Conf.SplitDwarfOutput); |
388 | if (!Conf.DwoDir.empty()) { |
389 | std::error_code EC; |
390 | if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir)) |
391 | report_fatal_error(reason: Twine("Failed to create directory " ) + Conf.DwoDir + |
392 | ": " + EC.message()); |
393 | |
394 | DwoFile = Conf.DwoDir; |
395 | sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo" ); |
396 | TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); |
397 | } else |
398 | TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; |
399 | |
400 | if (!DwoFile.empty()) { |
401 | std::error_code EC; |
402 | DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None); |
403 | if (EC) |
404 | report_fatal_error(reason: Twine("Failed to open " ) + DwoFile + ": " + |
405 | EC.message()); |
406 | } |
407 | |
408 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
409 | AddStream(Task, Mod.getModuleIdentifier()); |
410 | if (Error Err = StreamOrErr.takeError()) |
411 | report_fatal_error(Err: std::move(Err)); |
412 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
413 | TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; |
414 | |
415 | legacy::PassManager CodeGenPasses; |
416 | TargetLibraryInfoImpl TLII(Triple(Mod.getTargetTriple())); |
417 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
418 | CodeGenPasses.add( |
419 | P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex)); |
420 | if (Conf.PreCodeGenPassesHook) |
421 | Conf.PreCodeGenPassesHook(CodeGenPasses); |
422 | if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, |
423 | DwoOut ? &DwoOut->os() : nullptr, |
424 | Conf.CGFileType)) |
425 | report_fatal_error(reason: "Failed to setup codegen" ); |
426 | CodeGenPasses.run(M&: Mod); |
427 | |
428 | if (DwoOut) |
429 | DwoOut->keep(); |
430 | } |
431 | |
432 | static void splitCodeGen(const Config &C, TargetMachine *TM, |
433 | AddStreamFn AddStream, |
434 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
435 | const ModuleSummaryIndex &CombinedIndex) { |
436 | ThreadPool CodegenThreadPool( |
437 | heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel)); |
438 | unsigned ThreadCount = 0; |
439 | const Target *T = &TM->getTarget(); |
440 | |
441 | SplitModule( |
442 | M&: Mod, N: ParallelCodeGenParallelismLevel, |
443 | ModuleCallback: [&](std::unique_ptr<Module> MPart) { |
444 | // We want to clone the module in a new context to multi-thread the |
445 | // codegen. We do it by serializing partition modules to bitcode |
446 | // (while still on the main thread, in order to avoid data races) and |
447 | // spinning up new threads which deserialize the partitions into |
448 | // separate contexts. |
449 | // FIXME: Provide a more direct way to do this in LLVM. |
450 | SmallString<0> BC; |
451 | raw_svector_ostream BCOS(BC); |
452 | WriteBitcodeToFile(M: *MPart, Out&: BCOS); |
453 | |
454 | // Enqueue the task |
455 | CodegenThreadPool.async( |
456 | F: [&](const SmallString<0> &BC, unsigned ThreadId) { |
457 | LTOLLVMContext Ctx(C); |
458 | Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( |
459 | Buffer: MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o" ), |
460 | Context&: Ctx); |
461 | if (!MOrErr) |
462 | report_fatal_error(reason: "Failed to read bitcode" ); |
463 | std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); |
464 | |
465 | std::unique_ptr<TargetMachine> TM = |
466 | createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx); |
467 | |
468 | codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx, |
469 | CombinedIndex); |
470 | }, |
471 | // Pass BC using std::move to ensure that it get moved rather than |
472 | // copied into the thread's context. |
473 | ArgList: std::move(BC), ArgList: ThreadCount++); |
474 | }, |
475 | PreserveLocals: false); |
476 | |
477 | // Because the inner lambda (which runs in a worker thread) captures our local |
478 | // variables, we need to wait for the worker threads to terminate before we |
479 | // can leave the function scope. |
480 | CodegenThreadPool.wait(); |
481 | } |
482 | |
483 | static Expected<const Target *> initAndLookupTarget(const Config &C, |
484 | Module &Mod) { |
485 | if (!C.OverrideTriple.empty()) |
486 | Mod.setTargetTriple(C.OverrideTriple); |
487 | else if (Mod.getTargetTriple().empty()) |
488 | Mod.setTargetTriple(C.DefaultTriple); |
489 | |
490 | std::string Msg; |
491 | const Target *T = TargetRegistry::lookupTarget(Triple: Mod.getTargetTriple(), Error&: Msg); |
492 | if (!T) |
493 | return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode()); |
494 | return T; |
495 | } |
496 | |
497 | Error lto::( |
498 | std::unique_ptr<ToolOutputFile> DiagOutputFile) { |
499 | // Make sure we flush the diagnostic remarks file in case the linker doesn't |
500 | // call the global destructors before exiting. |
501 | if (!DiagOutputFile) |
502 | return Error::success(); |
503 | DiagOutputFile->keep(); |
504 | DiagOutputFile->os().flush(); |
505 | return Error::success(); |
506 | } |
507 | |
508 | Error lto::backend(const Config &C, AddStreamFn AddStream, |
509 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
510 | ModuleSummaryIndex &CombinedIndex) { |
511 | Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod); |
512 | if (!TOrErr) |
513 | return TOrErr.takeError(); |
514 | |
515 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod); |
516 | |
517 | LLVM_DEBUG(dbgs() << "Running regular LTO\n" ); |
518 | if (!C.CodeGenOnly) { |
519 | if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false, |
520 | /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, |
521 | /*CmdArgs*/ std::vector<uint8_t>())) |
522 | return Error::success(); |
523 | } |
524 | |
525 | if (ParallelCodeGenParallelismLevel == 1) { |
526 | codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex); |
527 | } else { |
528 | splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, |
529 | CombinedIndex); |
530 | } |
531 | return Error::success(); |
532 | } |
533 | |
534 | static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, |
535 | const ModuleSummaryIndex &Index) { |
536 | std::vector<GlobalValue*> DeadGVs; |
537 | for (auto &GV : Mod.global_values()) |
538 | if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID())) |
539 | if (!Index.isGlobalValueLive(GVS)) { |
540 | DeadGVs.push_back(x: &GV); |
541 | convertToDeclaration(GV); |
542 | } |
543 | |
544 | // Now that all dead bodies have been dropped, delete the actual objects |
545 | // themselves when possible. |
546 | for (GlobalValue *GV : DeadGVs) { |
547 | GV->removeDeadConstantUsers(); |
548 | // Might reference something defined in native object (i.e. dropped a |
549 | // non-prevailing IR def, but we need to keep the declaration). |
550 | if (GV->use_empty()) |
551 | GV->eraseFromParent(); |
552 | } |
553 | } |
554 | |
555 | Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, |
556 | Module &Mod, const ModuleSummaryIndex &CombinedIndex, |
557 | const FunctionImporter::ImportMapTy &ImportList, |
558 | const GVSummaryMapTy &DefinedGlobals, |
559 | MapVector<StringRef, BitcodeModule> *ModuleMap, |
560 | const std::vector<uint8_t> &CmdArgs) { |
561 | Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod); |
562 | if (!TOrErr) |
563 | return TOrErr.takeError(); |
564 | |
565 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod); |
566 | |
567 | // Setup optimization remarks. |
568 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
569 | Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, |
570 | RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold, |
571 | Count: Task); |
572 | if (!DiagFileOrErr) |
573 | return DiagFileOrErr.takeError(); |
574 | auto DiagnosticOutputFile = std::move(*DiagFileOrErr); |
575 | |
576 | // Set the partial sample profile ratio in the profile summary module flag of |
577 | // the module, if applicable. |
578 | Mod.setPartialSampleProfileRatio(CombinedIndex); |
579 | |
580 | LLVM_DEBUG(dbgs() << "Running ThinLTO\n" ); |
581 | if (Conf.CodeGenOnly) { |
582 | codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex); |
583 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
584 | } |
585 | |
586 | if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) |
587 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
588 | |
589 | auto OptimizeAndCodegen = |
590 | [&](Module &Mod, TargetMachine *TM, |
591 | std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { |
592 | if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, |
593 | /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, |
594 | CmdArgs)) |
595 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
596 | |
597 | codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); |
598 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
599 | }; |
600 | |
601 | if (ThinLTOAssumeMerged) |
602 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
603 | |
604 | // When linking an ELF shared object, dso_local should be dropped. We |
605 | // conservatively do this for -fpic. |
606 | bool ClearDSOLocalOnDeclarations = |
607 | TM->getTargetTriple().isOSBinFormatELF() && |
608 | TM->getRelocationModel() != Reloc::Static && |
609 | Mod.getPIELevel() == PIELevel::Default; |
610 | renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations); |
611 | |
612 | dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex); |
613 | |
614 | thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true); |
615 | |
616 | if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) |
617 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
618 | |
619 | if (!DefinedGlobals.empty()) |
620 | thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals); |
621 | |
622 | if (Conf.PostInternalizeModuleHook && |
623 | !Conf.PostInternalizeModuleHook(Task, Mod)) |
624 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
625 | |
626 | auto ModuleLoader = [&](StringRef Identifier) { |
627 | assert(Mod.getContext().isODRUniquingDebugTypes() && |
628 | "ODR Type uniquing should be enabled on the context" ); |
629 | if (ModuleMap) { |
630 | auto I = ModuleMap->find(Key: Identifier); |
631 | assert(I != ModuleMap->end()); |
632 | return I->second.getLazyModule(Context&: Mod.getContext(), |
633 | /*ShouldLazyLoadMetadata=*/true, |
634 | /*IsImporting*/ true); |
635 | } |
636 | |
637 | ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = |
638 | llvm::MemoryBuffer::getFile(Filename: Identifier); |
639 | if (!MBOrErr) |
640 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
641 | Args: Twine("Error loading imported file " ) + Identifier + " : " , |
642 | Args: MBOrErr.getError())); |
643 | |
644 | Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr); |
645 | if (!BMOrErr) |
646 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
647 | Args: Twine("Error loading imported file " ) + Identifier + " : " + |
648 | toString(E: BMOrErr.takeError()), |
649 | Args: inconvertibleErrorCode())); |
650 | |
651 | Expected<std::unique_ptr<Module>> MOrErr = |
652 | BMOrErr->getLazyModule(Context&: Mod.getContext(), |
653 | /*ShouldLazyLoadMetadata=*/true, |
654 | /*IsImporting*/ true); |
655 | if (MOrErr) |
656 | (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr)); |
657 | return MOrErr; |
658 | }; |
659 | |
660 | FunctionImporter Importer(CombinedIndex, ModuleLoader, |
661 | ClearDSOLocalOnDeclarations); |
662 | if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError()) |
663 | return Err; |
664 | |
665 | // Do this after any importing so that imported code is updated. |
666 | updateMemProfAttributes(Mod, Index: CombinedIndex); |
667 | updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility()); |
668 | |
669 | if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) |
670 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
671 | |
672 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
673 | } |
674 | |
675 | BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { |
676 | if (ThinLTOAssumeMerged && BMs.size() == 1) |
677 | return BMs.begin(); |
678 | |
679 | for (BitcodeModule &BM : BMs) { |
680 | Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); |
681 | if (LTOInfo && LTOInfo->IsThinLTO) |
682 | return &BM; |
683 | } |
684 | return nullptr; |
685 | } |
686 | |
687 | Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { |
688 | Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef); |
689 | if (!BMsOrErr) |
690 | return BMsOrErr.takeError(); |
691 | |
692 | // The bitcode file may contain multiple modules, we want the one that is |
693 | // marked as being the ThinLTO module. |
694 | if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr)) |
695 | return *Bm; |
696 | |
697 | return make_error<StringError>(Args: "Could not find module summary" , |
698 | Args: inconvertibleErrorCode()); |
699 | } |
700 | |
701 | bool lto::initImportList(const Module &M, |
702 | const ModuleSummaryIndex &CombinedIndex, |
703 | FunctionImporter::ImportMapTy &ImportList) { |
704 | if (ThinLTOAssumeMerged) |
705 | return true; |
706 | // We can simply import the values mentioned in the combined index, since |
707 | // we should only invoke this using the individual indexes written out |
708 | // via a WriteIndexesThinBackend. |
709 | for (const auto &GlobalList : CombinedIndex) { |
710 | // Ignore entries for undefined references. |
711 | if (GlobalList.second.SummaryList.empty()) |
712 | continue; |
713 | |
714 | auto GUID = GlobalList.first; |
715 | for (const auto &Summary : GlobalList.second.SummaryList) { |
716 | // Skip the summaries for the importing module. These are included to |
717 | // e.g. record required linkage changes. |
718 | if (Summary->modulePath() == M.getModuleIdentifier()) |
719 | continue; |
720 | // Add an entry to provoke importing by thinBackend. |
721 | ImportList[Summary->modulePath()].insert(x: GUID); |
722 | } |
723 | } |
724 | return true; |
725 | } |
726 | |