1 | //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the Thin Link Time Optimization library. This library is |
10 | // intended to be used by linker to optimize code at link time. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" |
15 | #include "llvm/Support/CommandLine.h" |
16 | |
17 | #include "llvm/ADT/ScopeExit.h" |
18 | #include "llvm/ADT/Statistic.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/Analysis/AliasAnalysis.h" |
21 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
22 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
23 | #include "llvm/Analysis/TargetLibraryInfo.h" |
24 | #include "llvm/Bitcode/BitcodeReader.h" |
25 | #include "llvm/Bitcode/BitcodeWriter.h" |
26 | #include "llvm/Bitcode/BitcodeWriterPass.h" |
27 | #include "llvm/Config/llvm-config.h" |
28 | #include "llvm/IR/DebugInfo.h" |
29 | #include "llvm/IR/DiagnosticPrinter.h" |
30 | #include "llvm/IR/LegacyPassManager.h" |
31 | #include "llvm/IR/LLVMContext.h" |
32 | #include "llvm/IR/LLVMRemarkStreamer.h" |
33 | #include "llvm/IR/Mangler.h" |
34 | #include "llvm/IR/PassTimingInfo.h" |
35 | #include "llvm/IR/Verifier.h" |
36 | #include "llvm/IRReader/IRReader.h" |
37 | #include "llvm/LTO/LTO.h" |
38 | #include "llvm/LTO/SummaryBasedOptimizations.h" |
39 | #include "llvm/MC/TargetRegistry.h" |
40 | #include "llvm/Object/IRObjectFile.h" |
41 | #include "llvm/Passes/PassBuilder.h" |
42 | #include "llvm/Passes/StandardInstrumentations.h" |
43 | #include "llvm/Remarks/HotnessThresholdParser.h" |
44 | #include "llvm/Support/CachePruning.h" |
45 | #include "llvm/Support/Debug.h" |
46 | #include "llvm/Support/Error.h" |
47 | #include "llvm/Support/FileSystem.h" |
48 | #include "llvm/Support/Path.h" |
49 | #include "llvm/Support/SHA1.h" |
50 | #include "llvm/Support/SmallVectorMemoryBuffer.h" |
51 | #include "llvm/Support/ThreadPool.h" |
52 | #include "llvm/Support/Threading.h" |
53 | #include "llvm/Support/ToolOutputFile.h" |
54 | #include "llvm/Support/raw_ostream.h" |
55 | #include "llvm/Target/TargetMachine.h" |
56 | #include "llvm/TargetParser/SubtargetFeature.h" |
57 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
58 | #include "llvm/Transforms/IPO/FunctionImport.h" |
59 | #include "llvm/Transforms/IPO/Internalize.h" |
60 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
61 | #include "llvm/Transforms/ObjCARC.h" |
62 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
63 | |
64 | #include <numeric> |
65 | |
66 | #if !defined(_MSC_VER) && !defined(__MINGW32__) |
67 | #include <unistd.h> |
68 | #else |
69 | #include <io.h> |
70 | #endif |
71 | |
72 | using namespace llvm; |
73 | |
74 | #define DEBUG_TYPE "thinlto" |
75 | |
76 | namespace llvm { |
77 | // Flags -discard-value-names, defined in LTOCodeGenerator.cpp |
78 | extern cl::opt<bool> LTODiscardValueNames; |
79 | extern cl::opt<std::string> ; |
80 | extern cl::opt<std::string> ; |
81 | extern cl::opt<bool> ; |
82 | extern cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser> |
83 | ; |
84 | extern cl::opt<std::string> ; |
85 | } |
86 | |
87 | namespace { |
88 | |
89 | // Default to using all available threads in the system, but using only one |
90 | // thred per core, as indicated by the usage of |
91 | // heavyweight_hardware_concurrency() below. |
92 | static cl::opt<int> ThreadCount("threads" , cl::init(Val: 0)); |
93 | |
94 | // Simple helper to save temporary files for debug. |
95 | static void saveTempBitcode(const Module &TheModule, StringRef TempDir, |
96 | unsigned count, StringRef Suffix) { |
97 | if (TempDir.empty()) |
98 | return; |
99 | // User asked to save temps, let dump the bitcode file after import. |
100 | std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); |
101 | std::error_code EC; |
102 | raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); |
103 | if (EC) |
104 | report_fatal_error(reason: Twine("Failed to open " ) + SaveTempPath + |
105 | " to save optimized bitcode\n" ); |
106 | WriteBitcodeToFile(M: TheModule, Out&: OS, /* ShouldPreserveUseListOrder */ true); |
107 | } |
108 | |
109 | static const GlobalValueSummary * |
110 | getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { |
111 | // If there is any strong definition anywhere, get it. |
112 | auto StrongDefForLinker = llvm::find_if( |
113 | Range: GVSummaryList, P: [](const std::unique_ptr<GlobalValueSummary> &Summary) { |
114 | auto Linkage = Summary->linkage(); |
115 | return !GlobalValue::isAvailableExternallyLinkage(Linkage) && |
116 | !GlobalValue::isWeakForLinker(Linkage); |
117 | }); |
118 | if (StrongDefForLinker != GVSummaryList.end()) |
119 | return StrongDefForLinker->get(); |
120 | // Get the first *linker visible* definition for this global in the summary |
121 | // list. |
122 | auto FirstDefForLinker = llvm::find_if( |
123 | Range: GVSummaryList, P: [](const std::unique_ptr<GlobalValueSummary> &Summary) { |
124 | auto Linkage = Summary->linkage(); |
125 | return !GlobalValue::isAvailableExternallyLinkage(Linkage); |
126 | }); |
127 | // Extern templates can be emitted as available_externally. |
128 | if (FirstDefForLinker == GVSummaryList.end()) |
129 | return nullptr; |
130 | return FirstDefForLinker->get(); |
131 | } |
132 | |
133 | // Populate map of GUID to the prevailing copy for any multiply defined |
134 | // symbols. Currently assume first copy is prevailing, or any strong |
135 | // definition. Can be refined with Linker information in the future. |
136 | static void computePrevailingCopies( |
137 | const ModuleSummaryIndex &Index, |
138 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) { |
139 | auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) { |
140 | return GVSummaryList.size() > 1; |
141 | }; |
142 | |
143 | for (auto &I : Index) { |
144 | if (HasMultipleCopies(I.second.SummaryList)) |
145 | PrevailingCopy[I.first] = |
146 | getFirstDefinitionForLinker(GVSummaryList: I.second.SummaryList); |
147 | } |
148 | } |
149 | |
150 | static StringMap<lto::InputFile *> |
151 | generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) { |
152 | StringMap<lto::InputFile *> ModuleMap; |
153 | for (auto &M : Modules) { |
154 | LLVM_DEBUG(dbgs() << "Adding module " << M->getName() << " to ModuleMap\n" ); |
155 | assert(!ModuleMap.contains(M->getName()) && |
156 | "Expect unique Buffer Identifier" ); |
157 | ModuleMap[M->getName()] = M.get(); |
158 | } |
159 | return ModuleMap; |
160 | } |
161 | |
162 | static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index, |
163 | bool ClearDSOLocalOnDeclarations) { |
164 | if (renameModuleForThinLTO(M&: TheModule, Index, ClearDSOLocalOnDeclarations)) |
165 | report_fatal_error(reason: "renameModuleForThinLTO failed" ); |
166 | } |
167 | |
168 | namespace { |
169 | class ThinLTODiagnosticInfo : public DiagnosticInfo { |
170 | const Twine &Msg; |
171 | public: |
172 | ThinLTODiagnosticInfo(const Twine &DiagMsg, |
173 | DiagnosticSeverity Severity = DS_Error) |
174 | : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} |
175 | void print(DiagnosticPrinter &DP) const override { DP << Msg; } |
176 | }; |
177 | } |
178 | |
179 | /// Verify the module and strip broken debug info. |
180 | static void verifyLoadedModule(Module &TheModule) { |
181 | bool BrokenDebugInfo = false; |
182 | if (verifyModule(M: TheModule, OS: &dbgs(), BrokenDebugInfo: &BrokenDebugInfo)) |
183 | report_fatal_error(reason: "Broken module found, compilation aborted!" ); |
184 | if (BrokenDebugInfo) { |
185 | TheModule.getContext().diagnose(DI: ThinLTODiagnosticInfo( |
186 | "Invalid debug info found, debug info will be stripped" , DS_Warning)); |
187 | StripDebugInfo(M&: TheModule); |
188 | } |
189 | } |
190 | |
191 | static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input, |
192 | LLVMContext &Context, |
193 | bool Lazy, |
194 | bool IsImporting) { |
195 | auto &Mod = Input->getSingleBitcodeModule(); |
196 | SMDiagnostic Err; |
197 | Expected<std::unique_ptr<Module>> ModuleOrErr = |
198 | Lazy ? Mod.getLazyModule(Context, |
199 | /* ShouldLazyLoadMetadata */ true, IsImporting) |
200 | : Mod.parseModule(Context); |
201 | if (!ModuleOrErr) { |
202 | handleAllErrors(E: ModuleOrErr.takeError(), Handlers: [&](ErrorInfoBase &EIB) { |
203 | SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(), |
204 | SourceMgr::DK_Error, EIB.message()); |
205 | Err.print(ProgName: "ThinLTO" , S&: errs()); |
206 | }); |
207 | report_fatal_error(reason: "Can't load module, abort." ); |
208 | } |
209 | if (!Lazy) |
210 | verifyLoadedModule(TheModule&: *ModuleOrErr.get()); |
211 | return std::move(*ModuleOrErr); |
212 | } |
213 | |
214 | static void |
215 | crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, |
216 | StringMap<lto::InputFile *> &ModuleMap, |
217 | const FunctionImporter::ImportMapTy &ImportList, |
218 | bool ClearDSOLocalOnDeclarations) { |
219 | auto Loader = [&](StringRef Identifier) { |
220 | auto &Input = ModuleMap[Identifier]; |
221 | return loadModuleFromInput(Input, Context&: TheModule.getContext(), |
222 | /*Lazy=*/true, /*IsImporting*/ true); |
223 | }; |
224 | |
225 | FunctionImporter Importer(Index, Loader, ClearDSOLocalOnDeclarations); |
226 | Expected<bool> Result = Importer.importFunctions(M&: TheModule, ImportList); |
227 | if (!Result) { |
228 | handleAllErrors(E: Result.takeError(), Handlers: [&](ErrorInfoBase &EIB) { |
229 | SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), |
230 | SourceMgr::DK_Error, EIB.message()); |
231 | Err.print(ProgName: "ThinLTO" , S&: errs()); |
232 | }); |
233 | report_fatal_error(reason: "importFunctions failed" ); |
234 | } |
235 | // Verify again after cross-importing. |
236 | verifyLoadedModule(TheModule); |
237 | } |
238 | |
239 | static void optimizeModule(Module &TheModule, TargetMachine &TM, |
240 | unsigned OptLevel, bool Freestanding, |
241 | bool DebugPassManager, ModuleSummaryIndex *Index) { |
242 | std::optional<PGOOptions> PGOOpt; |
243 | LoopAnalysisManager LAM; |
244 | FunctionAnalysisManager FAM; |
245 | CGSCCAnalysisManager CGAM; |
246 | ModuleAnalysisManager MAM; |
247 | |
248 | PassInstrumentationCallbacks PIC; |
249 | StandardInstrumentations SI(TheModule.getContext(), DebugPassManager); |
250 | SI.registerCallbacks(PIC, MAM: &MAM); |
251 | PipelineTuningOptions PTO; |
252 | PTO.LoopVectorization = true; |
253 | PTO.SLPVectorization = true; |
254 | PassBuilder PB(&TM, PTO, PGOOpt, &PIC); |
255 | |
256 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
257 | new TargetLibraryInfoImpl(Triple(TM.getTargetTriple()))); |
258 | if (Freestanding) |
259 | TLII->disableAllFunctions(); |
260 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
261 | |
262 | // Register all the basic analyses with the managers. |
263 | PB.registerModuleAnalyses(MAM); |
264 | PB.registerCGSCCAnalyses(CGAM); |
265 | PB.registerFunctionAnalyses(FAM); |
266 | PB.registerLoopAnalyses(LAM); |
267 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
268 | |
269 | ModulePassManager MPM; |
270 | |
271 | OptimizationLevel OL; |
272 | |
273 | switch (OptLevel) { |
274 | default: |
275 | llvm_unreachable("Invalid optimization level" ); |
276 | case 0: |
277 | OL = OptimizationLevel::O0; |
278 | break; |
279 | case 1: |
280 | OL = OptimizationLevel::O1; |
281 | break; |
282 | case 2: |
283 | OL = OptimizationLevel::O2; |
284 | break; |
285 | case 3: |
286 | OL = OptimizationLevel::O3; |
287 | break; |
288 | } |
289 | |
290 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary: Index)); |
291 | |
292 | MPM.run(IR&: TheModule, AM&: MAM); |
293 | } |
294 | |
295 | static void |
296 | addUsedSymbolToPreservedGUID(const lto::InputFile &File, |
297 | DenseSet<GlobalValue::GUID> &PreservedGUID) { |
298 | for (const auto &Sym : File.symbols()) { |
299 | if (Sym.isUsed()) |
300 | PreservedGUID.insert(V: GlobalValue::getGUID(GlobalName: Sym.getIRName())); |
301 | } |
302 | } |
303 | |
304 | // Convert the PreservedSymbols map from "Name" based to "GUID" based. |
305 | static void computeGUIDPreservedSymbols(const lto::InputFile &File, |
306 | const StringSet<> &PreservedSymbols, |
307 | const Triple &TheTriple, |
308 | DenseSet<GlobalValue::GUID> &GUIDs) { |
309 | // Iterate the symbols in the input file and if the input has preserved symbol |
310 | // compute the GUID for the symbol. |
311 | for (const auto &Sym : File.symbols()) { |
312 | if (PreservedSymbols.count(Key: Sym.getName()) && !Sym.getIRName().empty()) |
313 | GUIDs.insert(V: GlobalValue::getGUID(GlobalName: GlobalValue::getGlobalIdentifier( |
314 | Name: Sym.getIRName(), Linkage: GlobalValue::ExternalLinkage, FileName: "" ))); |
315 | } |
316 | } |
317 | |
318 | static DenseSet<GlobalValue::GUID> |
319 | computeGUIDPreservedSymbols(const lto::InputFile &File, |
320 | const StringSet<> &PreservedSymbols, |
321 | const Triple &TheTriple) { |
322 | DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); |
323 | computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple, |
324 | GUIDs&: GUIDPreservedSymbols); |
325 | return GUIDPreservedSymbols; |
326 | } |
327 | |
328 | std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, |
329 | TargetMachine &TM) { |
330 | SmallVector<char, 128> OutputBuffer; |
331 | |
332 | // CodeGen |
333 | { |
334 | raw_svector_ostream OS(OutputBuffer); |
335 | legacy::PassManager PM; |
336 | |
337 | // If the bitcode files contain ARC code and were compiled with optimization, |
338 | // the ObjCARCContractPass must be run, so do it unconditionally here. |
339 | PM.add(P: createObjCARCContractPass()); |
340 | |
341 | // Setup the codegen now. |
342 | if (TM.addPassesToEmitFile(PM, OS, nullptr, CodeGenFileType::ObjectFile, |
343 | /* DisableVerify */ true)) |
344 | report_fatal_error(reason: "Failed to setup codegen" ); |
345 | |
346 | // Run codegen now. resulting binary is in OutputBuffer. |
347 | PM.run(M&: TheModule); |
348 | } |
349 | return std::make_unique<SmallVectorMemoryBuffer>( |
350 | args: std::move(OutputBuffer), /*RequiresNullTerminator=*/args: false); |
351 | } |
352 | |
353 | /// Manage caching for a single Module. |
354 | class ModuleCacheEntry { |
355 | SmallString<128> EntryPath; |
356 | |
357 | public: |
358 | // Create a cache entry. This compute a unique hash for the Module considering |
359 | // the current list of export/import, and offer an interface to query to |
360 | // access the content in the cache. |
361 | ModuleCacheEntry( |
362 | StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID, |
363 | const FunctionImporter::ImportMapTy &ImportList, |
364 | const FunctionImporter::ExportSetTy &ExportList, |
365 | const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, |
366 | const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel, |
367 | bool Freestanding, const TargetMachineBuilder &TMBuilder) { |
368 | if (CachePath.empty()) |
369 | return; |
370 | |
371 | if (!Index.modulePaths().count(Key: ModuleID)) |
372 | // The module does not have an entry, it can't have a hash at all |
373 | return; |
374 | |
375 | if (all_of(Range: Index.getModuleHash(ModPath: ModuleID), |
376 | P: [](uint32_t V) { return V == 0; })) |
377 | // No hash entry, no caching! |
378 | return; |
379 | |
380 | llvm::lto::Config Conf; |
381 | Conf.OptLevel = OptLevel; |
382 | Conf.Options = TMBuilder.Options; |
383 | Conf.CPU = TMBuilder.MCpu; |
384 | Conf.MAttrs.push_back(x: TMBuilder.MAttr); |
385 | Conf.RelocModel = TMBuilder.RelocModel; |
386 | Conf.CGOptLevel = TMBuilder.CGOptLevel; |
387 | Conf.Freestanding = Freestanding; |
388 | SmallString<40> Key; |
389 | computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList, |
390 | ResolvedODR, DefinedGlobals: DefinedGVSummaries); |
391 | |
392 | // This choice of file name allows the cache to be pruned (see pruneCache() |
393 | // in include/llvm/Support/CachePruning.h). |
394 | sys::path::append(path&: EntryPath, a: CachePath, b: "llvmcache-" + Key); |
395 | } |
396 | |
397 | // Access the path to this entry in the cache. |
398 | StringRef getEntryPath() { return EntryPath; } |
399 | |
400 | // Try loading the buffer for this cache entry. |
401 | ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() { |
402 | if (EntryPath.empty()) |
403 | return std::error_code(); |
404 | SmallString<64> ResultPath; |
405 | Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( |
406 | Name: Twine(EntryPath), Flags: sys::fs::OF_UpdateAtime, RealPath: &ResultPath); |
407 | if (!FDOrErr) |
408 | return errorToErrorCode(Err: FDOrErr.takeError()); |
409 | ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile( |
410 | FD: *FDOrErr, Filename: EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); |
411 | sys::fs::closeFile(F&: *FDOrErr); |
412 | return MBOrErr; |
413 | } |
414 | |
415 | // Cache the Produced object file |
416 | void write(const MemoryBuffer &OutputBuffer) { |
417 | if (EntryPath.empty()) |
418 | return; |
419 | |
420 | if (auto Err = llvm::writeToOutput( |
421 | OutputFileName: EntryPath, Write: [&OutputBuffer](llvm::raw_ostream &OS) -> llvm::Error { |
422 | OS << OutputBuffer.getBuffer(); |
423 | return llvm::Error::success(); |
424 | })) |
425 | report_fatal_error(reason: llvm::formatv(Fmt: "ThinLTO: Can't write file {0}: {1}" , |
426 | Vals&: EntryPath, |
427 | Vals: toString(E: std::move(Err)).c_str())); |
428 | } |
429 | }; |
430 | |
431 | static std::unique_ptr<MemoryBuffer> |
432 | ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, |
433 | StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM, |
434 | const FunctionImporter::ImportMapTy &ImportList, |
435 | const FunctionImporter::ExportSetTy &ExportList, |
436 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, |
437 | const GVSummaryMapTy &DefinedGlobals, |
438 | const ThinLTOCodeGenerator::CachingOptions &CacheOptions, |
439 | bool DisableCodeGen, StringRef SaveTempsDir, |
440 | bool Freestanding, unsigned OptLevel, unsigned count, |
441 | bool DebugPassManager) { |
442 | // "Benchmark"-like optimization: single-source case |
443 | bool SingleModule = (ModuleMap.size() == 1); |
444 | |
445 | // When linking an ELF shared object, dso_local should be dropped. We |
446 | // conservatively do this for -fpic. |
447 | bool ClearDSOLocalOnDeclarations = |
448 | TM.getTargetTriple().isOSBinFormatELF() && |
449 | TM.getRelocationModel() != Reloc::Static && |
450 | TheModule.getPIELevel() == PIELevel::Default; |
451 | |
452 | if (!SingleModule) { |
453 | promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations); |
454 | |
455 | // Apply summary-based prevailing-symbol resolution decisions. |
456 | thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true); |
457 | |
458 | // Save temps: after promotion. |
459 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".1.promoted.bc" ); |
460 | } |
461 | |
462 | // Be friendly and don't nuke totally the module when the client didn't |
463 | // supply anything to preserve. |
464 | if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) { |
465 | // Apply summary-based internalization decisions. |
466 | thinLTOInternalizeModule(TheModule, DefinedGlobals); |
467 | } |
468 | |
469 | // Save internalized bitcode |
470 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".2.internalized.bc" ); |
471 | |
472 | if (!SingleModule) |
473 | crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, |
474 | ClearDSOLocalOnDeclarations); |
475 | |
476 | // Do this after any importing so that imported code is updated. |
477 | // See comment at call to updateVCallVisibilityInIndex() for why |
478 | // WholeProgramVisibilityEnabledInLTO is false. |
479 | updatePublicTypeTestCalls(M&: TheModule, |
480 | /* WholeProgramVisibilityEnabledInLTO */ false); |
481 | |
482 | // Save temps: after cross-module import. |
483 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".3.imported.bc" ); |
484 | |
485 | optimizeModule(TheModule, TM, OptLevel, Freestanding, DebugPassManager, |
486 | Index: &Index); |
487 | |
488 | saveTempBitcode(TheModule, TempDir: SaveTempsDir, count, Suffix: ".4.opt.bc" ); |
489 | |
490 | if (DisableCodeGen) { |
491 | // Configured to stop before CodeGen, serialize the bitcode and return. |
492 | SmallVector<char, 128> OutputBuffer; |
493 | { |
494 | raw_svector_ostream OS(OutputBuffer); |
495 | ProfileSummaryInfo PSI(TheModule); |
496 | auto Index = buildModuleSummaryIndex(M: TheModule, GetBFICallback: nullptr, PSI: &PSI); |
497 | WriteBitcodeToFile(M: TheModule, Out&: OS, ShouldPreserveUseListOrder: true, Index: &Index); |
498 | } |
499 | return std::make_unique<SmallVectorMemoryBuffer>( |
500 | args: std::move(OutputBuffer), /*RequiresNullTerminator=*/args: false); |
501 | } |
502 | |
503 | return codegenModule(TheModule, TM); |
504 | } |
505 | |
506 | /// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map |
507 | /// for caching, and in the \p Index for application during the ThinLTO |
508 | /// backends. This is needed for correctness for exported symbols (ensure |
509 | /// at least one copy kept) and a compile-time optimization (to drop duplicate |
510 | /// copies when possible). |
511 | static void resolvePrevailingInIndex( |
512 | ModuleSummaryIndex &Index, |
513 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> |
514 | &ResolvedODR, |
515 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, |
516 | const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> |
517 | &PrevailingCopy) { |
518 | |
519 | auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { |
520 | const auto &Prevailing = PrevailingCopy.find(Val: GUID); |
521 | // Not in map means that there was only one copy, which must be prevailing. |
522 | if (Prevailing == PrevailingCopy.end()) |
523 | return true; |
524 | return Prevailing->second == S; |
525 | }; |
526 | |
527 | auto recordNewLinkage = [&](StringRef ModuleIdentifier, |
528 | GlobalValue::GUID GUID, |
529 | GlobalValue::LinkageTypes NewLinkage) { |
530 | ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; |
531 | }; |
532 | |
533 | // TODO Conf.VisibilityScheme can be lto::Config::ELF for ELF. |
534 | lto::Config Conf; |
535 | thinLTOResolvePrevailingInIndex(C: Conf, Index, isPrevailing, recordNewLinkage, |
536 | GUIDPreservedSymbols); |
537 | } |
538 | |
539 | // Initialize the TargetMachine builder for a given Triple |
540 | static void initTMBuilder(TargetMachineBuilder &TMBuilder, |
541 | const Triple &TheTriple) { |
542 | if (TMBuilder.MCpu.empty()) |
543 | TMBuilder.MCpu = lto::getThinLTODefaultCPU(TheTriple); |
544 | TMBuilder.TheTriple = std::move(TheTriple); |
545 | } |
546 | |
547 | } // end anonymous namespace |
548 | |
549 | void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { |
550 | MemoryBufferRef Buffer(Data, Identifier); |
551 | |
552 | auto InputOrError = lto::InputFile::create(Object: Buffer); |
553 | if (!InputOrError) |
554 | report_fatal_error(reason: Twine("ThinLTO cannot create input file: " ) + |
555 | toString(E: InputOrError.takeError())); |
556 | |
557 | auto TripleStr = (*InputOrError)->getTargetTriple(); |
558 | Triple TheTriple(TripleStr); |
559 | |
560 | if (Modules.empty()) |
561 | initTMBuilder(TMBuilder, TheTriple: Triple(TheTriple)); |
562 | else if (TMBuilder.TheTriple != TheTriple) { |
563 | if (!TMBuilder.TheTriple.isCompatibleWith(Other: TheTriple)) |
564 | report_fatal_error(reason: "ThinLTO modules with incompatible triples not " |
565 | "supported" ); |
566 | initTMBuilder(TMBuilder, TheTriple: Triple(TMBuilder.TheTriple.merge(Other: TheTriple))); |
567 | } |
568 | |
569 | Modules.emplace_back(args: std::move(*InputOrError)); |
570 | } |
571 | |
572 | void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { |
573 | PreservedSymbols.insert(key: Name); |
574 | } |
575 | |
576 | void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { |
577 | // FIXME: At the moment, we don't take advantage of this extra information, |
578 | // we're conservatively considering cross-references as preserved. |
579 | // CrossReferencedSymbols.insert(Name); |
580 | PreservedSymbols.insert(key: Name); |
581 | } |
582 | |
583 | // TargetMachine factory |
584 | std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { |
585 | std::string ErrMsg; |
586 | const Target *TheTarget = |
587 | TargetRegistry::lookupTarget(Triple: TheTriple.str(), Error&: ErrMsg); |
588 | if (!TheTarget) { |
589 | report_fatal_error(reason: Twine("Can't load target for this Triple: " ) + ErrMsg); |
590 | } |
591 | |
592 | // Use MAttr as the default set of features. |
593 | SubtargetFeatures Features(MAttr); |
594 | Features.getDefaultSubtargetFeatures(Triple: TheTriple); |
595 | std::string FeatureStr = Features.getString(); |
596 | |
597 | std::unique_ptr<TargetMachine> TM( |
598 | TheTarget->createTargetMachine(TT: TheTriple.str(), CPU: MCpu, Features: FeatureStr, Options, |
599 | RM: RelocModel, CM: std::nullopt, OL: CGOptLevel)); |
600 | assert(TM && "Cannot create target machine" ); |
601 | |
602 | return TM; |
603 | } |
604 | |
605 | /** |
606 | * Produce the combined summary index from all the bitcode files: |
607 | * "thin-link". |
608 | */ |
609 | std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { |
610 | std::unique_ptr<ModuleSummaryIndex> CombinedIndex = |
611 | std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/args: false); |
612 | for (auto &Mod : Modules) { |
613 | auto &M = Mod->getSingleBitcodeModule(); |
614 | if (Error Err = M.readSummary(CombinedIndex&: *CombinedIndex, ModulePath: Mod->getName())) { |
615 | // FIXME diagnose |
616 | logAllUnhandledErrors( |
617 | E: std::move(Err), OS&: errs(), |
618 | ErrorBanner: "error: can't create module summary index for buffer: " ); |
619 | return nullptr; |
620 | } |
621 | } |
622 | return CombinedIndex; |
623 | } |
624 | |
625 | namespace { |
626 | struct IsExported { |
627 | const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists; |
628 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols; |
629 | |
630 | IsExported( |
631 | const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists, |
632 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) |
633 | : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} |
634 | |
635 | bool operator()(StringRef ModuleIdentifier, ValueInfo VI) const { |
636 | const auto &ExportList = ExportLists.find(Val: ModuleIdentifier); |
637 | return (ExportList != ExportLists.end() && ExportList->second.count(V: VI)) || |
638 | GUIDPreservedSymbols.count(V: VI.getGUID()); |
639 | } |
640 | }; |
641 | |
642 | struct IsPrevailing { |
643 | const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy; |
644 | IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> |
645 | &PrevailingCopy) |
646 | : PrevailingCopy(PrevailingCopy) {} |
647 | |
648 | bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { |
649 | const auto &Prevailing = PrevailingCopy.find(Val: GUID); |
650 | // Not in map means that there was only one copy, which must be prevailing. |
651 | if (Prevailing == PrevailingCopy.end()) |
652 | return true; |
653 | return Prevailing->second == S; |
654 | }; |
655 | }; |
656 | } // namespace |
657 | |
658 | static void computeDeadSymbolsInIndex( |
659 | ModuleSummaryIndex &Index, |
660 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
661 | // We have no symbols resolution available. And can't do any better now in the |
662 | // case where the prevailing symbol is in a native object. It can be refined |
663 | // with linker information in the future. |
664 | auto isPrevailing = [&](GlobalValue::GUID G) { |
665 | return PrevailingType::Unknown; |
666 | }; |
667 | computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing, |
668 | /* ImportEnabled = */ true); |
669 | } |
670 | |
671 | /** |
672 | * Perform promotion and renaming of exported internal functions. |
673 | * Index is updated to reflect linkage changes from weak resolution. |
674 | */ |
675 | void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, |
676 | const lto::InputFile &File) { |
677 | auto ModuleCount = Index.modulePaths().size(); |
678 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
679 | |
680 | // Collect for each module the list of function it defines (GUID -> Summary). |
681 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries; |
682 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
683 | |
684 | // Convert the preserved symbols set from string to GUID |
685 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
686 | File, PreservedSymbols, TheTriple: Triple(TheModule.getTargetTriple())); |
687 | |
688 | // Add used symbol to the preserved symbols. |
689 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
690 | |
691 | // Compute "dead" symbols, we don't want to import/export these! |
692 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
693 | |
694 | // Compute prevailing symbols |
695 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
696 | computePrevailingCopies(Index, PrevailingCopy); |
697 | |
698 | // Generate import/export list |
699 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
700 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
701 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
702 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
703 | ExportLists); |
704 | |
705 | // Resolve prevailing symbols |
706 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
707 | resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, |
708 | PrevailingCopy); |
709 | |
710 | thinLTOFinalizeInModule(TheModule, |
711 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], |
712 | /*PropagateAttrs=*/false); |
713 | |
714 | // Promote the exported values in the index, so that they are promoted |
715 | // in the module. |
716 | thinLTOInternalizeAndPromoteInIndex( |
717 | Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
718 | isPrevailing: IsPrevailing(PrevailingCopy)); |
719 | |
720 | // FIXME Set ClearDSOLocalOnDeclarations. |
721 | promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); |
722 | } |
723 | |
724 | /** |
725 | * Perform cross-module importing for the module identified by ModuleIdentifier. |
726 | */ |
727 | void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, |
728 | ModuleSummaryIndex &Index, |
729 | const lto::InputFile &File) { |
730 | auto ModuleMap = generateModuleMap(Modules); |
731 | auto ModuleCount = Index.modulePaths().size(); |
732 | |
733 | // Collect for each module the list of function it defines (GUID -> Summary). |
734 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
735 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
736 | |
737 | // Convert the preserved symbols set from string to GUID |
738 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
739 | File, PreservedSymbols, TheTriple: Triple(TheModule.getTargetTriple())); |
740 | |
741 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
742 | |
743 | // Compute "dead" symbols, we don't want to import/export these! |
744 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
745 | |
746 | // Compute prevailing symbols |
747 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
748 | computePrevailingCopies(Index, PrevailingCopy); |
749 | |
750 | // Generate import/export list |
751 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
752 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
753 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
754 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
755 | ExportLists); |
756 | auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; |
757 | |
758 | // FIXME Set ClearDSOLocalOnDeclarations. |
759 | crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, |
760 | /*ClearDSOLocalOnDeclarations=*/false); |
761 | } |
762 | |
763 | /** |
764 | * Compute the list of summaries needed for importing into module. |
765 | */ |
766 | void ThinLTOCodeGenerator::gatherImportedSummariesForModule( |
767 | Module &TheModule, ModuleSummaryIndex &Index, |
768 | std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex, |
769 | const lto::InputFile &File) { |
770 | auto ModuleCount = Index.modulePaths().size(); |
771 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
772 | |
773 | // Collect for each module the list of function it defines (GUID -> Summary). |
774 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
775 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
776 | |
777 | // Convert the preserved symbols set from string to GUID |
778 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
779 | File, PreservedSymbols, TheTriple: Triple(TheModule.getTargetTriple())); |
780 | |
781 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
782 | |
783 | // Compute "dead" symbols, we don't want to import/export these! |
784 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
785 | |
786 | // Compute prevailing symbols |
787 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
788 | computePrevailingCopies(Index, PrevailingCopy); |
789 | |
790 | // Generate import/export list |
791 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
792 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
793 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
794 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
795 | ExportLists); |
796 | |
797 | llvm::gatherImportedSummariesForModule( |
798 | ModulePath: ModuleIdentifier, ModuleToDefinedGVSummaries, |
799 | ImportList: ImportLists[ModuleIdentifier], ModuleToSummariesForIndex); |
800 | } |
801 | |
802 | /** |
803 | * Emit the list of files needed for importing into module. |
804 | */ |
805 | void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName, |
806 | ModuleSummaryIndex &Index, |
807 | const lto::InputFile &File) { |
808 | auto ModuleCount = Index.modulePaths().size(); |
809 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
810 | |
811 | // Collect for each module the list of function it defines (GUID -> Summary). |
812 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
813 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
814 | |
815 | // Convert the preserved symbols set from string to GUID |
816 | auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( |
817 | File, PreservedSymbols, TheTriple: Triple(TheModule.getTargetTriple())); |
818 | |
819 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
820 | |
821 | // Compute "dead" symbols, we don't want to import/export these! |
822 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
823 | |
824 | // Compute prevailing symbols |
825 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
826 | computePrevailingCopies(Index, PrevailingCopy); |
827 | |
828 | // Generate import/export list |
829 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
830 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
831 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
832 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
833 | ExportLists); |
834 | |
835 | std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; |
836 | llvm::gatherImportedSummariesForModule( |
837 | ModulePath: ModuleIdentifier, ModuleToDefinedGVSummaries, |
838 | ImportList: ImportLists[ModuleIdentifier], ModuleToSummariesForIndex); |
839 | |
840 | std::error_code EC; |
841 | if ((EC = EmitImportsFiles(ModulePath: ModuleIdentifier, OutputFilename: OutputName, |
842 | ModuleToSummariesForIndex))) |
843 | report_fatal_error(reason: Twine("Failed to open " ) + OutputName + |
844 | " to save imports lists\n" ); |
845 | } |
846 | |
847 | /** |
848 | * Perform internalization. Runs promote and internalization together. |
849 | * Index is updated to reflect linkage changes. |
850 | */ |
851 | void ThinLTOCodeGenerator::internalize(Module &TheModule, |
852 | ModuleSummaryIndex &Index, |
853 | const lto::InputFile &File) { |
854 | initTMBuilder(TMBuilder, TheTriple: Triple(TheModule.getTargetTriple())); |
855 | auto ModuleCount = Index.modulePaths().size(); |
856 | auto ModuleIdentifier = TheModule.getModuleIdentifier(); |
857 | |
858 | // Convert the preserved symbols set from string to GUID |
859 | auto GUIDPreservedSymbols = |
860 | computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple: TMBuilder.TheTriple); |
861 | |
862 | addUsedSymbolToPreservedGUID(File, PreservedGUID&: GUIDPreservedSymbols); |
863 | |
864 | // Collect for each module the list of function it defines (GUID -> Summary). |
865 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
866 | Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
867 | |
868 | // Compute "dead" symbols, we don't want to import/export these! |
869 | computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); |
870 | |
871 | // Compute prevailing symbols |
872 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
873 | computePrevailingCopies(Index, PrevailingCopy); |
874 | |
875 | // Generate import/export list |
876 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
877 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
878 | ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, |
879 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
880 | ExportLists); |
881 | auto &ExportList = ExportLists[ModuleIdentifier]; |
882 | |
883 | // Be friendly and don't nuke totally the module when the client didn't |
884 | // supply anything to preserve. |
885 | if (ExportList.empty() && GUIDPreservedSymbols.empty()) |
886 | return; |
887 | |
888 | // Resolve prevailing symbols |
889 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
890 | resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, |
891 | PrevailingCopy); |
892 | |
893 | // Promote the exported values in the index, so that they are promoted |
894 | // in the module. |
895 | thinLTOInternalizeAndPromoteInIndex( |
896 | Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
897 | isPrevailing: IsPrevailing(PrevailingCopy)); |
898 | |
899 | // FIXME Set ClearDSOLocalOnDeclarations. |
900 | promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); |
901 | |
902 | // Internalization |
903 | thinLTOFinalizeInModule(TheModule, |
904 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], |
905 | /*PropagateAttrs=*/false); |
906 | |
907 | thinLTOInternalizeModule(TheModule, |
908 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier]); |
909 | } |
910 | |
911 | /** |
912 | * Perform post-importing ThinLTO optimizations. |
913 | */ |
914 | void ThinLTOCodeGenerator::optimize(Module &TheModule) { |
915 | initTMBuilder(TMBuilder, TheTriple: Triple(TheModule.getTargetTriple())); |
916 | |
917 | // Optimize now |
918 | optimizeModule(TheModule, TM&: *TMBuilder.create(), OptLevel, Freestanding, |
919 | DebugPassManager, Index: nullptr); |
920 | } |
921 | |
922 | /// Write out the generated object file, either from CacheEntryPath or from |
923 | /// OutputBuffer, preferring hard-link when possible. |
924 | /// Returns the path to the generated file in SavedObjectsDirectoryPath. |
925 | std::string |
926 | ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, |
927 | const MemoryBuffer &OutputBuffer) { |
928 | auto ArchName = TMBuilder.TheTriple.getArchName(); |
929 | SmallString<128> OutputPath(SavedObjectsDirectoryPath); |
930 | llvm::sys::path::append(path&: OutputPath, |
931 | a: Twine(count) + "." + ArchName + ".thinlto.o" ); |
932 | OutputPath.c_str(); // Ensure the string is null terminated. |
933 | if (sys::fs::exists(Path: OutputPath)) |
934 | sys::fs::remove(path: OutputPath); |
935 | |
936 | // We don't return a memory buffer to the linker, just a list of files. |
937 | if (!CacheEntryPath.empty()) { |
938 | // Cache is enabled, hard-link the entry (or copy if hard-link fails). |
939 | auto Err = sys::fs::create_hard_link(to: CacheEntryPath, from: OutputPath); |
940 | if (!Err) |
941 | return std::string(OutputPath); |
942 | // Hard linking failed, try to copy. |
943 | Err = sys::fs::copy_file(From: CacheEntryPath, To: OutputPath); |
944 | if (!Err) |
945 | return std::string(OutputPath); |
946 | // Copy failed (could be because the CacheEntry was removed from the cache |
947 | // in the meantime by another process), fall back and try to write down the |
948 | // buffer to the output. |
949 | errs() << "remark: can't link or copy from cached entry '" << CacheEntryPath |
950 | << "' to '" << OutputPath << "'\n" ; |
951 | } |
952 | // No cache entry, just write out the buffer. |
953 | std::error_code Err; |
954 | raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None); |
955 | if (Err) |
956 | report_fatal_error(reason: Twine("Can't open output '" ) + OutputPath + "'\n" ); |
957 | OS << OutputBuffer.getBuffer(); |
958 | return std::string(OutputPath); |
959 | } |
960 | |
961 | // Main entry point for the ThinLTO processing |
962 | void ThinLTOCodeGenerator::run() { |
963 | timeTraceProfilerBegin(Name: "ThinLink" , Detail: StringRef("" )); |
964 | auto TimeTraceScopeExit = llvm::make_scope_exit(F: []() { |
965 | if (llvm::timeTraceProfilerEnabled()) |
966 | llvm::timeTraceProfilerEnd(); |
967 | }); |
968 | // Prepare the resulting object vector |
969 | assert(ProducedBinaries.empty() && "The generator should not be reused" ); |
970 | if (SavedObjectsDirectoryPath.empty()) |
971 | ProducedBinaries.resize(new_size: Modules.size()); |
972 | else { |
973 | sys::fs::create_directories(path: SavedObjectsDirectoryPath); |
974 | bool IsDir; |
975 | sys::fs::is_directory(path: SavedObjectsDirectoryPath, result&: IsDir); |
976 | if (!IsDir) |
977 | report_fatal_error(reason: Twine("Unexistent dir: '" ) + SavedObjectsDirectoryPath + "'" ); |
978 | ProducedBinaryFiles.resize(new_size: Modules.size()); |
979 | } |
980 | |
981 | if (CodeGenOnly) { |
982 | // Perform only parallel codegen and return. |
983 | DefaultThreadPool Pool; |
984 | int count = 0; |
985 | for (auto &Mod : Modules) { |
986 | Pool.async(F: [&](int count) { |
987 | LLVMContext Context; |
988 | Context.setDiscardValueNames(LTODiscardValueNames); |
989 | |
990 | // Parse module now |
991 | auto TheModule = loadModuleFromInput(Input: Mod.get(), Context, Lazy: false, |
992 | /*IsImporting*/ false); |
993 | |
994 | // CodeGen |
995 | auto OutputBuffer = codegenModule(TheModule&: *TheModule, TM&: *TMBuilder.create()); |
996 | if (SavedObjectsDirectoryPath.empty()) |
997 | ProducedBinaries[count] = std::move(OutputBuffer); |
998 | else |
999 | ProducedBinaryFiles[count] = |
1000 | writeGeneratedObject(count, CacheEntryPath: "" , OutputBuffer: *OutputBuffer); |
1001 | }, ArgList: count++); |
1002 | } |
1003 | |
1004 | return; |
1005 | } |
1006 | |
1007 | // Sequential linking phase |
1008 | auto Index = linkCombinedIndex(); |
1009 | |
1010 | // Save temps: index. |
1011 | if (!SaveTempsDir.empty()) { |
1012 | auto SaveTempPath = SaveTempsDir + "index.bc" ; |
1013 | std::error_code EC; |
1014 | raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); |
1015 | if (EC) |
1016 | report_fatal_error(reason: Twine("Failed to open " ) + SaveTempPath + |
1017 | " to save optimized bitcode\n" ); |
1018 | writeIndexToFile(Index: *Index, Out&: OS); |
1019 | } |
1020 | |
1021 | |
1022 | // Prepare the module map. |
1023 | auto ModuleMap = generateModuleMap(Modules); |
1024 | auto ModuleCount = Modules.size(); |
1025 | |
1026 | // Collect for each module the list of function it defines (GUID -> Summary). |
1027 | DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); |
1028 | Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); |
1029 | |
1030 | // Convert the preserved symbols set from string to GUID, this is needed for |
1031 | // computing the caching hash and the internalization. |
1032 | DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; |
1033 | for (const auto &M : Modules) |
1034 | computeGUIDPreservedSymbols(File: *M, PreservedSymbols, TheTriple: TMBuilder.TheTriple, |
1035 | GUIDs&: GUIDPreservedSymbols); |
1036 | |
1037 | // Add used symbol from inputs to the preserved symbols. |
1038 | for (const auto &M : Modules) |
1039 | addUsedSymbolToPreservedGUID(File: *M, PreservedGUID&: GUIDPreservedSymbols); |
1040 | |
1041 | // Compute "dead" symbols, we don't want to import/export these! |
1042 | computeDeadSymbolsInIndex(Index&: *Index, GUIDPreservedSymbols); |
1043 | |
1044 | // Synthesize entry counts for functions in the combined index. |
1045 | computeSyntheticCounts(Index&: *Index); |
1046 | |
1047 | // Currently there is no support for enabling whole program visibility via a |
1048 | // linker option in the old LTO API, but this call allows it to be specified |
1049 | // via the internal option. Must be done before WPD below. |
1050 | if (hasWholeProgramVisibility(/* WholeProgramVisibilityEnabledInLTO */ false)) |
1051 | Index->setWithWholeProgramVisibility(); |
1052 | |
1053 | // FIXME: This needs linker information via a TBD new interface |
1054 | updateVCallVisibilityInIndex(Index&: *Index, |
1055 | /*WholeProgramVisibilityEnabledInLTO=*/false, |
1056 | // FIXME: These need linker information via a |
1057 | // TBD new interface. |
1058 | /*DynamicExportSymbols=*/{}, |
1059 | /*VisibleToRegularObjSymbols=*/{}); |
1060 | |
1061 | // Perform index-based WPD. This will return immediately if there are |
1062 | // no index entries in the typeIdMetadata map (e.g. if we are instead |
1063 | // performing IR-based WPD in hybrid regular/thin LTO mode). |
1064 | std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap; |
1065 | std::set<GlobalValue::GUID> ExportedGUIDs; |
1066 | runWholeProgramDevirtOnIndex(Summary&: *Index, ExportedGUIDs, LocalWPDTargetsMap); |
1067 | for (auto GUID : ExportedGUIDs) |
1068 | GUIDPreservedSymbols.insert(V: GUID); |
1069 | |
1070 | // Compute prevailing symbols |
1071 | DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; |
1072 | computePrevailingCopies(Index: *Index, PrevailingCopy); |
1073 | |
1074 | // Collect the import/export lists for all modules from the call-graph in the |
1075 | // combined index. |
1076 | DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount); |
1077 | DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount); |
1078 | ComputeCrossModuleImport(Index: *Index, ModuleToDefinedGVSummaries, |
1079 | isPrevailing: IsPrevailing(PrevailingCopy), ImportLists, |
1080 | ExportLists); |
1081 | |
1082 | // We use a std::map here to be able to have a defined ordering when |
1083 | // producing a hash for the cache entry. |
1084 | // FIXME: we should be able to compute the caching hash for the entry based |
1085 | // on the index, and nuke this map. |
1086 | StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; |
1087 | |
1088 | // Resolve prevailing symbols, this has to be computed early because it |
1089 | // impacts the caching. |
1090 | resolvePrevailingInIndex(Index&: *Index, ResolvedODR, GUIDPreservedSymbols, |
1091 | PrevailingCopy); |
1092 | |
1093 | // Use global summary-based analysis to identify symbols that can be |
1094 | // internalized (because they aren't exported or preserved as per callback). |
1095 | // Changes are made in the index, consumed in the ThinLTO backends. |
1096 | updateIndexWPDForExports(Summary&: *Index, |
1097 | isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
1098 | LocalWPDTargetsMap); |
1099 | thinLTOInternalizeAndPromoteInIndex( |
1100 | Index&: *Index, isExported: IsExported(ExportLists, GUIDPreservedSymbols), |
1101 | isPrevailing: IsPrevailing(PrevailingCopy)); |
1102 | |
1103 | thinLTOPropagateFunctionAttrs(Index&: *Index, isPrevailing: IsPrevailing(PrevailingCopy)); |
1104 | |
1105 | // Make sure that every module has an entry in the ExportLists, ImportList, |
1106 | // GVSummary and ResolvedODR maps to enable threaded access to these maps |
1107 | // below. |
1108 | for (auto &Module : Modules) { |
1109 | auto ModuleIdentifier = Module->getName(); |
1110 | ExportLists[ModuleIdentifier]; |
1111 | ImportLists[ModuleIdentifier]; |
1112 | ResolvedODR[ModuleIdentifier]; |
1113 | ModuleToDefinedGVSummaries[ModuleIdentifier]; |
1114 | } |
1115 | |
1116 | std::vector<BitcodeModule *> ModulesVec; |
1117 | ModulesVec.reserve(n: Modules.size()); |
1118 | for (auto &Mod : Modules) |
1119 | ModulesVec.push_back(x: &Mod->getSingleBitcodeModule()); |
1120 | std::vector<int> ModulesOrdering = lto::generateModulesOrdering(R: ModulesVec); |
1121 | |
1122 | if (llvm::timeTraceProfilerEnabled()) |
1123 | llvm::timeTraceProfilerEnd(); |
1124 | |
1125 | TimeTraceScopeExit.release(); |
1126 | |
1127 | // Parallel optimizer + codegen |
1128 | { |
1129 | DefaultThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount)); |
1130 | for (auto IndexCount : ModulesOrdering) { |
1131 | auto &Mod = Modules[IndexCount]; |
1132 | Pool.async(F: [&](int count) { |
1133 | auto ModuleIdentifier = Mod->getName(); |
1134 | auto &ExportList = ExportLists[ModuleIdentifier]; |
1135 | |
1136 | auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier]; |
1137 | |
1138 | // The module may be cached, this helps handling it. |
1139 | ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, |
1140 | ImportLists[ModuleIdentifier], ExportList, |
1141 | ResolvedODR[ModuleIdentifier], |
1142 | DefinedGVSummaries, OptLevel, Freestanding, |
1143 | TMBuilder); |
1144 | auto CacheEntryPath = CacheEntry.getEntryPath(); |
1145 | |
1146 | { |
1147 | auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); |
1148 | LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss" ) |
1149 | << " '" << CacheEntryPath << "' for buffer " |
1150 | << count << " " << ModuleIdentifier << "\n" ); |
1151 | |
1152 | if (ErrOrBuffer) { |
1153 | // Cache Hit! |
1154 | if (SavedObjectsDirectoryPath.empty()) |
1155 | ProducedBinaries[count] = std::move(ErrOrBuffer.get()); |
1156 | else |
1157 | ProducedBinaryFiles[count] = writeGeneratedObject( |
1158 | count, CacheEntryPath, OutputBuffer: *ErrOrBuffer.get()); |
1159 | return; |
1160 | } |
1161 | } |
1162 | |
1163 | LLVMContext Context; |
1164 | Context.setDiscardValueNames(LTODiscardValueNames); |
1165 | Context.enableDebugTypeODRUniquing(); |
1166 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
1167 | Context, RemarksFilename, RemarksPasses, RemarksFormat, |
1168 | RemarksWithHotness, RemarksHotnessThreshold, Count: count); |
1169 | if (!DiagFileOrErr) { |
1170 | errs() << "Error: " << toString(E: DiagFileOrErr.takeError()) << "\n" ; |
1171 | report_fatal_error(reason: "ThinLTO: Can't get an output file for the " |
1172 | "remarks" ); |
1173 | } |
1174 | |
1175 | // Parse module now |
1176 | auto TheModule = loadModuleFromInput(Input: Mod.get(), Context, Lazy: false, |
1177 | /*IsImporting*/ false); |
1178 | |
1179 | // Save temps: original file. |
1180 | saveTempBitcode(TheModule: *TheModule, TempDir: SaveTempsDir, count, Suffix: ".0.original.bc" ); |
1181 | |
1182 | auto &ImportList = ImportLists[ModuleIdentifier]; |
1183 | // Run the main process now, and generates a binary |
1184 | auto OutputBuffer = ProcessThinLTOModule( |
1185 | TheModule&: *TheModule, Index&: *Index, ModuleMap, TM&: *TMBuilder.create(), ImportList, |
1186 | ExportList, GUIDPreservedSymbols, |
1187 | DefinedGlobals: ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, |
1188 | DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count, |
1189 | DebugPassManager); |
1190 | |
1191 | // Commit to the cache (if enabled) |
1192 | CacheEntry.write(OutputBuffer: *OutputBuffer); |
1193 | |
1194 | if (SavedObjectsDirectoryPath.empty()) { |
1195 | // We need to generated a memory buffer for the linker. |
1196 | if (!CacheEntryPath.empty()) { |
1197 | // When cache is enabled, reload from the cache if possible. |
1198 | // Releasing the buffer from the heap and reloading it from the |
1199 | // cache file with mmap helps us to lower memory pressure. |
1200 | // The freed memory can be used for the next input file. |
1201 | // The final binary link will read from the VFS cache (hopefully!) |
1202 | // or from disk (if the memory pressure was too high). |
1203 | auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); |
1204 | if (auto EC = ReloadedBufferOrErr.getError()) { |
1205 | // On error, keep the preexisting buffer and print a diagnostic. |
1206 | errs() << "remark: can't reload cached file '" << CacheEntryPath |
1207 | << "': " << EC.message() << "\n" ; |
1208 | } else { |
1209 | OutputBuffer = std::move(*ReloadedBufferOrErr); |
1210 | } |
1211 | } |
1212 | ProducedBinaries[count] = std::move(OutputBuffer); |
1213 | return; |
1214 | } |
1215 | ProducedBinaryFiles[count] = writeGeneratedObject( |
1216 | count, CacheEntryPath, OutputBuffer: *OutputBuffer); |
1217 | }, ArgList&: IndexCount); |
1218 | } |
1219 | } |
1220 | |
1221 | pruneCache(Path: CacheOptions.Path, Policy: CacheOptions.Policy, Files: ProducedBinaries); |
1222 | |
1223 | // If statistics were requested, print them out now. |
1224 | if (llvm::AreStatisticsEnabled()) |
1225 | llvm::PrintStatistics(); |
1226 | reportAndResetTimings(); |
1227 | } |
1228 | |