1//===- Parsing, selection, and construction of pass pipelines -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Passes/PassBuilder.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/Analysis/AliasAnalysisEvaluator.h"
20#include "llvm/Analysis/AliasSetTracker.h"
21#include "llvm/Analysis/AssumptionCache.h"
22#include "llvm/Analysis/BasicAliasAnalysis.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/BranchProbabilityInfo.h"
25#include "llvm/Analysis/CFGPrinter.h"
26#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
27#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
28#include "llvm/Analysis/CGSCCPassManager.h"
29#include "llvm/Analysis/CallGraph.h"
30#include "llvm/Analysis/DDG.h"
31#include "llvm/Analysis/DDGPrinter.h"
32#include "llvm/Analysis/Delinearization.h"
33#include "llvm/Analysis/DemandedBits.h"
34#include "llvm/Analysis/DependenceAnalysis.h"
35#include "llvm/Analysis/DivergenceAnalysis.h"
36#include "llvm/Analysis/DominanceFrontier.h"
37#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
38#include "llvm/Analysis/GlobalsModRef.h"
39#include "llvm/Analysis/IRSimilarityIdentifier.h"
40#include "llvm/Analysis/IVUsers.h"
41#include "llvm/Analysis/InlineAdvisor.h"
42#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
43#include "llvm/Analysis/InstCount.h"
44#include "llvm/Analysis/LazyCallGraph.h"
45#include "llvm/Analysis/LazyValueInfo.h"
46#include "llvm/Analysis/Lint.h"
47#include "llvm/Analysis/LoopAccessAnalysis.h"
48#include "llvm/Analysis/LoopCacheAnalysis.h"
49#include "llvm/Analysis/LoopInfo.h"
50#include "llvm/Analysis/LoopNestAnalysis.h"
51#include "llvm/Analysis/MemDerefPrinter.h"
52#include "llvm/Analysis/MemoryDependenceAnalysis.h"
53#include "llvm/Analysis/MemorySSA.h"
54#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
55#include "llvm/Analysis/ModuleSummaryAnalysis.h"
56#include "llvm/Analysis/MustExecute.h"
57#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/Analysis/PhiValues.h"
60#include "llvm/Analysis/PostDominators.h"
61#include "llvm/Analysis/ProfileSummaryInfo.h"
62#include "llvm/Analysis/RegionInfo.h"
63#include "llvm/Analysis/ScalarEvolution.h"
64#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
65#include "llvm/Analysis/ScopedNoAliasAA.h"
66#include "llvm/Analysis/StackLifetime.h"
67#include "llvm/Analysis/StackSafetyAnalysis.h"
68#include "llvm/Analysis/TargetLibraryInfo.h"
69#include "llvm/Analysis/TargetTransformInfo.h"
70#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
71#include "llvm/IR/Dominators.h"
72#include "llvm/IR/IRPrintingPasses.h"
73#include "llvm/IR/PassManager.h"
74#include "llvm/IR/PrintPasses.h"
75#include "llvm/IR/SafepointIRVerifier.h"
76#include "llvm/IR/Verifier.h"
77#include "llvm/Support/CommandLine.h"
78#include "llvm/Support/Debug.h"
79#include "llvm/Support/ErrorHandling.h"
80#include "llvm/Support/FormatVariadic.h"
81#include "llvm/Support/Regex.h"
82#include "llvm/Target/TargetMachine.h"
83#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
84#include "llvm/Transforms/Coroutines/CoroCleanup.h"
85#include "llvm/Transforms/Coroutines/CoroEarly.h"
86#include "llvm/Transforms/Coroutines/CoroElide.h"
87#include "llvm/Transforms/Coroutines/CoroSplit.h"
88#include "llvm/Transforms/IPO/AlwaysInliner.h"
89#include "llvm/Transforms/IPO/Annotation2Metadata.h"
90#include "llvm/Transforms/IPO/ArgumentPromotion.h"
91#include "llvm/Transforms/IPO/Attributor.h"
92#include "llvm/Transforms/IPO/BlockExtractor.h"
93#include "llvm/Transforms/IPO/CalledValuePropagation.h"
94#include "llvm/Transforms/IPO/ConstantMerge.h"
95#include "llvm/Transforms/IPO/CrossDSOCFI.h"
96#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
97#include "llvm/Transforms/IPO/ElimAvailExtern.h"
98#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
99#include "llvm/Transforms/IPO/FunctionAttrs.h"
100#include "llvm/Transforms/IPO/FunctionImport.h"
101#include "llvm/Transforms/IPO/GlobalDCE.h"
102#include "llvm/Transforms/IPO/GlobalOpt.h"
103#include "llvm/Transforms/IPO/GlobalSplit.h"
104#include "llvm/Transforms/IPO/HotColdSplitting.h"
105#include "llvm/Transforms/IPO/IROutliner.h"
106#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
107#include "llvm/Transforms/IPO/Inliner.h"
108#include "llvm/Transforms/IPO/Internalize.h"
109#include "llvm/Transforms/IPO/LoopExtractor.h"
110#include "llvm/Transforms/IPO/LowerTypeTests.h"
111#include "llvm/Transforms/IPO/MergeFunctions.h"
112#include "llvm/Transforms/IPO/OpenMPOpt.h"
113#include "llvm/Transforms/IPO/PartialInlining.h"
114#include "llvm/Transforms/IPO/SCCP.h"
115#include "llvm/Transforms/IPO/SampleProfile.h"
116#include "llvm/Transforms/IPO/SampleProfileProbe.h"
117#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
118#include "llvm/Transforms/IPO/StripSymbols.h"
119#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
120#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
121#include "llvm/Transforms/InstCombine/InstCombine.h"
122#include "llvm/Transforms/Instrumentation.h"
123#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
124#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
125#include "llvm/Transforms/Instrumentation/CGProfile.h"
126#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
127#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
128#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
129#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
130#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
131#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
132#include "llvm/Transforms/Instrumentation/MemProfiler.h"
133#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
134#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
135#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
136#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
137#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
138#include "llvm/Transforms/ObjCARC.h"
139#include "llvm/Transforms/Scalar/ADCE.h"
140#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
141#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
142#include "llvm/Transforms/Scalar/BDCE.h"
143#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
144#include "llvm/Transforms/Scalar/ConstantHoisting.h"
145#include "llvm/Transforms/Scalar/ConstraintElimination.h"
146#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
147#include "llvm/Transforms/Scalar/DCE.h"
148#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
149#include "llvm/Transforms/Scalar/DivRemPairs.h"
150#include "llvm/Transforms/Scalar/EarlyCSE.h"
151#include "llvm/Transforms/Scalar/Float2Int.h"
152#include "llvm/Transforms/Scalar/GVN.h"
153#include "llvm/Transforms/Scalar/GuardWidening.h"
154#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
155#include "llvm/Transforms/Scalar/IndVarSimplify.h"
156#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
157#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
158#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
159#include "llvm/Transforms/Scalar/JumpThreading.h"
160#include "llvm/Transforms/Scalar/LICM.h"
161#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
162#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
163#include "llvm/Transforms/Scalar/LoopDeletion.h"
164#include "llvm/Transforms/Scalar/LoopDistribute.h"
165#include "llvm/Transforms/Scalar/LoopFlatten.h"
166#include "llvm/Transforms/Scalar/LoopFuse.h"
167#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
168#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
169#include "llvm/Transforms/Scalar/LoopInterchange.h"
170#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
171#include "llvm/Transforms/Scalar/LoopPassManager.h"
172#include "llvm/Transforms/Scalar/LoopPredication.h"
173#include "llvm/Transforms/Scalar/LoopReroll.h"
174#include "llvm/Transforms/Scalar/LoopRotation.h"
175#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
176#include "llvm/Transforms/Scalar/LoopSink.h"
177#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
178#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
179#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
180#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
181#include "llvm/Transforms/Scalar/LowerAtomic.h"
182#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
183#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
184#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
185#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
186#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
187#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
188#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
189#include "llvm/Transforms/Scalar/MergeICmps.h"
190#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
191#include "llvm/Transforms/Scalar/NaryReassociate.h"
192#include "llvm/Transforms/Scalar/NewGVN.h"
193#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
194#include "llvm/Transforms/Scalar/Reassociate.h"
195#include "llvm/Transforms/Scalar/Reg2Mem.h"
196#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
197#include "llvm/Transforms/Scalar/SCCP.h"
198#include "llvm/Transforms/Scalar/SROA.h"
199#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
200#include "llvm/Transforms/Scalar/Scalarizer.h"
201#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
202#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
203#include "llvm/Transforms/Scalar/SimplifyCFG.h"
204#include "llvm/Transforms/Scalar/Sink.h"
205#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
206#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
207#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
208#include "llvm/Transforms/Scalar/StructurizeCFG.h"
209#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
210#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
211#include "llvm/Transforms/Utils/AddDiscriminators.h"
212#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
213#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
214#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
215#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
216#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
217#include "llvm/Transforms/Utils/FixIrreducible.h"
218#include "llvm/Transforms/Utils/HelloWorld.h"
219#include "llvm/Transforms/Utils/InjectTLIMappings.h"
220#include "llvm/Transforms/Utils/InstructionNamer.h"
221#include "llvm/Transforms/Utils/LCSSA.h"
222#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
223#include "llvm/Transforms/Utils/LoopSimplify.h"
224#include "llvm/Transforms/Utils/LoopVersioning.h"
225#include "llvm/Transforms/Utils/LowerInvoke.h"
226#include "llvm/Transforms/Utils/LowerSwitch.h"
227#include "llvm/Transforms/Utils/Mem2Reg.h"
228#include "llvm/Transforms/Utils/MetaRenamer.h"
229#include "llvm/Transforms/Utils/NameAnonGlobals.h"
230#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
231#include "llvm/Transforms/Utils/StripGCRelocates.h"
232#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
233#include "llvm/Transforms/Utils/SymbolRewriter.h"
234#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
235#include "llvm/Transforms/Utils/UnifyLoopExits.h"
236#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
237#include "llvm/Transforms/Vectorize/LoopVectorize.h"
238#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
239#include "llvm/Transforms/Vectorize/VectorCombine.h"
240
241using namespace llvm;
242
243extern cl::opt<unsigned> MaxDevirtIterations;
244
245static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
246 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
247 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
248 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
249 "Heuristics-based inliner version."),
250 clEnumValN(InliningAdvisorMode::Development, "development",
251 "Use development mode (runtime-loadable model)."),
252 clEnumValN(InliningAdvisorMode::Release, "release",
253 "Use release mode (AOT-compiled model).")));
254
255static cl::opt<bool> EnableSyntheticCounts(
256 "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
257 cl::desc("Run synthetic function entry count generation "
258 "pass"));
259
260static const Regex DefaultAliasRegex(
261 "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
262
263/// Flag to enable inline deferral during PGO.
264static cl::opt<bool>
265 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
266 cl::Hidden,
267 cl::desc("Enable inline deferral during PGO"));
268
269static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
270 cl::Hidden, cl::ZeroOrMore,
271 cl::desc("Enable memory profiler"));
272
273static cl::opt<bool> PerformMandatoryInliningsFirst(
274 "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
275 cl::desc("Perform mandatory inlinings module-wide, before performing "
276 "inlining."));
277
278static cl::opt<bool> EnableO3NonTrivialUnswitching(
279 "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
280 cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
281
282PipelineTuningOptions::PipelineTuningOptions() {
283 LoopInterleaving = true;
284 LoopVectorization = true;
285 SLPVectorization = false;
286 LoopUnrolling = true;
287 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
288 Coroutines = false;
289 LicmMssaOptCap = SetLicmMssaOptCap;
290 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
291 CallGraphProfile = true;
292 MergeFunctions = false;
293}
294extern cl::opt<bool> ExtraVectorizerPasses;
295
296extern cl::opt<bool> EnableConstraintElimination;
297extern cl::opt<bool> EnableGVNHoist;
298extern cl::opt<bool> EnableGVNSink;
299extern cl::opt<bool> EnableHotColdSplit;
300extern cl::opt<bool> EnableIROutliner;
301extern cl::opt<bool> EnableOrderFileInstrumentation;
302extern cl::opt<bool> EnableCHR;
303extern cl::opt<bool> EnableLoopInterchange;
304extern cl::opt<bool> EnableUnrollAndJam;
305extern cl::opt<bool> EnableLoopFlatten;
306extern cl::opt<bool> RunNewGVN;
307extern cl::opt<bool> RunPartialInlining;
308
309extern cl::opt<bool> FlattenedProfileUsed;
310
311extern cl::opt<AttributorRunOption> AttributorRun;
312extern cl::opt<bool> EnableKnowledgeRetention;
313
314extern cl::opt<bool> EnableMatrix;
315
316extern cl::opt<bool> DisablePreInliner;
317extern cl::opt<int> PreInlineThreshold;
318
319const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
320 /*SpeedLevel*/ 0,
321 /*SizeLevel*/ 0};
322const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
323 /*SpeedLevel*/ 1,
324 /*SizeLevel*/ 0};
325const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
326 /*SpeedLevel*/ 2,
327 /*SizeLevel*/ 0};
328const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
329 /*SpeedLevel*/ 3,
330 /*SizeLevel*/ 0};
331const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
332 /*SpeedLevel*/ 2,
333 /*SizeLevel*/ 1};
334const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
335 /*SpeedLevel*/ 2,
336 /*SizeLevel*/ 2};
337
338namespace {
339
340// The following passes/analyses have custom names, otherwise their name will
341// include `(anonymous namespace)`. These are special since they are only for
342// testing purposes and don't live in a header file.
343
344/// No-op module pass which does nothing.
345struct NoOpModulePass : PassInfoMixin<NoOpModulePass> {
346 PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
347 return PreservedAnalyses::all();
348 }
349
350 static StringRef name() { return "NoOpModulePass"; }
351};
352
353/// No-op module analysis.
354class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
355 friend AnalysisInfoMixin<NoOpModuleAnalysis>;
356 static AnalysisKey Key;
357
358public:
359 struct Result {};
360 Result run(Module &, ModuleAnalysisManager &) { return Result(); }
361 static StringRef name() { return "NoOpModuleAnalysis"; }
362};
363
364/// No-op CGSCC pass which does nothing.
365struct NoOpCGSCCPass : PassInfoMixin<NoOpCGSCCPass> {
366 PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
367 LazyCallGraph &, CGSCCUpdateResult &UR) {
368 return PreservedAnalyses::all();
369 }
370 static StringRef name() { return "NoOpCGSCCPass"; }
371};
372
373/// No-op CGSCC analysis.
374class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
375 friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
376 static AnalysisKey Key;
377
378public:
379 struct Result {};
380 Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
381 return Result();
382 }
383 static StringRef name() { return "NoOpCGSCCAnalysis"; }
384};
385
386/// No-op function pass which does nothing.
387struct NoOpFunctionPass : PassInfoMixin<NoOpFunctionPass> {
388 PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
389 return PreservedAnalyses::all();
390 }
391 static StringRef name() { return "NoOpFunctionPass"; }
392};
393
394/// No-op function analysis.
395class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
396 friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
397 static AnalysisKey Key;
398
399public:
400 struct Result {};
401 Result run(Function &, FunctionAnalysisManager &) { return Result(); }
402 static StringRef name() { return "NoOpFunctionAnalysis"; }
403};
404
405/// No-op loop pass which does nothing.
406struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
407 PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
408 LoopStandardAnalysisResults &, LPMUpdater &) {
409 return PreservedAnalyses::all();
410 }
411 static StringRef name() { return "NoOpLoopPass"; }
412};
413
414/// No-op loop analysis.
415class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
416 friend AnalysisInfoMixin<NoOpLoopAnalysis>;
417 static AnalysisKey Key;
418
419public:
420 struct Result {};
421 Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) {
422 return Result();
423 }
424 static StringRef name() { return "NoOpLoopAnalysis"; }
425};
426
427AnalysisKey NoOpModuleAnalysis::Key;
428AnalysisKey NoOpCGSCCAnalysis::Key;
429AnalysisKey NoOpFunctionAnalysis::Key;
430AnalysisKey NoOpLoopAnalysis::Key;
431
432/// Whether or not we should populate a PassInstrumentationCallbacks's class to
433/// pass name map.
434///
435/// This is for optimization purposes so we don't populate it if we never use
436/// it. This should be updated if new pass instrumentation wants to use the map.
437/// We currently only use this for --print-before/after.
438bool shouldPopulateClassToPassNames() {
439 return !printBeforePasses().empty() || !printAfterPasses().empty();
440}
441
442} // namespace
443
444PassBuilder::PassBuilder(bool DebugLogging, TargetMachine *TM,
445 PipelineTuningOptions PTO, Optional<PGOOptions> PGOOpt,
446 PassInstrumentationCallbacks *PIC)
447 : DebugLogging(DebugLogging), TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
448 if (TM)
449 TM->registerPassBuilderCallbacks(*this, DebugLogging);
450 if (PIC && shouldPopulateClassToPassNames()) {
451#define MODULE_PASS(NAME, CREATE_PASS) \
452 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
453#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
454 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
455#define FUNCTION_PASS(NAME, CREATE_PASS) \
456 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
457#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
458 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
459#define LOOP_PASS(NAME, CREATE_PASS) \
460 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
461#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
462 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
463#define CGSCC_PASS(NAME, CREATE_PASS) \
464 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
465#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
466 PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
467#include "PassRegistry.def"
468 }
469}
470
471void PassBuilder::invokePeepholeEPCallbacks(
472 FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
473 for (auto &C : PeepholeEPCallbacks)
474 C(FPM, Level);
475}
476
477void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
478#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
479 MAM.registerPass([&] { return CREATE_PASS; });
480#include "PassRegistry.def"
481
482 for (auto &C : ModuleAnalysisRegistrationCallbacks)
483 C(MAM);
484}
485
486void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
487#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
488 CGAM.registerPass([&] { return CREATE_PASS; });
489#include "PassRegistry.def"
490
491 for (auto &C : CGSCCAnalysisRegistrationCallbacks)
492 C(CGAM);
493}
494
495void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
496#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
497 FAM.registerPass([&] { return CREATE_PASS; });
498#include "PassRegistry.def"
499
500 for (auto &C : FunctionAnalysisRegistrationCallbacks)
501 C(FAM);
502}
503
504void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
505#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
506 LAM.registerPass([&] { return CREATE_PASS; });
507#include "PassRegistry.def"
508
509 for (auto &C : LoopAnalysisRegistrationCallbacks)
510 C(LAM);
511}
512
513// Helper to add AnnotationRemarksPass.
514static void addAnnotationRemarksPass(ModulePassManager &MPM) {
515 FunctionPassManager FPM;
516 FPM.addPass(AnnotationRemarksPass());
517 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
518}
519
520// Helper to check if the current compilation phase is preparing for LTO
521static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
522 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
523 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
524}
525
526// TODO: Investigate the cost/benefit of tail call elimination on debugging.
527FunctionPassManager
528PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
529 ThinOrFullLTOPhase Phase) {
530
531 FunctionPassManager FPM(DebugLogging);
532
533 // Form SSA out of local memory accesses after breaking apart aggregates into
534 // scalars.
535 FPM.addPass(SROA());
536
537 // Catch trivial redundancies
538 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
539
540 // Hoisting of scalars and load expressions.
541 FPM.addPass(SimplifyCFGPass());
542 FPM.addPass(InstCombinePass());
543
544 FPM.addPass(LibCallsShrinkWrapPass());
545
546 invokePeepholeEPCallbacks(FPM, Level);
547
548 FPM.addPass(SimplifyCFGPass());
549
550 // Form canonically associated expression trees, and simplify the trees using
551 // basic mathematical properties. For example, this will form (nearly)
552 // minimal multiplication trees.
553 FPM.addPass(ReassociatePass());
554
555 // Add the primary loop simplification pipeline.
556 // FIXME: Currently this is split into two loop pass pipelines because we run
557 // some function passes in between them. These can and should be removed
558 // and/or replaced by scheduling the loop pass equivalents in the correct
559 // positions. But those equivalent passes aren't powerful enough yet.
560 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
561 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
562 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
563 // `LoopInstSimplify`.
564 LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
565
566 // Simplify the loop body. We do this initially to clean up after other loop
567 // passes run, either when iterating on a loop or on inner loops with
568 // implications on the outer loop.
569 LPM1.addPass(LoopInstSimplifyPass());
570 LPM1.addPass(LoopSimplifyCFGPass());
571
572 // Try to remove as much code from the loop header as possible,
573 // to reduce amount of IR that will have to be duplicated.
574 // TODO: Investigate promotion cap for O1.
575 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
576
577 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
578 isLTOPreLink(Phase)));
579 // TODO: Investigate promotion cap for O1.
580 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
581 LPM1.addPass(SimpleLoopUnswitchPass());
582
583 LPM2.addPass(LoopIdiomRecognizePass());
584 LPM2.addPass(IndVarSimplifyPass());
585
586 for (auto &C : LateLoopOptimizationsEPCallbacks)
587 C(LPM2, Level);
588
589 LPM2.addPass(LoopDeletionPass());
590
591 if (EnableLoopInterchange)
592 LPM2.addPass(LoopInterchangePass());
593
594 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
595 // because it changes IR to makes profile annotation in back compile
596 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
597 // attributes so we need to make sure and allow the full unroll pass to pay
598 // attention to it.
599 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
600 PGOOpt->Action != PGOOptions::SampleUse)
601 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
602 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
603 PTO.ForgetAllSCEVInLoopUnroll));
604
605 for (auto &C : LoopOptimizerEndEPCallbacks)
606 C(LPM2, Level);
607
608 // We provide the opt remark emitter pass for LICM to use. We only need to do
609 // this once as it is immutable.
610 FPM.addPass(
611 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
612 FPM.addPass(createFunctionToLoopPassAdaptor(
613 std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
614 DebugLogging));
615 FPM.addPass(SimplifyCFGPass());
616 FPM.addPass(InstCombinePass());
617 if (EnableLoopFlatten)
618 FPM.addPass(LoopFlattenPass());
619 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
620 // *All* loop passes must preserve it, in order to be able to use it.
621 FPM.addPass(createFunctionToLoopPassAdaptor(
622 std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
623 DebugLogging));
624
625 // Delete small array after loop unroll.
626 FPM.addPass(SROA());
627
628 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
629 FPM.addPass(MemCpyOptPass());
630
631 // Sparse conditional constant propagation.
632 // FIXME: It isn't clear why we do this *after* loop passes rather than
633 // before...
634 FPM.addPass(SCCPPass());
635
636 // Delete dead bit computations (instcombine runs after to fold away the dead
637 // computations, and then ADCE will run later to exploit any new DCE
638 // opportunities that creates).
639 FPM.addPass(BDCEPass());
640
641 // Run instcombine after redundancy and dead bit elimination to exploit
642 // opportunities opened up by them.
643 FPM.addPass(InstCombinePass());
644 invokePeepholeEPCallbacks(FPM, Level);
645
646 if (PTO.Coroutines)
647 FPM.addPass(CoroElidePass());
648
649 for (auto &C : ScalarOptimizerLateEPCallbacks)
650 C(FPM, Level);
651
652 // Finally, do an expensive DCE pass to catch all the dead code exposed by
653 // the simplifications and basic cleanup after all the simplifications.
654 // TODO: Investigate if this is too expensive.
655 FPM.addPass(ADCEPass());
656 FPM.addPass(SimplifyCFGPass());
657 FPM.addPass(InstCombinePass());
658 invokePeepholeEPCallbacks(FPM, Level);
659
660 return FPM;
661}
662
663FunctionPassManager
664PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
665 ThinOrFullLTOPhase Phase) {
666 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
667
668 // The O1 pipeline has a separate pipeline creation function to simplify
669 // construction readability.
670 if (Level.getSpeedupLevel() == 1)
671 return buildO1FunctionSimplificationPipeline(Level, Phase);
672
673 FunctionPassManager FPM(DebugLogging);
674
675 // Form SSA out of local memory accesses after breaking apart aggregates into
676 // scalars.
677 FPM.addPass(SROA());
678
679 // Catch trivial redundancies
680 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
681 if (EnableKnowledgeRetention)
682 FPM.addPass(AssumeSimplifyPass());
683
684 // Hoisting of scalars and load expressions.
685 if (EnableGVNHoist)
686 FPM.addPass(GVNHoistPass());
687
688 // Global value numbering based sinking.
689 if (EnableGVNSink) {
690 FPM.addPass(GVNSinkPass());
691 FPM.addPass(SimplifyCFGPass());
692 }
693
694 if (EnableConstraintElimination)
695 FPM.addPass(ConstraintEliminationPass());
696
697 // Speculative execution if the target has divergent branches; otherwise nop.
698 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
699
700 // Optimize based on known information about branches, and cleanup afterward.
701 FPM.addPass(JumpThreadingPass());
702 FPM.addPass(CorrelatedValuePropagationPass());
703
704 FPM.addPass(SimplifyCFGPass());
705 if (Level == OptimizationLevel::O3)
706 FPM.addPass(AggressiveInstCombinePass());
707 FPM.addPass(InstCombinePass());
708
709 if (!Level.isOptimizingForSize())
710 FPM.addPass(LibCallsShrinkWrapPass());
711
712 invokePeepholeEPCallbacks(FPM, Level);
713
714 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
715 // using the size value profile. Don't perform this when optimizing for size.
716 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
717 !Level.isOptimizingForSize())
718 FPM.addPass(PGOMemOPSizeOpt());
719
720 FPM.addPass(TailCallElimPass());
721 FPM.addPass(SimplifyCFGPass());
722
723 // Form canonically associated expression trees, and simplify the trees using
724 // basic mathematical properties. For example, this will form (nearly)
725 // minimal multiplication trees.
726 FPM.addPass(ReassociatePass());
727
728 // Add the primary loop simplification pipeline.
729 // FIXME: Currently this is split into two loop pass pipelines because we run
730 // some function passes in between them. These can and should be removed
731 // and/or replaced by scheduling the loop pass equivalents in the correct
732 // positions. But those equivalent passes aren't powerful enough yet.
733 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
734 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
735 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
736 // `LoopInstSimplify`.
737 LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
738
739 // Simplify the loop body. We do this initially to clean up after other loop
740 // passes run, either when iterating on a loop or on inner loops with
741 // implications on the outer loop.
742 LPM1.addPass(LoopInstSimplifyPass());
743 LPM1.addPass(LoopSimplifyCFGPass());
744
745 // Try to remove as much code from the loop header as possible,
746 // to reduce amount of IR that will have to be duplicated.
747 // TODO: Investigate promotion cap for O1.
748 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
749
750 // Disable header duplication in loop rotation at -Oz.
751 LPM1.addPass(
752 LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
753 // TODO: Investigate promotion cap for O1.
754 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
755 LPM1.addPass(
756 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
757 EnableO3NonTrivialUnswitching));
758 LPM2.addPass(LoopIdiomRecognizePass());
759 LPM2.addPass(IndVarSimplifyPass());
760
761 for (auto &C : LateLoopOptimizationsEPCallbacks)
762 C(LPM2, Level);
763
764 LPM2.addPass(LoopDeletionPass());
765
766 if (EnableLoopInterchange)
767 LPM2.addPass(LoopInterchangePass());
768
  // Do not enable unrolling in the ThinLTO pre-link phase during sample PGO
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full unroll attributes, so we need to make sure to allow the full
  // unroll pass to pay attention to them.
774 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
775 PGOOpt->Action != PGOOptions::SampleUse)
776 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
777 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
778 PTO.ForgetAllSCEVInLoopUnroll));
779
780 for (auto &C : LoopOptimizerEndEPCallbacks)
781 C(LPM2, Level);
782
783 // We provide the opt remark emitter pass for LICM to use. We only need to do
784 // this once as it is immutable.
785 FPM.addPass(
786 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
787 FPM.addPass(createFunctionToLoopPassAdaptor(
788 std::move(LPM1), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
789 DebugLogging));
790 FPM.addPass(SimplifyCFGPass());
791 FPM.addPass(InstCombinePass());
792 if (EnableLoopFlatten)
793 FPM.addPass(LoopFlattenPass());
794 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
795 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
796 // *All* loop passes must preserve it, in order to be able to use it.
797 FPM.addPass(createFunctionToLoopPassAdaptor(
798 std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false,
799 DebugLogging));
800
801 // Delete small array after loop unroll.
802 FPM.addPass(SROA());
803
804 // Eliminate redundancies.
805 FPM.addPass(MergedLoadStoreMotionPass());
806 if (RunNewGVN)
807 FPM.addPass(NewGVNPass());
808 else
809 FPM.addPass(GVN());
810
811 // Sparse conditional constant propagation.
812 // FIXME: It isn't clear why we do this *after* loop passes rather than
813 // before...
814 FPM.addPass(SCCPPass());
815
816 // Delete dead bit computations (instcombine runs after to fold away the dead
817 // computations, and then ADCE will run later to exploit any new DCE
818 // opportunities that creates).
819 FPM.addPass(BDCEPass());
820
821 // Run instcombine after redundancy and dead bit elimination to exploit
822 // opportunities opened up by them.
823 FPM.addPass(InstCombinePass());
824 invokePeepholeEPCallbacks(FPM, Level);
825
826 // Re-consider control flow based optimizations after redundancy elimination,
827 // redo DCE, etc.
828 FPM.addPass(JumpThreadingPass());
829 FPM.addPass(CorrelatedValuePropagationPass());
830
831 // Finally, do an expensive DCE pass to catch all the dead code exposed by
832 // the simplifications and basic cleanup after all the simplifications.
833 // TODO: Investigate if this is too expensive.
834 FPM.addPass(ADCEPass());
835
836 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
837 FPM.addPass(MemCpyOptPass());
838
839 FPM.addPass(DSEPass());
840 FPM.addPass(createFunctionToLoopPassAdaptor(
841 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
842 EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
843
844 if (PTO.Coroutines)
845 FPM.addPass(CoroElidePass());
846
847 for (auto &C : ScalarOptimizerLateEPCallbacks)
848 C(FPM, Level);
849
850 FPM.addPass(SimplifyCFGPass(
851 SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
852 FPM.addPass(InstCombinePass());
853 invokePeepholeEPCallbacks(FPM, Level);
854
855 if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
856 (PGOOpt->Action == PGOOptions::IRUse ||
857 PGOOpt->Action == PGOOptions::SampleUse))
858 FPM.addPass(ControlHeightReductionPass());
859
860 return FPM;
861}
862
/// Appends CanonicalizeAliasesPass followed by NameAnonGlobalPass to \p MPM,
/// in that order.
///
/// \param MPM module pass manager that receives the two passes.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
867
868void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
869 PassBuilder::OptimizationLevel Level,
870 bool RunProfileGen, bool IsCS,
871 std::string ProfileFile,
872 std::string ProfileRemappingFile) {
873 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
874 if (!IsCS && !DisablePreInliner) {
875 InlineParams IP;
876
877 IP.DefaultThreshold = PreInlineThreshold;
878
879 // FIXME: The hint threshold has the same value used by the regular inliner
880 // when not optimzing for size. This should probably be lowered after
881 // performance testing.
882 // FIXME: this comment is cargo culted from the old pass manager, revisit).
883 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
884 ModuleInlinerWrapperPass MIWP(IP, DebugLogging);
885 CGSCCPassManager &CGPipeline = MIWP.getPM();
886
887 FunctionPassManager FPM;
888 FPM.addPass(SROA());
889 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
890 FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
891 FPM.addPass(InstCombinePass()); // Combine silly sequences.
892 invokePeepholeEPCallbacks(FPM, Level);
893
894 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
895
896 MPM.addPass(std::move(MIWP));
897
898 // Delete anything that is now dead to make sure that we don't instrument
899 // dead code. Instrumentation can end up keeping dead code around and
900 // dramatically increase code size.
901 MPM.addPass(GlobalDCEPass());
902 }
903
904 if (!RunProfileGen) {
905 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
906 MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
907 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
908 // RequireAnalysisPass for PSI before subsequent non-module passes.
909 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
910 return;
911 }
912
913 // Perform PGO instrumentation.
914 MPM.addPass(PGOInstrumentationGen(IsCS));
915
916 FunctionPassManager FPM;
917 // Disable header duplication in loop rotation at -Oz.
918 FPM.addPass(createFunctionToLoopPassAdaptor(
919 LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
920 /*UseBlockFrequencyInfo=*/false, DebugLogging));
921 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
922
923 // Add the profile lowering pass.
924 InstrProfOptions Options;
925 if (!ProfileFile.empty())
926 Options.InstrProfileOutput = ProfileFile;
927 // Do counter promotion at Level greater than O0.
928 Options.DoCounterPromotion = true;
929 Options.UseBFIInPromotion = IsCS;
930 MPM.addPass(InstrProfiling(Options, IsCS));
931}
932
933void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
934 bool RunProfileGen, bool IsCS,
935 std::string ProfileFile,
936 std::string ProfileRemappingFile) {
937 if (!RunProfileGen) {
938 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
939 MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
940 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
941 // RequireAnalysisPass for PSI before subsequent non-module passes.
942 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
943 return;
944 }
945
946 // Perform PGO instrumentation.
947 MPM.addPass(PGOInstrumentationGen(IsCS));
948 // Add the profile lowering pass.
949 InstrProfOptions Options;
950 if (!ProfileFile.empty())
951 Options.InstrProfileOutput = ProfileFile;
952 // Do not do counter promotion at O0.
953 Options.DoCounterPromotion = false;
954 Options.UseBFIInPromotion = IsCS;
955 MPM.addPass(InstrProfiling(Options, IsCS));
956}
957
958static InlineParams
959getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
960 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
961}
962
/// Builds the inliner pipeline for \p Level and \p Phase.
///
/// Inline parameters are derived from \p Level and then adjusted when PGO
/// options are present. The returned ModuleInlinerWrapperPass is given module
/// passes requiring GlobalsAA and ProfileSummaryAnalysis, and its CGSCC pass
/// manager is filled with function attribute deduction, optional Attributor /
/// coroutine / argument promotion / OpenMP passes, the
/// CGSCCOptimizerLateEPCallbacks, and finally the function simplification
/// pipeline wrapped in a CGSCC-to-function adaptor.
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
                                  ThinOrFullLTOPhase Phase) {
  InlineParams IP = getInlineParamsFromOptLevel(Level);
  // For sample profile use in the ThinLTO pre-link phase the hot call site
  // threshold is forced to 0.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse)
    IP.HotCallSiteThreshold = 0;

  // With any PGO options present, inline deferral follows the
  // EnablePGOInlineDeferral setting.
  if (PGOOpt)
    IP.EnableDeferral = EnablePGOInlineDeferral;

  ModuleInlinerWrapperPass MIWP(IP, DebugLogging,
                                PerformMandatoryInliningsFirst,
                                UseInlineAdvisor, MaxDevirtIterations);

  // Require the GlobalsAA analysis for the module so we can query it within
  // the CGSCC pipeline.
  MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
  // Invalidate AAManager so it can be recreated and pick up the newly available
  // GlobalsAA.
  MIWP.addModulePass(
      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));

  // Require the ProfileSummaryAnalysis for the module so we can query it within
  // the inliner pass.
  MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());

  // Now begin the main postorder CGSCC pipeline.
  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  // manager and trying to emulate its precise behavior. Much of this doesn't
  // make a lot of sense and we should revisit the core CGSCC structure.
  CGSCCPassManager &MainCGPipeline = MIWP.getPM();

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.

  if (AttributorRun & AttributorRunOption::CGSCC)
    MainCGPipeline.addPass(AttributorCGSCCPass());

  if (PTO.Coroutines)
    MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));

  // Now deduce any function attributes based on the current code.
  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());

  // When at O3 add argument promotion to the pass pipeline.
  // FIXME: It isn't at all clear why this should be limited to O3.
  if (Level == OptimizationLevel::O3)
    MainCGPipeline.addPass(ArgumentPromotionPass());

  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
  // there are no OpenMP runtime calls present in the module.
  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
    MainCGPipeline.addPass(OpenMPOptCGSCCPass());

  for (auto &C : CGSCCOptimizerLateEPCallbacks)
    C(MainCGPipeline, Level);

  // Lastly, add the core function simplification pipeline nested inside the
  // CGSCC walk.
  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
      buildFunctionSimplificationPipeline(Level, Phase)));

  return MIWP;
}
1030
/// Builds the module simplification pipeline for \p Level and \p Phase.
///
/// In order, the returned pass manager contains: optional pseudo-probe
/// insertion, early indirect call promotion for the ThinLTO post-link phase,
/// function attribute inference, an early function cleanup pipeline, optional
/// sample profile loading, optional OpenMP / Attributor passes, the
/// PipelineEarlySimplificationEPCallbacks, interprocedural cleanups (IPSCCP,
/// called value propagation, GlobalOpt, mem2reg, dead argument elimination)
/// with a small function cleanup pipeline, optional instrumentation PGO
/// passes, the inliner pipeline, and optionally the memory profiler passes.
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
                                               ThinOrFullLTOPhase Phase) {
  ModulePassManager MPM(DebugLogging);

  // Place pseudo probe instrumentation as the first pass of the pipeline to
  // minimize the impact of optimization changes.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
    MPM.addPass(SampleProfileProbePass(TM));

  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);

  // In ThinLTO mode, when flattened profile is used, all the available
  // profile information will be annotated in PreLink phase so there is
  // no need to load the profile again in PostLink.
  bool LoadSampleProfile =
      HasSampleProfile &&
      !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);

  // During the ThinLTO backend phase we perform early indirect call promotion
  // here, before globalopt. Otherwise imported available_externally functions
  // look unreferenced and are removed. If we are going to load the sample
  // profile then defer until later.
  // TODO: See if we can move later and consolidate with the location where
  // we perform ICP when we are loading a sample profile.
  // TODO: We pass HasSampleProfile (whether there was a sample profile file
  // passed to the compile) to the SamplePGO flag of ICP. This is used to
  // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
  // provide the sample profile file.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  // Create an early function pass manager to cleanup the output of the
  // frontend.
  FunctionPassManager EarlyFPM(DebugLogging);
  // Lower llvm.expect to metadata before attempting transforms.
  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
  EarlyFPM.addPass(LowerExpectIntrinsicPass());
  EarlyFPM.addPass(SimplifyCFGPass());
  EarlyFPM.addPass(SROA());
  EarlyFPM.addPass(EarlyCSEPass());
  if (PTO.Coroutines)
    EarlyFPM.addPass(CoroEarlyPass());
  if (Level == OptimizationLevel::O3)
    EarlyFPM.addPass(CallSiteSplittingPass());

  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
  // to convert bitcast to direct calls so that they can be inlined during the
  // profile annotation preparation step.
  // More details about SamplePGO design can be found in:
  // https://research.google.com/pubs/pub45290.html
  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
  if (LoadSampleProfile)
    EarlyFPM.addPass(InstCombinePass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));

  if (LoadSampleProfile) {
    // Annotate sample profile right after early FPM to ensure freshness of
    // the debug info.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile, Phase));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
    // for the profile annotation to be accurate in the LTO backend.
    if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
        Phase != ThinOrFullLTOPhase::FullLTOPreLink)
      // We perform early indirect call promotion here, before globalopt.
      // This is important for the ThinLTO backend phase because otherwise
      // imported available_externally functions look unreferenced and are
      // removed.
      MPM.addPass(
          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
  }

  // Try to perform OpenMP specific optimizations on the module. This is a
  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
    MPM.addPass(OpenMPOptPass());

  if (AttributorRun & AttributorRunOption::MODULE)
    MPM.addPass(AttributorPass());

  // Lower type metadata and the type.test intrinsic in the ThinLTO
  // post link pipeline after ICP. This is to enable usage of the type
  // tests in ICP sequences.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  for (auto &C : PipelineEarlySimplificationEPCallbacks)
    C(MPM, Level);

  // Interprocedural constant propagation now that basic cleanup has occurred
  // and prior to optimizing globals.
  // FIXME: This position in the pipeline hasn't been carefully considered in
  // years, it should be re-analyzed.
  MPM.addPass(IPSCCPPass());

  // Attach metadata to indirect call sites indicating the set of functions
  // they may target at run-time. This should follow IPSCCP.
  MPM.addPass(CalledValuePropagationPass());

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  // FIXME: Should this instead be a run of SROA?
  // FIXME: We should probably run instcombine and simplify-cfg afterward to
  // delete control flows that are dead once globals have been folded to
  // constants.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Remove any dead arguments exposed by cleanups and constant folding
  // globals.
  MPM.addPass(DeadArgumentEliminationPass());

  // Create a small function pass pipeline to cleanup after all the global
  // optimizations.
  FunctionPassManager GlobalCleanupPM(DebugLogging);
  GlobalCleanupPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);

  GlobalCleanupPM.addPass(SimplifyCFGPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));

  // Add all the requested passes for instrumentation PGO, if requested.
  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
      (PGOOpt->Action == PGOOptions::IRInstr ||
       PGOOpt->Action == PGOOptions::IRUse)) {
    addPGOInstrPasses(MPM, Level,
                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
                      /* IsCS */ false, PGOOpt->ProfileFile,
                      PGOOpt->ProfileRemappingFile);
    MPM.addPass(PGOIndirectCallPromotion(false, false));
  }
  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
      PGOOpt->CSAction == PGOOptions::CSIRInstr)
    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));

  // Synthesize function entry counts for non-PGO compilation.
  if (EnableSyntheticCounts && !PGOOpt)
    MPM.addPass(SyntheticCountsPropagation());

  MPM.addPass(buildInlinerPipeline(Level, Phase));

  // The memory profiler passes are skipped in the ThinLTO pre-link phase.
  if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
    MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
    MPM.addPass(ModuleMemProfilerPass());
  }

  return MPM;
}
1193
1194ModulePassManager
1195PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1196 bool LTOPreLink) {
1197 ModulePassManager MPM(DebugLogging);
1198
1199 // Optimize globals now that the module is fully simplified.
1200 MPM.addPass(GlobalOptPass());
1201 MPM.addPass(GlobalDCEPass());
1202
1203 // Run partial inlining pass to partially inline functions that have
1204 // large bodies.
1205 if (RunPartialInlining)
1206 MPM.addPass(PartialInlinerPass());
1207
1208 // Remove avail extern fns and globals definitions since we aren't compiling
1209 // an object file for later LTO. For LTO we want to preserve these so they
1210 // are eligible for inlining at link-time. Note if they are unreferenced they
1211 // will be removed by GlobalDCE later, so this only impacts referenced
1212 // available externally globals. Eventually they will be suppressed during
1213 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1214 // may make globals referenced by available external functions dead and saves
1215 // running remaining passes on the eliminated functions. These should be
1216 // preserved during prelinking for link-time inlining decisions.
1217 if (!LTOPreLink)
1218 MPM.addPass(EliminateAvailableExternallyPass());
1219
1220 if (EnableOrderFileInstrumentation)
1221 MPM.addPass(InstrOrderFilePass());
1222
1223 // Do RPO function attribute inference across the module to forward-propagate
1224 // attributes where applicable.
1225 // FIXME: Is this really an optimization rather than a canonicalization?
1226 MPM.addPass(ReversePostOrderFunctionAttrsPass());
1227
1228 // Do a post inline PGO instrumentation and use pass. This is a context
1229 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1230 // cross-module inline has not been done yet. The context sensitive
1231 // instrumentation is after all the inlines are done.
1232 if (!LTOPreLink && PGOOpt) {
1233 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1234 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1235 /* IsCS */ true, PGOOpt->CSProfileGenFile,
1236 PGOOpt->ProfileRemappingFile);
1237 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1238 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1239 /* IsCS */ true, PGOOpt->ProfileFile,
1240 PGOOpt->ProfileRemappingFile);
1241 }
1242
1243 // Re-require GloblasAA here prior to function passes. This is particularly
1244 // useful as the above will have inlined, DCE'ed, and function-attr
1245 // propagated everything. We should at this point have a reasonably minimal
1246 // and richly annotated call graph. By computing aliasing and mod/ref
1247 // information for all local globals here, the late loop passes and notably
1248 // the vectorizer will be able to use them to help recognize vectorizable
1249 // memory operations.
1250 MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
1251
1252 FunctionPassManager OptimizePM(DebugLogging);
1253 OptimizePM.addPass(Float2IntPass());
1254 OptimizePM.addPass(LowerConstantIntrinsicsPass());
1255
1256 if (EnableMatrix) {
1257 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1258 OptimizePM.addPass(EarlyCSEPass());
1259 }
1260
1261 // FIXME: We need to run some loop optimizations to re-rotate loops after
1262 // simplify-cfg and others undo their rotation.
1263
1264 // Optimize the loop execution. These passes operate on entire loop nests
1265 // rather than on each loop in an inside-out manner, and so they are actually
1266 // function passes.
1267
1268 for (auto &C : VectorizerStartEPCallbacks)
1269 C(OptimizePM, Level);
1270
1271 // First rotate loops that may have been un-rotated by prior passes.
1272 // Disable header duplication at -Oz.
1273 OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1274 LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
1275 EnableMSSALoopDependency,
1276 /*UseBlockFrequencyInfo=*/false, DebugLogging));
1277
1278 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1279 // into separate loop that would otherwise inhibit vectorization. This is
1280 // currently only performed for loops marked with the metadata
1281 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1282 OptimizePM.addPass(LoopDistributePass());
1283
1284 // Populates the VFABI attribute with the scalar-to-vector mappings
1285 // from the TargetLibraryInfo.
1286 OptimizePM.addPass(InjectTLIMappings());
1287
1288 // Now run the core loop vectorizer.
1289 OptimizePM.addPass(LoopVectorizePass(
1290 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1291
1292 // Eliminate loads by forwarding stores from the previous iteration to loads
1293 // of the current iteration.
1294 OptimizePM.addPass(LoopLoadEliminationPass());
1295
1296 // Cleanup after the loop optimization passes.
1297 OptimizePM.addPass(InstCombinePass());
1298
1299 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1300 // At higher optimization levels, try to clean up any runtime overlap and
1301 // alignment checks inserted by the vectorizer. We want to track correlated
1302 // runtime checks for two inner loops in the same outer loop, fold any
1303 // common computations, hoist loop-invariant aspects out of any outer loop,
1304 // and unswitch the runtime checks if possible. Once hoisted, we may have
1305 // dead (or speculatable) control flows or more combining opportunities.
1306 OptimizePM.addPass(EarlyCSEPass());
1307 OptimizePM.addPass(CorrelatedValuePropagationPass());
1308 OptimizePM.addPass(InstCombinePass());
1309 LoopPassManager LPM(DebugLogging);
1310 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
1311 LPM.addPass(
1312 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
1313 OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1314 OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1315 std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
1316 DebugLogging));
1317 OptimizePM.addPass(SimplifyCFGPass());
1318 OptimizePM.addPass(InstCombinePass());
1319 }
1320
1321 // Now that we've formed fast to execute loop structures, we do further
1322 // optimizations. These are run afterward as they might block doing complex
1323 // analyses and transforms such as what are needed for loop vectorization.
1324
1325 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1326 // GVN, loop transforms, and others have already run, so it's now better to
1327 // convert to more optimized IR using more aggressive simplify CFG options.
1328 // The extra sinking transform can create larger basic blocks, so do this
1329 // before SLP vectorization.
1330 OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1331 .forwardSwitchCondToPhi(true)
1332 .convertSwitchToLookupTable(true)
1333 .needCanonicalLoops(false)
1334 .hoistCommonInsts(true)
1335 .sinkCommonInsts(true)));
1336
1337 // Optimize parallel scalar instruction chains into SIMD instructions.
1338 if (PTO.SLPVectorization) {
1339 OptimizePM.addPass(SLPVectorizerPass());
1340 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1341 OptimizePM.addPass(EarlyCSEPass());
1342 }
1343 }
1344
1345 // Enhance/cleanup vector code.
1346 OptimizePM.addPass(VectorCombinePass());
1347 OptimizePM.addPass(InstCombinePass());
1348
1349 // Unroll small loops to hide loop backedge latency and saturate any parallel
1350 // execution resources of an out-of-order processor. We also then need to
1351 // clean up redundancies and loop invariant code.
1352 // FIXME: It would be really good to use a loop-integrated instruction
1353 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1354 // across the loop nests.
1355 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1356 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1357 OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
1358 }
1359 OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
1360 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1361 PTO.ForgetAllSCEVInLoopUnroll)));
1362 OptimizePM.addPass(WarnMissedTransformationsPass());
1363 OptimizePM.addPass(InstCombinePass());
1364 OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1365 OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1366 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
1367 EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true, DebugLogging));
1368
1369 // Now that we've vectorized and unrolled loops, we may have more refined
1370 // alignment information, try to re-derive it here.
1371 OptimizePM.addPass(AlignmentFromAssumptionsPass());
1372
1373 // Split out cold code. Splitting is done late to avoid hiding context from
1374 // other optimizations and inadvertently regressing performance. The tradeoff
1375 // is that this has a higher code size cost than splitting early.
1376 if (EnableHotColdSplit && !LTOPreLink)
1377 MPM.addPass(HotColdSplittingPass());
1378
1379 // Search the code for similar regions of code. If enough similar regions can
1380 // be found where extracting the regions into their own function will decrease
1381 // the size of the program, we extract the regions, a deduplicate the
1382 // structurally similar regions.
1383 if (EnableIROutliner)
1384 MPM.addPass(IROutlinerPass());
1385
1386 // Merge functions if requested.
1387 if (PTO.MergeFunctions)
1388 MPM.addPass(MergeFunctionsPass());
1389
1390 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1391 // canonicalization pass that enables other optimizations. As a result,
1392 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1393 // result too early.
1394 OptimizePM.addPass(LoopSinkPass());
1395
1396 // And finally clean up LCSSA form before generating code.
1397 OptimizePM.addPass(InstSimplifyPass());
1398
1399 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1400 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1401 // flattening of blocks.
1402 OptimizePM.addPass(DivRemPairsPass());
1403
1404 // LoopSink (and other loop passes since the last simplifyCFG) might have
1405 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1406 OptimizePM.addPass(SimplifyCFGPass());
1407
1408 // Optimize PHIs by speculating around them when profitable. Note that this
1409 // pass needs to be run after any PRE or similar pass as it is essentially
1410 // inserting redundancies into the program. This even includes SimplifyCFG.
1411 OptimizePM.addPass(SpeculateAroundPHIsPass());
1412
1413 if (PTO.Coroutines)
1414 OptimizePM.addPass(CoroCleanupPass());
1415
1416 // Add the core optimizing pipeline.
1417 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
1418
1419 for (auto &C : OptimizerLastEPCallbacks)
1420 C(MPM, Level);
1421
1422 if (PTO.CallGraphProfile)
1423 MPM.addPass(CGProfilePass());
1424
1425 // Now we need to do some global optimization transforms.
1426 // FIXME: It would seem like these should come first in the optimization
1427 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1428 // ordering here.
1429 MPM.addPass(GlobalDCEPass());
1430 MPM.addPass(ConstantMergePass());
1431
1432 // TODO: Relative look table converter pass caused an issue when full lto is
1433 // enabled. See https://reviews.llvm.org/D94355 for more details.
1434 // Until the issue fixed, disable this pass during pre-linking phase.
1435 if (!LTOPreLink)
1436 MPM.addPass(RelLookupTableConverterPass());
1437
1438 return MPM;
1439}
1440
1441ModulePassManager
1442PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1443 bool LTOPreLink) {
1444 assert(Level != OptimizationLevel::O0 &&
1445 "Must request optimizations for the default pipeline!");
1446
1447 ModulePassManager MPM(DebugLogging);
1448
1449 // Convert @llvm.global.annotations to !annotation metadata.
1450 MPM.addPass(Annotation2MetadataPass());
1451
1452 // Force any function attributes we want the rest of the pipeline to observe.
1453 MPM.addPass(ForceFunctionAttrsPass());
1454
1455 // Apply module pipeline start EP callback.
1456 for (auto &C : PipelineStartEPCallbacks)
1457 C(MPM, Level);
1458
1459 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1460 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1461
1462 // Add the core simplification pipeline.
1463 MPM.addPass(buildModuleSimplificationPipeline(
1464 Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
1465 : ThinOrFullLTOPhase::None));
1466
1467 // Now add the optimization pipeline.
1468 MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
1469
1470 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1471 MPM.addPass(PseudoProbeUpdatePass());
1472
1473 // Emit annotation remarks.
1474 addAnnotationRemarksPass(MPM);
1475
1476 if (LTOPreLink)
1477 addRequiredLTOPreLinkPasses(MPM);
1478
1479 return MPM;
1480}
1481
1482ModulePassManager
1483PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1484 assert(Level != OptimizationLevel::O0 &&
1485 "Must request optimizations for the default pipeline!");
1486
1487 ModulePassManager MPM(DebugLogging);
1488
1489 // Convert @llvm.global.annotations to !annotation metadata.
1490 MPM.addPass(Annotation2MetadataPass());
1491
1492 // Force any function attributes we want the rest of the pipeline to observe.
1493 MPM.addPass(ForceFunctionAttrsPass());
1494
1495 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1496 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1497
1498 // Apply module pipeline start EP callback.
1499 for (auto &C : PipelineStartEPCallbacks)
1500 C(MPM, Level);
1501
1502 // If we are planning to perform ThinLTO later, we don't bloat the code with
1503 // unrolling/vectorization/... now. Just simplify the module as much as we
1504 // can.
1505 MPM.addPass(buildModuleSimplificationPipeline(
1506 Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1507
1508 // Run partial inlining pass to partially inline functions that have
1509 // large bodies.
1510 // FIXME: It isn't clear whether this is really the right place to run this
1511 // in ThinLTO. Because there is another canonicalization and simplification
1512 // phase that will run after the thin link, running this here ends up with
1513 // less information than will be available later and it may grow functions in
1514 // ways that aren't beneficial.
1515 if (RunPartialInlining)
1516 MPM.addPass(PartialInlinerPass());
1517
1518 // Reduce the size of the IR as much as possible.
1519 MPM.addPass(GlobalOptPass());
1520
1521 // Module simplification splits coroutines, but does not fully clean up
1522 // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
1523 // on these, we schedule the cleanup here.
1524 if (PTO.Coroutines)
1525 MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
1526
1527 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1528 MPM.addPass(PseudoProbeUpdatePass());
1529
1530 // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
1531 // optimization is going to be done in PostLink stage, but clang can't
1532 // add callbacks there in case of in-process ThinLTO called by linker.
1533 for (auto &C : OptimizerLastEPCallbacks)
1534 C(MPM, Level);
1535
1536 // Emit annotation remarks.
1537 addAnnotationRemarksPass(MPM);
1538
1539 addRequiredLTOPreLinkPasses(MPM);
1540
1541 return MPM;
1542}
1543
1544ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1545 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1546 ModulePassManager MPM(DebugLogging);
1547
1548 // Convert @llvm.global.annotations to !annotation metadata.
1549 MPM.addPass(Annotation2MetadataPass());
1550
1551 if (ImportSummary) {
1552 // These passes import type identifier resolutions for whole-program
1553 // devirtualization and CFI. They must run early because other passes may
1554 // disturb the specific instruction patterns that these passes look for,
1555 // creating dependencies on resolutions that may not appear in the summary.
1556 //
1557 // For example, GVN may transform the pattern assume(type.test) appearing in
1558 // two basic blocks into assume(phi(type.test, type.test)), which would
1559 // transform a dependency on a WPD resolution into a dependency on a type
1560 // identifier resolution for CFI.
1561 //
1562 // Also, WPD has access to more precise information than ICP and can
1563 // devirtualize more effectively, so it should operate on the IR first.
1564 //
1565 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1566 // metadata and intrinsics.
1567 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1568 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1569 }
1570
1571 if (Level == OptimizationLevel::O0) {
1572 // Run a second time to clean up any type tests left behind by WPD for use
1573 // in ICP.
1574 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1575 // Drop available_externally and unreferenced globals. This is necessary
1576 // with ThinLTO in order to avoid leaving undefined references to dead
1577 // globals in the object file.
1578 MPM.addPass(EliminateAvailableExternallyPass());
1579 MPM.addPass(GlobalDCEPass());
1580 return MPM;
1581 }
1582
1583 // Force any function attributes we want the rest of the pipeline to observe.
1584 MPM.addPass(ForceFunctionAttrsPass());
1585
1586 // Add the core simplification pipeline.
1587 MPM.addPass(buildModuleSimplificationPipeline(
1588 Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1589
1590 // Now add the optimization pipeline.
1591 MPM.addPass(buildModuleOptimizationPipeline(Level));
1592
1593 // Emit annotation remarks.
1594 addAnnotationRemarksPass(MPM);
1595
1596 return MPM;
1597}
1598
1599ModulePassManager
1600PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1601 assert(Level != OptimizationLevel::O0 &&
1602 "Must request optimizations for the default pipeline!");
1603 // FIXME: We should use a customized pre-link pipeline!
1604 return buildPerModuleDefaultPipeline(Level,
1605 /* LTOPreLink */ true);
1606}
1607
1608ModulePassManager
1609PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1610 ModuleSummaryIndex *ExportSummary) {
1611 ModulePassManager MPM(DebugLogging);
1612
1613 // Convert @llvm.global.annotations to !annotation metadata.
1614 MPM.addPass(Annotation2MetadataPass());
1615
1616 if (Level == OptimizationLevel::O0) {
1617 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1618 // metadata and intrinsics.
1619 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1620 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1621 // Run a second time to clean up any type tests left behind by WPD for use
1622 // in ICP.
1623 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1624
1625 // Emit annotation remarks.
1626 addAnnotationRemarksPass(MPM);
1627
1628 return MPM;
1629 }
1630
1631 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1632 // Load sample profile before running the LTO optimization pipeline.
1633 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1634 PGOOpt->ProfileRemappingFile,
1635 ThinOrFullLTOPhase::FullLTOPostLink));
1636 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1637 // RequireAnalysisPass for PSI before subsequent non-module passes.
1638 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1639 }
1640
1641 // Remove unused virtual tables to improve the quality of code generated by
1642 // whole-program devirtualization and bitset lowering.
1643 MPM.addPass(GlobalDCEPass());
1644
1645 // Force any function attributes we want the rest of the pipeline to observe.
1646 MPM.addPass(ForceFunctionAttrsPass());
1647
1648 // Do basic inference of function attributes from known properties of system
1649 // libraries and other oracles.
1650 MPM.addPass(InferFunctionAttrsPass());
1651
1652 if (Level.getSpeedupLevel() > 1) {
1653 FunctionPassManager EarlyFPM(DebugLogging);
1654 EarlyFPM.addPass(CallSiteSplittingPass());
1655 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
1656
1657 // Indirect call promotion. This should promote all the targets that are
1658 // left by the earlier promotion pass that promotes intra-module targets.
1659 // This two-step promotion is to save the compile time. For LTO, it should
1660 // produce the same result as if we only do promotion here.
1661 MPM.addPass(PGOIndirectCallPromotion(
1662 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1663 // Propagate constants at call sites into the functions they call. This
1664 // opens opportunities for globalopt (and inlining) by substituting function
1665 // pointers passed as arguments to direct uses of functions.
1666 MPM.addPass(IPSCCPPass());
1667
1668 // Attach metadata to indirect call sites indicating the set of functions
1669 // they may target at run-time. This should follow IPSCCP.
1670 MPM.addPass(CalledValuePropagationPass());
1671 }
1672
1673 // Now deduce any function attributes based in the current code.
1674 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1675 PostOrderFunctionAttrsPass()));
1676
1677 // Do RPO function attribute inference across the module to forward-propagate
1678 // attributes where applicable.
1679 // FIXME: Is this really an optimization rather than a canonicalization?
1680 MPM.addPass(ReversePostOrderFunctionAttrsPass());
1681
1682 // Use in-range annotations on GEP indices to split globals where beneficial.
1683 MPM.addPass(GlobalSplitPass());
1684
1685 // Run whole program optimization of virtual call when the list of callees
1686 // is fixed.
1687 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1688
1689 // Stop here at -O1.
1690 if (Level == OptimizationLevel::O1) {
1691 // The LowerTypeTestsPass needs to run to lower type metadata and the
1692 // type.test intrinsics. The pass does nothing if CFI is disabled.
1693 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1694 // Run a second time to clean up any type tests left behind by WPD for use
1695 // in ICP (which is performed earlier than this in the regular LTO
1696 // pipeline).
1697 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1698
1699 // Emit annotation remarks.
1700 addAnnotationRemarksPass(MPM);
1701
1702 return MPM;
1703 }
1704
1705 // Optimize globals to try and fold them into constants.
1706 MPM.addPass(GlobalOptPass());
1707
1708 // Promote any localized globals to SSA registers.
1709 MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1710
1711 // Linking modules together can lead to duplicate global constant, only
1712 // keep one copy of each constant.
1713 MPM.addPass(ConstantMergePass());
1714
1715 // Remove unused arguments from functions.
1716 MPM.addPass(DeadArgumentEliminationPass());
1717
1718 // Reduce the code after globalopt and ipsccp. Both can open up significant
1719 // simplification opportunities, and both can propagate functions through
1720 // function pointers. When this happens, we often have to resolve varargs
1721 // calls, etc, so let instcombine do this.
1722 FunctionPassManager PeepholeFPM(DebugLogging);
1723 if (Level == OptimizationLevel::O3)
1724 PeepholeFPM.addPass(AggressiveInstCombinePass());
1725 PeepholeFPM.addPass(InstCombinePass());
1726 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1727
1728 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
1729
1730 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1731 // generally clean up exception handling overhead. It isn't clear this is
1732 // valuable as the inliner doesn't currently care whether it is inlining an
1733 // invoke or a call.
1734 // Run the inliner now.
1735 MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level),
1736 DebugLogging));
1737
1738 // Optimize globals again after we ran the inliner.
1739 MPM.addPass(GlobalOptPass());
1740
1741 // Garbage collect dead functions.
1742 // FIXME: Add ArgumentPromotion pass after once it's ported.
1743 MPM.addPass(GlobalDCEPass());
1744
1745 FunctionPassManager FPM(DebugLogging);
1746 // The IPO Passes may leave cruft around. Clean up after them.
1747 FPM.addPass(InstCombinePass());
1748 invokePeepholeEPCallbacks(FPM, Level);
1749
1750 FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1751
1752 // Do a post inline PGO instrumentation and use pass. This is a context
1753 // sensitive PGO pass.
1754 if (PGOOpt) {
1755 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1756 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1757 /* IsCS */ true, PGOOpt->CSProfileGenFile,
1758 PGOOpt->ProfileRemappingFile);
1759 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1760 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1761 /* IsCS */ true, PGOOpt->ProfileFile,
1762 PGOOpt->ProfileRemappingFile);
1763 }
1764
1765 // Break up allocas
1766 FPM.addPass(SROA());
1767
1768 // LTO provides additional opportunities for tailcall elimination due to
1769 // link-time inlining, and visibility of nocapture attribute.
1770 FPM.addPass(TailCallElimPass());
1771
1772 // Run a few AA driver optimizations here and now to cleanup the code.
1773 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
1774
1775 MPM.addPass(
1776 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1777 // FIXME: here we run IP alias analysis in the legacy PM.
1778
1779 FunctionPassManager MainFPM;
1780
1781 MainFPM.addPass(createFunctionToLoopPassAdaptor(
1782 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)));
1783
1784 if (RunNewGVN)
1785 MainFPM.addPass(NewGVNPass());
1786 else
1787 MainFPM.addPass(GVN());
1788
1789 // Remove dead memcpy()'s.
1790 MainFPM.addPass(MemCpyOptPass());
1791
1792 // Nuke dead stores.
1793 MainFPM.addPass(DSEPass());
1794 MainFPM.addPass(MergedLoadStoreMotionPass());
1795
1796 // More loops are countable; try to optimize them.
1797 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1798 MainFPM.addPass(LoopFlattenPass());
1799
1800 if (EnableConstraintElimination)
1801 MainFPM.addPass(ConstraintEliminationPass());
1802
1803 LoopPassManager LPM(DebugLogging);
1804 LPM.addPass(IndVarSimplifyPass());
1805 LPM.addPass(LoopDeletionPass());
1806 // FIXME: Add loop interchange.
1807
1808 // Unroll small loops and perform peeling.
1809 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1810 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1811 PTO.ForgetAllSCEVInLoopUnroll));
1812 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1813 // *All* loop passes must preserve it, in order to be able to use it.
1814 MainFPM.addPass(createFunctionToLoopPassAdaptor(
1815 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true,
1816 DebugLogging));
1817
1818 MainFPM.addPass(LoopDistributePass());
1819 MainFPM.addPass(LoopVectorizePass(
1820 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1821 // The vectorizer may have significantly shortened a loop body; unroll again.
1822 MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1823 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1824 PTO.ForgetAllSCEVInLoopUnroll)));
1825
1826 MainFPM.addPass(WarnMissedTransformationsPass());
1827
1828 MainFPM.addPass(InstCombinePass());
1829 MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
1830 MainFPM.addPass(SCCPPass());
1831 MainFPM.addPass(InstCombinePass());
1832 MainFPM.addPass(BDCEPass());
1833
1834 // More scalar chains could be vectorized due to more alias information
1835 if (PTO.SLPVectorization) {
1836 MainFPM.addPass(SLPVectorizerPass());
1837 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1838 MainFPM.addPass(EarlyCSEPass());
1839 }
1840 }
1841
1842 MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization.
1843
1844 // After vectorization, assume intrinsics may tell us more about pointer
1845 // alignments.
1846 MainFPM.addPass(AlignmentFromAssumptionsPass());
1847
1848 // FIXME: Conditionally run LoadCombine here, after it's ported
1849 // (in case we still have this pass, given its questionable usefulness).
1850
1851 MainFPM.addPass(InstCombinePass());
1852 invokePeepholeEPCallbacks(MainFPM, Level);
1853 MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1854 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
1855
1856 // Create a function that performs CFI checks for cross-DSO calls with
1857 // targets in the current module.
1858 MPM.addPass(CrossDSOCFIPass());
1859
1860 // Lower type metadata and the type.test intrinsic. This pass supports
1861 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1862 // to be run at link time if CFI is enabled. This pass does nothing if
1863 // CFI is disabled.
1864 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1865 // Run a second time to clean up any type tests left behind by WPD for use
1866 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1867 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1868
1869 // Enable splitting late in the FullLTO post-link pipeline. This is done in
1870 // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
1871 if (EnableHotColdSplit)
1872 MPM.addPass(HotColdSplittingPass());
1873
1874 // Add late LTO optimization passes.
1875 // Delete basic blocks, which optimization passes may have killed.
1876 MPM.addPass(createModuleToFunctionPassAdaptor(
1877 SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
1878
1879 // Drop bodies of available eternally objects to improve GlobalDCE.
1880 MPM.addPass(EliminateAvailableExternallyPass());
1881
1882 // Now that we have optimized the program, discard unreachable functions.
1883 MPM.addPass(GlobalDCEPass());
1884
1885 if (PTO.MergeFunctions)
1886 MPM.addPass(MergeFunctionsPass());
1887
1888 // Emit annotation remarks.
1889 addAnnotationRemarksPass(MPM);
1890
1891 return MPM;
1892}
1893
1894ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
1895 bool LTOPreLink) {
1896 assert(Level == OptimizationLevel::O0 &&
1897 "buildO0DefaultPipeline should only be used with O0");
1898
1899 ModulePassManager MPM(DebugLogging);
1900
1901 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1902 PGOOpt->Action == PGOOptions::IRUse))
1903 addPGOInstrPassesForO0(
1904 MPM,
1905 /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1906 /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1907
1908 for (auto &C : PipelineStartEPCallbacks)
1909 C(MPM, Level);
1910 for (auto &C : PipelineEarlySimplificationEPCallbacks)
1911 C(MPM, Level);
1912
1913 // Build a minimal pipeline based on the semantics required by LLVM,
1914 // which is just that always inlining occurs. Further, disable generating
1915 // lifetime intrinsics to avoid enabling further optimizations during
1916 // code generation.
1917 // However, we need to insert lifetime intrinsics to avoid invalid access
1918 // caused by multithreaded coroutines.
1919 MPM.addPass(AlwaysInlinerPass(
1920 /*InsertLifetimeIntrinsics=*/PTO.Coroutines));
1921
1922 if (PTO.MergeFunctions)
1923 MPM.addPass(MergeFunctionsPass());
1924
1925 if (EnableMatrix)
1926 MPM.addPass(
1927 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
1928
1929 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1930 CGSCCPassManager CGPM(DebugLogging);
1931 for (auto &C : CGSCCOptimizerLateEPCallbacks)
1932 C(CGPM, Level);
1933 if (!CGPM.isEmpty())
1934 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1935 }
1936 if (!LateLoopOptimizationsEPCallbacks.empty()) {
1937 LoopPassManager LPM(DebugLogging);
1938 for (auto &C : LateLoopOptimizationsEPCallbacks)
1939 C(LPM, Level);
1940 if (!LPM.isEmpty()) {
1941 MPM.addPass(createModuleToFunctionPassAdaptor(
1942 createFunctionToLoopPassAdaptor(std::move(LPM))));
1943 }
1944 }
1945 if (!LoopOptimizerEndEPCallbacks.empty()) {
1946 LoopPassManager LPM(DebugLogging);
1947 for (auto &C : LoopOptimizerEndEPCallbacks)
1948 C(LPM, Level);
1949 if (!LPM.isEmpty()) {
1950 MPM.addPass(createModuleToFunctionPassAdaptor(
1951 createFunctionToLoopPassAdaptor(std::move(LPM))));
1952 }
1953 }
1954 if (!ScalarOptimizerLateEPCallbacks.empty()) {
1955 FunctionPassManager FPM(DebugLogging);
1956 for (auto &C : ScalarOptimizerLateEPCallbacks)
1957 C(FPM, Level);
1958 if (!FPM.isEmpty())
1959 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
1960 }
1961 if (!VectorizerStartEPCallbacks.empty()) {
1962 FunctionPassManager FPM(DebugLogging);
1963 for (auto &C : VectorizerStartEPCallbacks)
1964 C(FPM, Level);
1965 if (!FPM.isEmpty())
1966 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
1967 }
1968
1969 if (PTO.Coroutines) {
1970 MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
1971
1972 CGSCCPassManager CGPM(DebugLogging);
1973 CGPM.addPass(CoroSplitPass());
1974 CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
1975 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1976
1977 MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
1978 }
1979
1980 for (auto &C : OptimizerLastEPCallbacks)
1981 C(MPM, Level);
1982
1983 if (LTOPreLink)
1984 addRequiredLTOPreLinkPasses(MPM);
1985
1986 return MPM;
1987}
1988
1989AAManager PassBuilder::buildDefaultAAPipeline() {
1990 AAManager AA;
1991
1992 // The order in which these are registered determines their priority when
1993 // being queried.
1994
1995 // First we register the basic alias analysis that provides the majority of
1996 // per-function local AA logic. This is a stateless, on-demand local set of
1997 // AA techniques.
1998 AA.registerFunctionAnalysis<BasicAA>();
1999
2000 // Next we query fast, specialized alias analyses that wrap IR-embedded
2001 // information about aliasing.
2002 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2003 AA.registerFunctionAnalysis<TypeBasedAA>();
2004
2005 // Add support for querying global aliasing information when available.
2006 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2007 // analysis, all that the `AAManager` can do is query for any *cached*
2008 // results from `GlobalsAA` through a readonly proxy.
2009 AA.registerModuleAnalysis<GlobalsAA>();
2010
2011 // Add target-specific alias analyses.
2012 if (TM)
2013 TM->registerDefaultAliasAnalyses(AA);
2014
2015 return AA;
2016}
2017
2018static Optional<int> parseRepeatPassName(StringRef Name) {
2019 if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
2020 return None;
2021 int Count;
2022 if (Name.getAsInteger(0, Count) || Count <= 0)
2023 return None;
2024 return Count;
2025}
2026
2027static Optional<int> parseDevirtPassName(StringRef Name) {
2028 if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
2029 return None;
2030 int Count;
2031 if (Name.getAsInteger(0, Count) || Count < 0)
2032 return None;
2033 return Count;
2034}
2035
2036static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
2037 if (!Name.consume_front(PassName))
2038 return false;
2039 // normal pass name w/o parameters == default parameters
2040 if (Name.empty())
2041 return true;
2042 return Name.startswith("<") && Name.endswith(">");
2043}
2044
2045namespace {
2046
2047/// This performs customized parsing of pass name with parameters.
2048///
2049/// We do not need parametrization of passes in textual pipeline very often,
2050/// yet on a rare occasion ability to specify parameters right there can be
2051/// useful.
2052///
2053/// \p Name - parameterized specification of a pass from a textual pipeline
2054/// is a string in a form of :
2055/// PassName '<' parameter-list '>'
2056///
2057/// Parameter list is being parsed by the parser callable argument, \p Parser,
2058/// It takes a string-ref of parameters and returns either StringError or a
2059/// parameter list in a form of a custom parameters type, all wrapped into
2060/// Expected<> template class.
2061///
2062template <typename ParametersParseCallableT>
2063auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
2064 StringRef PassName) -> decltype(Parser(StringRef{})) {
2065 using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
2066
2067 StringRef Params = Name;
2068 if (!Params.consume_front(PassName)) {
2069 assert(false &&
2070 "unable to strip pass name from parametrized pass specification");
2071 }
2072 if (Params.empty())
2073 return ParametersT{};
2074 if (!Params.consume_front("<") || !Params.consume_back(">")) {
2075 assert(false && "invalid format for parametrized pass name");
2076 }
2077
2078 Expected<ParametersT> Result = Parser(Params);
2079 assert((Result || Result.template errorIsA<StringError>()) &&
2080 "Pass parameter parser can only return StringErrors.");
2081 return Result;
2082}
2083
2084/// Parser of parameters for LoopUnroll pass.
2085Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
2086 LoopUnrollOptions UnrollOpts;
2087 while (!Params.empty()) {
2088 StringRef ParamName;
2089 std::tie(ParamName, Params) = Params.split(';');
2090 int OptLevel = StringSwitch<int>(ParamName)
2091 .Case("O0", 0)
2092 .Case("O1", 1)
2093 .Case("O2", 2)
2094 .Case("O3", 3)
2095 .Default(-1);
2096 if (OptLevel >= 0) {
2097 UnrollOpts.setOptLevel(OptLevel);
2098 continue;
2099 }
2100 if (ParamName.consume_front("full-unroll-max=")) {
2101 int Count;
2102 if (ParamName.getAsInteger(0, Count))
2103 return make_error<StringError>(
2104 formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
2105 inconvertibleErrorCode());
2106 UnrollOpts.setFullUnrollMaxCount(Count);
2107 continue;
2108 }
2109
2110 bool Enable = !ParamName.consume_front("no-");
2111 if (ParamName == "partial") {
2112 UnrollOpts.setPartial(Enable);
2113 } else if (ParamName == "peeling") {
2114 UnrollOpts.setPeeling(Enable);
2115 } else if (ParamName == "profile-peeling") {
2116 UnrollOpts.setProfileBasedPeeling(Enable);
2117 } else if (ParamName == "runtime") {
2118 UnrollOpts.setRuntime(Enable);
2119 } else if (ParamName == "upperbound") {
2120 UnrollOpts.setUpperBound(Enable);
2121 } else {
2122 return make_error<StringError>(
2123 formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
2124 inconvertibleErrorCode());
2125 }
2126 }
2127 return UnrollOpts;
2128}
2129
2130Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
2131 MemorySanitizerOptions Result;
2132 while (!Params.empty()) {
2133 StringRef ParamName;
2134 std::tie(ParamName, Params) = Params.split(';');
2135
2136 if (ParamName == "recover") {
2137 Result.Recover = true;
2138 } else if (ParamName == "kernel") {
2139 Result.Kernel = true;
2140 } else if (ParamName.consume_front("track-origins=")) {
2141 if (ParamName.getAsInteger(0, Result.TrackOrigins))
2142 return make_error<StringError>(
2143 formatv("invalid argument to MemorySanitizer pass track-origins "
2144 "parameter: '{0}' ",
2145 ParamName)
2146 .str(),
2147 inconvertibleErrorCode());
2148 } else {
2149 return make_error<StringError>(
2150 formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
2151 .str(),
2152 inconvertibleErrorCode());
2153 }
2154 }
2155 return Result;
2156}
2157
2158/// Parser of parameters for SimplifyCFG pass.
2159Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
2160 SimplifyCFGOptions Result;
2161 while (!Params.empty()) {
2162 StringRef ParamName;
2163 std::tie(ParamName, Params) = Params.split(';');
2164
2165 bool Enable = !ParamName.consume_front("no-");
2166 if (ParamName == "forward-switch-cond") {
2167 Result.forwardSwitchCondToPhi(Enable);
2168 } else if (ParamName == "switch-to-lookup") {
2169 Result.convertSwitchToLookupTable(Enable);
2170 } else if (ParamName == "keep-loops") {
2171 Result.needCanonicalLoops(Enable);
2172 } else if (ParamName == "hoist-common-insts") {
2173 Result.hoistCommonInsts(Enable);
2174 } else if (ParamName == "sink-common-insts") {
2175 Result.sinkCommonInsts(Enable);
2176 } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
2177 APInt BonusInstThreshold;
2178 if (ParamName.getAsInteger(0, BonusInstThreshold))
2179 return make_error<StringError>(
2180 formatv("invalid argument to SimplifyCFG pass bonus-threshold "
2181 "parameter: '{0}' ",
2182 ParamName).str(),
2183 inconvertibleErrorCode());
2184 Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
2185 } else {
2186 return make_error<StringError>(
2187 formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
2188 inconvertibleErrorCode());
2189 }
2190 }
2191 return Result;
2192}
2193
2194/// Parser of parameters for LoopVectorize pass.
2195Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
2196 LoopVectorizeOptions Opts;
2197 while (!Params.empty()) {
2198 StringRef ParamName;
2199 std::tie(ParamName, Params) = Params.split(';');
2200
2201 bool Enable = !ParamName.consume_front("no-");
2202 if (ParamName == "interleave-forced-only") {
2203 Opts.setInterleaveOnlyWhenForced(Enable);
2204 } else if (ParamName == "vectorize-forced-only") {
2205 Opts.setVectorizeOnlyWhenForced(Enable);
2206 } else {
2207 return make_error<StringError>(
2208 formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
2209 inconvertibleErrorCode());
2210 }
2211 }
2212 return Opts;
2213}
2214
2215Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
2216 bool Result = false;
2217 while (!Params.empty()) {
2218 StringRef ParamName;
2219 std::tie(ParamName, Params) = Params.split(';');
2220
2221 bool Enable = !ParamName.consume_front("no-");
2222 if (ParamName == "nontrivial") {
2223 Result = Enable;
2224 } else {
2225 return make_error<StringError>(
2226 formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
2227 .str(),
2228 inconvertibleErrorCode());
2229 }
2230 }
2231 return Result;
2232}
2233
2234Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
2235 bool Result = false;
2236 while (!Params.empty()) {
2237 StringRef ParamName;
2238 std::tie(ParamName, Params) = Params.split(';');
2239
2240 bool Enable = !ParamName.consume_front("no-");
2241 if (ParamName == "split-footer-bb") {
2242 Result = Enable;
2243 } else {
2244 return make_error<StringError>(
2245 formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ",
2246 ParamName)
2247 .str(),
2248 inconvertibleErrorCode());
2249 }
2250 }
2251 return Result;
2252}
2253
2254Expected<GVNOptions> parseGVNOptions(StringRef Params) {
2255 GVNOptions Result;
2256 while (!Params.empty()) {
2257 StringRef ParamName;
2258 std::tie(ParamName, Params) = Params.split(';');
2259
2260 bool Enable = !ParamName.consume_front("no-");
2261 if (ParamName == "pre") {
2262 Result.setPRE(Enable);
2263 } else if (ParamName == "load-pre") {
2264 Result.setLoadPRE(Enable);
2265 } else if (ParamName == "split-backedge-load-pre") {
2266 Result.setLoadPRESplitBackedge(Enable);
2267 } else if (ParamName == "memdep") {
2268 Result.setMemDep(Enable);
2269 } else {
2270 return make_error<StringError>(
2271 formatv("invalid GVN pass parameter '{0}' ", ParamName).str(),
2272 inconvertibleErrorCode());
2273 }
2274 }
2275 return Result;
2276}
2277
2278Expected<StackLifetime::LivenessType>
2279parseStackLifetimeOptions(StringRef Params) {
2280 StackLifetime::LivenessType Result = StackLifetime::LivenessType::May;
2281 while (!Params.empty()) {
2282 StringRef ParamName;
2283 std::tie(ParamName, Params) = Params.split(';');
2284
2285 if (ParamName == "may") {
2286 Result = StackLifetime::LivenessType::May;
2287 } else if (ParamName == "must") {
2288 Result = StackLifetime::LivenessType::Must;
2289 } else {
2290 return make_error<StringError>(
2291 formatv("invalid StackLifetime parameter '{0}' ", ParamName).str(),
2292 inconvertibleErrorCode());
2293 }
2294 }
2295 return Result;
2296}
2297
2298} // namespace
2299
2300/// Tests whether a pass name starts with a valid prefix for a default pipeline
2301/// alias.
2302static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
2303 return Name.startswith("default") || Name.startswith("thinlto") ||
2304 Name.startswith("lto");
2305}
2306
2307/// Tests whether registered callbacks will accept a given pass name.
2308///
2309/// When parsing a pipeline text, the type of the outermost pipeline may be
2310/// omitted, in which case the type is automatically determined from the first
2311/// pass name in the text. This may be a name that is handled through one of the
2312/// callbacks. We check this through the oridinary parsing callbacks by setting
2313/// up a dummy PassManager in order to not force the client to also handle this
2314/// type of query.
2315template <typename PassManagerT, typename CallbacksT>
2316static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
2317 if (!Callbacks.empty()) {
2318 PassManagerT DummyPM;
2319 for (auto &CB : Callbacks)
2320 if (CB(Name, DummyPM, {}))
2321 return true;
2322 }
2323 return false;
2324}
2325
2326template <typename CallbacksT>
2327static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
2328 // Manually handle aliases for pre-configured pipeline fragments.
2329 if (startsWithDefaultPipelineAliasPrefix(Name))
2330 return DefaultAliasRegex.match(Name);
2331
2332 // Explicitly handle pass manager names.
2333 if (Name == "module")
2334 return true;
2335 if (Name == "cgscc")
2336 return true;
2337 if (Name == "function")
2338 return true;
2339
2340 // Explicitly handle custom-parsed pass names.
2341 if (parseRepeatPassName(Name))
2342 return true;
2343
2344#define MODULE_PASS(NAME, CREATE_PASS) \
2345 if (Name == NAME) \
2346 return true;
2347#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
2348 if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
2349 return true;
2350#include "PassRegistry.def"
2351
2352 return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
2353}
2354
2355template <typename CallbacksT>
2356static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
2357 // Explicitly handle pass manager names.
2358 if (Name == "cgscc")
2359 return true;
2360 if (Name == "function")
2361 return true;
2362
2363 // Explicitly handle custom-parsed pass names.
2364 if (parseRepeatPassName(Name))
2365 return true;
2366 if (parseDevirtPassName(Name))
2367 return true;
2368
2369#define CGSCC_PASS(NAME, CREATE_PASS) \
2370 if (Name == NAME) \
2371 return true;
2372#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
2373 if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
2374 return true;
2375#include "PassRegistry.def"
2376
2377 return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
2378}
2379
2380template <typename CallbacksT>
2381static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
2382 // Explicitly handle pass manager names.
2383 if (Name == "function")
2384 return true;
2385 if (Name == "loop" || Name == "loop-mssa")
2386 return true;
2387
2388 // Explicitly handle custom-parsed pass names.
2389 if (parseRepeatPassName(Name))
2390 return true;
2391
2392#define FUNCTION_PASS(NAME, CREATE_PASS) \
2393 if (Name == NAME) \
2394 return true;
2395#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
2396 if (checkParametrizedPassName(Name, NAME)) \
2397 return true;
2398#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
2399 if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
2400 return true;
2401#include "PassRegistry.def"
2402
2403 return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
2404}
2405
2406template <typename CallbacksT>
2407static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
2408 // Explicitly handle pass manager names.
2409 if (Name == "loop" || Name == "loop-mssa")
2410 return true;
2411
2412 // Explicitly handle custom-parsed pass names.
2413 if (