1 | //===- Construction of pass pipelines -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file provides the implementation of the PassBuilder based on our |
11 | /// static pass registry as well as related functionality. It also provides |
12 | /// helpers to aid in analyzing, debugging, and testing passes and pass |
13 | /// pipelines. |
14 | /// |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/Analysis/AliasAnalysis.h" |
19 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
20 | #include "llvm/Analysis/CGSCCPassManager.h" |
21 | #include "llvm/Analysis/GlobalsModRef.h" |
22 | #include "llvm/Analysis/InlineAdvisor.h" |
23 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
24 | #include "llvm/Analysis/ScopedNoAliasAA.h" |
25 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
26 | #include "llvm/IR/PassManager.h" |
27 | #include "llvm/Passes/OptimizationLevel.h" |
28 | #include "llvm/Passes/PassBuilder.h" |
29 | #include "llvm/Support/CommandLine.h" |
30 | #include "llvm/Support/ErrorHandling.h" |
31 | #include "llvm/Support/PGOOptions.h" |
32 | #include "llvm/Support/VirtualFileSystem.h" |
33 | #include "llvm/Target/TargetMachine.h" |
34 | #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" |
35 | #include "llvm/Transforms/Coroutines/CoroCleanup.h" |
36 | #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" |
37 | #include "llvm/Transforms/Coroutines/CoroEarly.h" |
38 | #include "llvm/Transforms/Coroutines/CoroElide.h" |
39 | #include "llvm/Transforms/Coroutines/CoroSplit.h" |
40 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
41 | #include "llvm/Transforms/IPO/AlwaysInliner.h" |
42 | #include "llvm/Transforms/IPO/Annotation2Metadata.h" |
43 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
44 | #include "llvm/Transforms/IPO/Attributor.h" |
45 | #include "llvm/Transforms/IPO/CalledValuePropagation.h" |
46 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
47 | #include "llvm/Transforms/IPO/CrossDSOCFI.h" |
48 | #include "llvm/Transforms/IPO/DeadArgumentElimination.h" |
49 | #include "llvm/Transforms/IPO/ElimAvailExtern.h" |
50 | #include "llvm/Transforms/IPO/EmbedBitcodePass.h" |
51 | #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" |
52 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
53 | #include "llvm/Transforms/IPO/GlobalDCE.h" |
54 | #include "llvm/Transforms/IPO/GlobalOpt.h" |
55 | #include "llvm/Transforms/IPO/GlobalSplit.h" |
56 | #include "llvm/Transforms/IPO/HotColdSplitting.h" |
57 | #include "llvm/Transforms/IPO/IROutliner.h" |
58 | #include "llvm/Transforms/IPO/InferFunctionAttrs.h" |
59 | #include "llvm/Transforms/IPO/Inliner.h" |
60 | #include "llvm/Transforms/IPO/LowerTypeTests.h" |
61 | #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" |
62 | #include "llvm/Transforms/IPO/MergeFunctions.h" |
63 | #include "llvm/Transforms/IPO/ModuleInliner.h" |
64 | #include "llvm/Transforms/IPO/OpenMPOpt.h" |
65 | #include "llvm/Transforms/IPO/PartialInlining.h" |
66 | #include "llvm/Transforms/IPO/SCCP.h" |
67 | #include "llvm/Transforms/IPO/SampleProfile.h" |
68 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" |
69 | #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" |
70 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
71 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
72 | #include "llvm/Transforms/Instrumentation/CGProfile.h" |
73 | #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" |
74 | #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" |
75 | #include "llvm/Transforms/Instrumentation/InstrProfiling.h" |
76 | #include "llvm/Transforms/Instrumentation/MemProfiler.h" |
77 | #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" |
78 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
79 | #include "llvm/Transforms/Scalar/ADCE.h" |
80 | #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" |
81 | #include "llvm/Transforms/Scalar/AnnotationRemarks.h" |
82 | #include "llvm/Transforms/Scalar/BDCE.h" |
83 | #include "llvm/Transforms/Scalar/CallSiteSplitting.h" |
84 | #include "llvm/Transforms/Scalar/ConstraintElimination.h" |
85 | #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" |
86 | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" |
87 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
88 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
89 | #include "llvm/Transforms/Scalar/EarlyCSE.h" |
90 | #include "llvm/Transforms/Scalar/Float2Int.h" |
91 | #include "llvm/Transforms/Scalar/GVN.h" |
92 | #include "llvm/Transforms/Scalar/IndVarSimplify.h" |
93 | #include "llvm/Transforms/Scalar/InferAlignment.h" |
94 | #include "llvm/Transforms/Scalar/InstSimplifyPass.h" |
95 | #include "llvm/Transforms/Scalar/JumpTableToSwitch.h" |
96 | #include "llvm/Transforms/Scalar/JumpThreading.h" |
97 | #include "llvm/Transforms/Scalar/LICM.h" |
98 | #include "llvm/Transforms/Scalar/LoopDeletion.h" |
99 | #include "llvm/Transforms/Scalar/LoopDistribute.h" |
100 | #include "llvm/Transforms/Scalar/LoopFlatten.h" |
101 | #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" |
102 | #include "llvm/Transforms/Scalar/LoopInstSimplify.h" |
103 | #include "llvm/Transforms/Scalar/LoopInterchange.h" |
104 | #include "llvm/Transforms/Scalar/LoopLoadElimination.h" |
105 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
106 | #include "llvm/Transforms/Scalar/LoopRotation.h" |
107 | #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" |
108 | #include "llvm/Transforms/Scalar/LoopSink.h" |
109 | #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" |
110 | #include "llvm/Transforms/Scalar/LoopUnrollPass.h" |
111 | #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" |
112 | #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" |
113 | #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" |
114 | #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" |
115 | #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" |
116 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
117 | #include "llvm/Transforms/Scalar/NewGVN.h" |
118 | #include "llvm/Transforms/Scalar/Reassociate.h" |
119 | #include "llvm/Transforms/Scalar/SCCP.h" |
120 | #include "llvm/Transforms/Scalar/SROA.h" |
121 | #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" |
122 | #include "llvm/Transforms/Scalar/SimplifyCFG.h" |
123 | #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
124 | #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
125 | #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" |
126 | #include "llvm/Transforms/Utils/AddDiscriminators.h" |
127 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
128 | #include "llvm/Transforms/Utils/CanonicalizeAliases.h" |
129 | #include "llvm/Transforms/Utils/CountVisits.h" |
130 | #include "llvm/Transforms/Utils/InjectTLIMappings.h" |
131 | #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" |
132 | #include "llvm/Transforms/Utils/Mem2Reg.h" |
133 | #include "llvm/Transforms/Utils/MoveAutoInit.h" |
134 | #include "llvm/Transforms/Utils/NameAnonGlobals.h" |
135 | #include "llvm/Transforms/Utils/RelLookupTableConverter.h" |
136 | #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" |
137 | #include "llvm/Transforms/Vectorize/LoopVectorize.h" |
138 | #include "llvm/Transforms/Vectorize/SLPVectorizer.h" |
139 | #include "llvm/Transforms/Vectorize/VectorCombine.h" |
140 | |
141 | using namespace llvm; |
142 | |
143 | static cl::opt<InliningAdvisorMode> UseInlineAdvisor( |
144 | "enable-ml-inliner" , cl::init(Val: InliningAdvisorMode::Default), cl::Hidden, |
145 | cl::desc("Enable ML policy for inliner. Currently trained for -Oz only" ), |
146 | cl::values(clEnumValN(InliningAdvisorMode::Default, "default" , |
147 | "Heuristics-based inliner version" ), |
148 | clEnumValN(InliningAdvisorMode::Development, "development" , |
149 | "Use development mode (runtime-loadable model)" ), |
150 | clEnumValN(InliningAdvisorMode::Release, "release" , |
151 | "Use release mode (AOT-compiled model)" ))); |
152 | |
153 | static cl::opt<bool> EnableSyntheticCounts( |
154 | "enable-npm-synthetic-counts" , cl::Hidden, |
155 | cl::desc("Run synthetic function entry count generation " |
156 | "pass" )); |
157 | |
158 | /// Flag to enable inline deferral during PGO. |
159 | static cl::opt<bool> |
160 | EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral" , cl::init(Val: true), |
161 | cl::Hidden, |
162 | cl::desc("Enable inline deferral during PGO" )); |
163 | |
164 | static cl::opt<bool> EnableModuleInliner("enable-module-inliner" , |
165 | cl::init(Val: false), cl::Hidden, |
166 | cl::desc("Enable module inliner" )); |
167 | |
168 | static cl::opt<bool> PerformMandatoryInliningsFirst( |
169 | "mandatory-inlining-first" , cl::init(Val: false), cl::Hidden, |
170 | cl::desc("Perform mandatory inlinings module-wide, before performing " |
171 | "inlining" )); |
172 | |
173 | static cl::opt<bool> EnableEagerlyInvalidateAnalyses( |
174 | "eagerly-invalidate-analyses" , cl::init(Val: true), cl::Hidden, |
175 | cl::desc("Eagerly invalidate more analyses in default pipelines" )); |
176 | |
177 | static cl::opt<bool> EnableMergeFunctions( |
178 | "enable-merge-functions" , cl::init(Val: false), cl::Hidden, |
179 | cl::desc("Enable function merging as part of the optimization pipeline" )); |
180 | |
181 | static cl::opt<bool> EnablePostPGOLoopRotation( |
182 | "enable-post-pgo-loop-rotation" , cl::init(Val: true), cl::Hidden, |
183 | cl::desc("Run the loop rotation transformation after PGO instrumentation" )); |
184 | |
185 | static cl::opt<bool> EnableGlobalAnalyses( |
186 | "enable-global-analyses" , cl::init(Val: true), cl::Hidden, |
187 | cl::desc("Enable inter-procedural analyses" )); |
188 | |
189 | static cl::opt<bool> |
190 | RunPartialInlining("enable-partial-inlining" , cl::init(Val: false), cl::Hidden, |
191 | cl::desc("Run Partial inlinining pass" )); |
192 | |
193 | static cl::opt<bool> ( |
194 | "extra-vectorizer-passes" , cl::init(Val: false), cl::Hidden, |
195 | cl::desc("Run cleanup optimization passes after vectorization" )); |
196 | |
197 | static cl::opt<bool> RunNewGVN("enable-newgvn" , cl::init(Val: false), cl::Hidden, |
198 | cl::desc("Run the NewGVN pass" )); |
199 | |
200 | static cl::opt<bool> EnableLoopInterchange( |
201 | "enable-loopinterchange" , cl::init(Val: false), cl::Hidden, |
202 | cl::desc("Enable the experimental LoopInterchange Pass" )); |
203 | |
204 | static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam" , |
205 | cl::init(Val: false), cl::Hidden, |
206 | cl::desc("Enable Unroll And Jam Pass" )); |
207 | |
208 | static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten" , cl::init(Val: false), |
209 | cl::Hidden, |
210 | cl::desc("Enable the LoopFlatten Pass" )); |
211 | |
212 | // Experimentally allow loop header duplication. This should allow for better |
213 | // optimization at Oz, since loop-idiom recognition can then recognize things |
214 | // like memcpy. If this ends up being useful for many targets, we should drop |
215 | // this flag and make a code generation option that can be controlled |
216 | // independent of the opt level and exposed through the frontend. |
217 | static cl::opt<bool> ( |
218 | "enable-loop-header-duplication" , cl::init(Val: false), cl::Hidden, |
219 | cl::desc("Enable loop header duplication at any optimization level" )); |
220 | |
221 | static cl::opt<bool> |
222 | EnableDFAJumpThreading("enable-dfa-jump-thread" , |
223 | cl::desc("Enable DFA jump threading" ), |
224 | cl::init(Val: false), cl::Hidden); |
225 | |
226 | // TODO: turn on and remove flag |
227 | static cl::opt<bool> EnablePGOForceFunctionAttrs( |
228 | "enable-pgo-force-function-attrs" , |
229 | cl::desc("Enable pass to set function attributes based on PGO profiles" ), |
230 | cl::init(Val: false)); |
231 | |
232 | static cl::opt<bool> |
233 | EnableHotColdSplit("hot-cold-split" , |
234 | cl::desc("Enable hot-cold splitting pass" )); |
235 | |
236 | static cl::opt<bool> EnableIROutliner("ir-outliner" , cl::init(Val: false), |
237 | cl::Hidden, |
238 | cl::desc("Enable ir outliner pass" )); |
239 | |
240 | static cl::opt<bool> |
241 | DisablePreInliner("disable-preinline" , cl::init(Val: false), cl::Hidden, |
242 | cl::desc("Disable pre-instrumentation inliner" )); |
243 | |
244 | static cl::opt<int> PreInlineThreshold( |
245 | "preinline-threshold" , cl::Hidden, cl::init(Val: 75), |
246 | cl::desc("Control the amount of inlining in pre-instrumentation inliner " |
247 | "(default = 75)" )); |
248 | |
249 | static cl::opt<bool> |
250 | EnableGVNHoist("enable-gvn-hoist" , |
251 | cl::desc("Enable the GVN hoisting pass (default = off)" )); |
252 | |
253 | static cl::opt<bool> |
254 | EnableGVNSink("enable-gvn-sink" , |
255 | cl::desc("Enable the GVN sinking pass (default = off)" )); |
256 | |
257 | static cl::opt<bool> EnableJumpTableToSwitch( |
258 | "enable-jump-table-to-switch" , |
259 | cl::desc("Enable JumpTableToSwitch pass (default = off)" )); |
260 | |
261 | // This option is used in simplifying testing SampleFDO optimizations for |
262 | // profile loading. |
263 | static cl::opt<bool> |
264 | EnableCHR("enable-chr" , cl::init(Val: true), cl::Hidden, |
265 | cl::desc("Enable control height reduction optimization (CHR)" )); |
266 | |
267 | static cl::opt<bool> FlattenedProfileUsed( |
268 | "flattened-profile-used" , cl::init(Val: false), cl::Hidden, |
269 | cl::desc("Indicate the sample profile being used is flattened, i.e., " |
270 | "no inline hierachy exists in the profile" )); |
271 | |
272 | static cl::opt<bool> EnableOrderFileInstrumentation( |
273 | "enable-order-file-instrumentation" , cl::init(Val: false), cl::Hidden, |
274 | cl::desc("Enable order file instrumentation (default = off)" )); |
275 | |
276 | static cl::opt<bool> |
277 | EnableMatrix("enable-matrix" , cl::init(Val: false), cl::Hidden, |
278 | cl::desc("Enable lowering of the matrix intrinsics" )); |
279 | |
280 | static cl::opt<bool> EnableConstraintElimination( |
281 | "enable-constraint-elimination" , cl::init(Val: true), cl::Hidden, |
282 | cl::desc( |
283 | "Enable pass to eliminate conditions based on linear constraints" )); |
284 | |
285 | static cl::opt<AttributorRunOption> AttributorRun( |
286 | "attributor-enable" , cl::Hidden, cl::init(Val: AttributorRunOption::NONE), |
287 | cl::desc("Enable the attributor inter-procedural deduction pass" ), |
288 | cl::values(clEnumValN(AttributorRunOption::ALL, "all" , |
289 | "enable all attributor runs" ), |
290 | clEnumValN(AttributorRunOption::MODULE, "module" , |
291 | "enable module-wide attributor runs" ), |
292 | clEnumValN(AttributorRunOption::CGSCC, "cgscc" , |
293 | "enable call graph SCC attributor runs" ), |
294 | clEnumValN(AttributorRunOption::NONE, "none" , |
295 | "disable attributor runs" ))); |
296 | |
297 | static cl::opt<bool> UseLoopVersioningLICM( |
298 | "enable-loop-versioning-licm" , cl::init(Val: false), cl::Hidden, |
299 | cl::desc("Enable the experimental Loop Versioning LICM pass" )); |
300 | |
301 | namespace llvm { |
302 | extern cl::opt<bool> EnableMemProfContextDisambiguation; |
303 | |
304 | extern cl::opt<bool> EnableInferAlignmentPass; |
305 | } // namespace llvm |
306 | |
307 | PipelineTuningOptions::PipelineTuningOptions() { |
308 | LoopInterleaving = true; |
309 | LoopVectorization = true; |
310 | SLPVectorization = false; |
311 | LoopUnrolling = true; |
312 | ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; |
313 | LicmMssaOptCap = SetLicmMssaOptCap; |
314 | LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; |
315 | CallGraphProfile = true; |
316 | UnifiedLTO = false; |
317 | MergeFunctions = EnableMergeFunctions; |
318 | InlinerThreshold = -1; |
319 | EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; |
320 | } |
321 | |
322 | namespace llvm { |
323 | extern cl::opt<unsigned> MaxDevirtIterations; |
324 | } // namespace llvm |
325 | |
326 | void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, |
327 | OptimizationLevel Level) { |
328 | for (auto &C : PeepholeEPCallbacks) |
329 | C(FPM, Level); |
330 | } |
331 | void PassBuilder::invokeLateLoopOptimizationsEPCallbacks( |
332 | LoopPassManager &LPM, OptimizationLevel Level) { |
333 | for (auto &C : LateLoopOptimizationsEPCallbacks) |
334 | C(LPM, Level); |
335 | } |
336 | void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, |
337 | OptimizationLevel Level) { |
338 | for (auto &C : LoopOptimizerEndEPCallbacks) |
339 | C(LPM, Level); |
340 | } |
341 | void PassBuilder::invokeScalarOptimizerLateEPCallbacks( |
342 | FunctionPassManager &FPM, OptimizationLevel Level) { |
343 | for (auto &C : ScalarOptimizerLateEPCallbacks) |
344 | C(FPM, Level); |
345 | } |
346 | void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, |
347 | OptimizationLevel Level) { |
348 | for (auto &C : CGSCCOptimizerLateEPCallbacks) |
349 | C(CGPM, Level); |
350 | } |
351 | void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, |
352 | OptimizationLevel Level) { |
353 | for (auto &C : VectorizerStartEPCallbacks) |
354 | C(FPM, Level); |
355 | } |
356 | void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, |
357 | OptimizationLevel Level) { |
358 | for (auto &C : OptimizerEarlyEPCallbacks) |
359 | C(MPM, Level); |
360 | } |
361 | void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, |
362 | OptimizationLevel Level) { |
363 | for (auto &C : OptimizerLastEPCallbacks) |
364 | C(MPM, Level); |
365 | } |
366 | void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( |
367 | ModulePassManager &MPM, OptimizationLevel Level) { |
368 | for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) |
369 | C(MPM, Level); |
370 | } |
371 | void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks( |
372 | ModulePassManager &MPM, OptimizationLevel Level) { |
373 | for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) |
374 | C(MPM, Level); |
375 | } |
376 | void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, |
377 | OptimizationLevel Level) { |
378 | for (auto &C : PipelineStartEPCallbacks) |
379 | C(MPM, Level); |
380 | } |
381 | void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( |
382 | ModulePassManager &MPM, OptimizationLevel Level) { |
383 | for (auto &C : PipelineEarlySimplificationEPCallbacks) |
384 | C(MPM, Level); |
385 | } |
386 | |
387 | // Helper to add AnnotationRemarksPass. |
388 | static void (ModulePassManager &MPM) { |
389 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
390 | } |
391 | |
392 | // Helper to check if the current compilation phase is preparing for LTO |
393 | static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { |
394 | return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || |
395 | Phase == ThinOrFullLTOPhase::FullLTOPreLink; |
396 | } |
397 | |
398 | // TODO: Investigate the cost/benefit of tail call elimination on debugging. |
399 | FunctionPassManager |
400 | PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, |
401 | ThinOrFullLTOPhase Phase) { |
402 | |
403 | FunctionPassManager FPM; |
404 | |
405 | if (AreStatisticsEnabled()) |
406 | FPM.addPass(Pass: CountVisitsPass()); |
407 | |
408 | // Form SSA out of local memory accesses after breaking apart aggregates into |
409 | // scalars. |
410 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
411 | |
412 | // Catch trivial redundancies |
413 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
414 | |
415 | // Hoisting of scalars and load expressions. |
416 | FPM.addPass( |
417 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
418 | FPM.addPass(Pass: InstCombinePass()); |
419 | |
420 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
421 | |
422 | invokePeepholeEPCallbacks(FPM, Level); |
423 | |
424 | FPM.addPass( |
425 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
426 | |
427 | // Form canonically associated expression trees, and simplify the trees using |
428 | // basic mathematical properties. For example, this will form (nearly) |
429 | // minimal multiplication trees. |
430 | FPM.addPass(Pass: ReassociatePass()); |
431 | |
432 | // Add the primary loop simplification pipeline. |
433 | // FIXME: Currently this is split into two loop pass pipelines because we run |
434 | // some function passes in between them. These can and should be removed |
435 | // and/or replaced by scheduling the loop pass equivalents in the correct |
436 | // positions. But those equivalent passes aren't powerful enough yet. |
437 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
438 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
439 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
440 | // `LoopInstSimplify`. |
441 | LoopPassManager LPM1, LPM2; |
442 | |
443 | // Simplify the loop body. We do this initially to clean up after other loop |
444 | // passes run, either when iterating on a loop or on inner loops with |
445 | // implications on the outer loop. |
446 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
447 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
448 | |
449 | // Try to remove as much code from the loop header as possible, |
450 | // to reduce amount of IR that will have to be duplicated. However, |
451 | // do not perform speculative hoisting the first time as LICM |
452 | // will destroy metadata that may not need to be destroyed if run |
453 | // after loop rotation. |
454 | // TODO: Investigate promotion cap for O1. |
455 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
456 | /*AllowSpeculation=*/false)); |
457 | |
458 | LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true, |
459 | isLTOPreLink(Phase))); |
460 | // TODO: Investigate promotion cap for O1. |
461 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
462 | /*AllowSpeculation=*/true)); |
463 | LPM1.addPass(Pass: SimpleLoopUnswitchPass()); |
464 | if (EnableLoopFlatten) |
465 | LPM1.addPass(Pass: LoopFlattenPass()); |
466 | |
467 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
468 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
469 | |
470 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
471 | |
472 | LPM2.addPass(Pass: LoopDeletionPass()); |
473 | |
474 | if (EnableLoopInterchange) |
475 | LPM2.addPass(Pass: LoopInterchangePass()); |
476 | |
477 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
478 | // because it changes IR to makes profile annotation in back compile |
479 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
480 | // attributes so we need to make sure and allow the full unroll pass to pay |
481 | // attention to it. |
482 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
483 | PGOOpt->Action != PGOOptions::SampleUse) |
484 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
485 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
486 | PTO.ForgetAllSCEVInLoopUnroll)); |
487 | |
488 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
489 | |
490 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
491 | /*UseMemorySSA=*/true, |
492 | /*UseBlockFrequencyInfo=*/true)); |
493 | FPM.addPass( |
494 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
495 | FPM.addPass(Pass: InstCombinePass()); |
496 | // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. |
497 | // *All* loop passes must preserve it, in order to be able to use it. |
498 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
499 | /*UseMemorySSA=*/false, |
500 | /*UseBlockFrequencyInfo=*/false)); |
501 | |
502 | // Delete small array after loop unroll. |
503 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
504 | |
505 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
506 | FPM.addPass(Pass: MemCpyOptPass()); |
507 | |
508 | // Sparse conditional constant propagation. |
509 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
510 | // before... |
511 | FPM.addPass(Pass: SCCPPass()); |
512 | |
513 | // Delete dead bit computations (instcombine runs after to fold away the dead |
514 | // computations, and then ADCE will run later to exploit any new DCE |
515 | // opportunities that creates). |
516 | FPM.addPass(Pass: BDCEPass()); |
517 | |
518 | // Run instcombine after redundancy and dead bit elimination to exploit |
519 | // opportunities opened up by them. |
520 | FPM.addPass(Pass: InstCombinePass()); |
521 | invokePeepholeEPCallbacks(FPM, Level); |
522 | |
523 | FPM.addPass(Pass: CoroElidePass()); |
524 | |
525 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
526 | |
527 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
528 | // the simplifications and basic cleanup after all the simplifications. |
529 | // TODO: Investigate if this is too expensive. |
530 | FPM.addPass(Pass: ADCEPass()); |
531 | FPM.addPass( |
532 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
533 | FPM.addPass(Pass: InstCombinePass()); |
534 | invokePeepholeEPCallbacks(FPM, Level); |
535 | |
536 | return FPM; |
537 | } |
538 | |
539 | FunctionPassManager |
540 | PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, |
541 | ThinOrFullLTOPhase Phase) { |
542 | assert(Level != OptimizationLevel::O0 && "Must request optimizations!" ); |
543 | |
544 | // The O1 pipeline has a separate pipeline creation function to simplify |
545 | // construction readability. |
546 | if (Level.getSpeedupLevel() == 1) |
547 | return buildO1FunctionSimplificationPipeline(Level, Phase); |
548 | |
549 | FunctionPassManager FPM; |
550 | |
551 | if (AreStatisticsEnabled()) |
552 | FPM.addPass(Pass: CountVisitsPass()); |
553 | |
554 | // Form SSA out of local memory accesses after breaking apart aggregates into |
555 | // scalars. |
556 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
557 | |
558 | // Catch trivial redundancies |
559 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
560 | if (EnableKnowledgeRetention) |
561 | FPM.addPass(Pass: AssumeSimplifyPass()); |
562 | |
563 | // Hoisting of scalars and load expressions. |
564 | if (EnableGVNHoist) |
565 | FPM.addPass(Pass: GVNHoistPass()); |
566 | |
567 | // Global value numbering based sinking. |
568 | if (EnableGVNSink) { |
569 | FPM.addPass(Pass: GVNSinkPass()); |
570 | FPM.addPass( |
571 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
572 | } |
573 | |
574 | // Speculative execution if the target has divergent branches; otherwise nop. |
575 | FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); |
576 | |
577 | // Optimize based on known information about branches, and cleanup afterward. |
578 | FPM.addPass(Pass: JumpThreadingPass()); |
579 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
580 | |
581 | // Jump table to switch conversion. |
582 | if (EnableJumpTableToSwitch) |
583 | FPM.addPass(Pass: JumpTableToSwitchPass()); |
584 | |
585 | FPM.addPass( |
586 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
587 | FPM.addPass(Pass: InstCombinePass()); |
588 | FPM.addPass(Pass: AggressiveInstCombinePass()); |
589 | |
590 | if (!Level.isOptimizingForSize()) |
591 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
592 | |
593 | invokePeepholeEPCallbacks(FPM, Level); |
594 | |
595 | // For PGO use pipeline, try to optimize memory intrinsics such as memcpy |
596 | // using the size value profile. Don't perform this when optimizing for size. |
597 | if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && |
598 | !Level.isOptimizingForSize()) |
599 | FPM.addPass(Pass: PGOMemOPSizeOpt()); |
600 | |
601 | FPM.addPass(Pass: TailCallElimPass()); |
602 | FPM.addPass( |
603 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
604 | |
605 | // Form canonically associated expression trees, and simplify the trees using |
606 | // basic mathematical properties. For example, this will form (nearly) |
607 | // minimal multiplication trees. |
608 | FPM.addPass(Pass: ReassociatePass()); |
609 | |
610 | if (EnableConstraintElimination) |
611 | FPM.addPass(Pass: ConstraintEliminationPass()); |
612 | |
613 | // Add the primary loop simplification pipeline. |
614 | // FIXME: Currently this is split into two loop pass pipelines because we run |
615 | // some function passes in between them. These can and should be removed |
616 | // and/or replaced by scheduling the loop pass equivalents in the correct |
617 | // positions. But those equivalent passes aren't powerful enough yet. |
618 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
619 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
620 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
621 | // `LoopInstSimplify`. |
622 | LoopPassManager LPM1, LPM2; |
623 | |
624 | // Simplify the loop body. We do this initially to clean up after other loop |
625 | // passes run, either when iterating on a loop or on inner loops with |
626 | // implications on the outer loop. |
627 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
628 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
629 | |
630 | // Try to remove as much code from the loop header as possible, |
631 | // to reduce amount of IR that will have to be duplicated. However, |
632 | // do not perform speculative hoisting the first time as LICM |
633 | // will destroy metadata that may not need to be destroyed if run |
634 | // after loop rotation. |
635 | // TODO: Investigate promotion cap for O1. |
636 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
637 | /*AllowSpeculation=*/false)); |
638 | |
639 | // Disable header duplication in loop rotation at -Oz. |
640 | LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
641 | Level != OptimizationLevel::Oz, |
642 | isLTOPreLink(Phase))); |
643 | // TODO: Investigate promotion cap for O1. |
644 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
645 | /*AllowSpeculation=*/true)); |
646 | LPM1.addPass( |
647 | Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); |
648 | if (EnableLoopFlatten) |
649 | LPM1.addPass(Pass: LoopFlattenPass()); |
650 | |
651 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
652 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
653 | |
654 | { |
655 | ExtraSimpleLoopUnswitchPassManager ; |
656 | ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
657 | OptimizationLevel::O3)); |
658 | LPM2.addPass(Pass: std::move(ExtraPasses)); |
659 | } |
660 | |
661 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
662 | |
663 | LPM2.addPass(Pass: LoopDeletionPass()); |
664 | |
665 | if (EnableLoopInterchange) |
666 | LPM2.addPass(Pass: LoopInterchangePass()); |
667 | |
668 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
669 | // because it changes IR to makes profile annotation in back compile |
670 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
671 | // attributes so we need to make sure and allow the full unroll pass to pay |
672 | // attention to it. |
673 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
674 | PGOOpt->Action != PGOOptions::SampleUse) |
675 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
676 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
677 | PTO.ForgetAllSCEVInLoopUnroll)); |
678 | |
679 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
680 | |
681 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
682 | /*UseMemorySSA=*/true, |
683 | /*UseBlockFrequencyInfo=*/true)); |
684 | FPM.addPass( |
685 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
686 | FPM.addPass(Pass: InstCombinePass()); |
687 | // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, |
688 | // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. |
689 | // *All* loop passes must preserve it, in order to be able to use it. |
690 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
691 | /*UseMemorySSA=*/false, |
692 | /*UseBlockFrequencyInfo=*/false)); |
693 | |
694 | // Delete small array after loop unroll. |
695 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
696 | |
697 | // Try vectorization/scalarization transforms that are both improvements |
698 | // themselves and can allow further folds with GVN and InstCombine. |
699 | FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); |
700 | |
701 | // Eliminate redundancies. |
702 | FPM.addPass(Pass: MergedLoadStoreMotionPass()); |
703 | if (RunNewGVN) |
704 | FPM.addPass(Pass: NewGVNPass()); |
705 | else |
706 | FPM.addPass(Pass: GVNPass()); |
707 | |
708 | // Sparse conditional constant propagation. |
709 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
710 | // before... |
711 | FPM.addPass(Pass: SCCPPass()); |
712 | |
713 | // Delete dead bit computations (instcombine runs after to fold away the dead |
714 | // computations, and then ADCE will run later to exploit any new DCE |
715 | // opportunities that creates). |
716 | FPM.addPass(Pass: BDCEPass()); |
717 | |
718 | // Run instcombine after redundancy and dead bit elimination to exploit |
719 | // opportunities opened up by them. |
720 | FPM.addPass(Pass: InstCombinePass()); |
721 | invokePeepholeEPCallbacks(FPM, Level); |
722 | |
723 | // Re-consider control flow based optimizations after redundancy elimination, |
724 | // redo DCE, etc. |
725 | if (EnableDFAJumpThreading) |
726 | FPM.addPass(Pass: DFAJumpThreadingPass()); |
727 | |
728 | FPM.addPass(Pass: JumpThreadingPass()); |
729 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
730 | |
731 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
732 | // the simplifications and basic cleanup after all the simplifications. |
733 | // TODO: Investigate if this is too expensive. |
734 | FPM.addPass(Pass: ADCEPass()); |
735 | |
736 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
737 | FPM.addPass(Pass: MemCpyOptPass()); |
738 | |
739 | FPM.addPass(Pass: DSEPass()); |
740 | FPM.addPass(Pass: MoveAutoInitPass()); |
741 | |
742 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
743 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
744 | /*AllowSpeculation=*/true), |
745 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
746 | |
747 | FPM.addPass(Pass: CoroElidePass()); |
748 | |
749 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
750 | |
751 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
752 | .convertSwitchRangeToICmp(B: true) |
753 | .hoistCommonInsts(B: true) |
754 | .sinkCommonInsts(B: true))); |
755 | FPM.addPass(Pass: InstCombinePass()); |
756 | invokePeepholeEPCallbacks(FPM, Level); |
757 | |
758 | return FPM; |
759 | } |
760 | |
761 | void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { |
762 | MPM.addPass(Pass: CanonicalizeAliasesPass()); |
763 | MPM.addPass(Pass: NameAnonGlobalPass()); |
764 | } |
765 | |
766 | void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM, |
767 | OptimizationLevel Level, |
768 | ThinOrFullLTOPhase LTOPhase) { |
769 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
770 | if (DisablePreInliner) |
771 | return; |
772 | InlineParams IP; |
773 | |
774 | IP.DefaultThreshold = PreInlineThreshold; |
775 | |
776 | // FIXME: The hint threshold has the same value used by the regular inliner |
777 | // when not optimzing for size. This should probably be lowered after |
778 | // performance testing. |
779 | // FIXME: this comment is cargo culted from the old pass manager, revisit). |
780 | IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; |
781 | ModuleInlinerWrapperPass MIWP( |
782 | IP, /* MandatoryFirst */ true, |
783 | InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner}); |
784 | CGSCCPassManager &CGPipeline = MIWP.getPM(); |
785 | |
786 | FunctionPassManager FPM; |
787 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
788 | FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies. |
789 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( |
790 | B: true))); // Merge & remove basic blocks. |
791 | FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences. |
792 | invokePeepholeEPCallbacks(FPM, Level); |
793 | |
794 | CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
795 | Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
796 | |
797 | MPM.addPass(Pass: std::move(MIWP)); |
798 | |
799 | // Delete anything that is now dead to make sure that we don't instrument |
800 | // dead code. Instrumentation can end up keeping dead code around and |
801 | // dramatically increase code size. |
802 | MPM.addPass(Pass: GlobalDCEPass()); |
803 | } |
804 | |
805 | void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, |
806 | OptimizationLevel Level, bool RunProfileGen, |
807 | bool IsCS, bool AtomicCounterUpdate, |
808 | std::string ProfileFile, |
809 | std::string ProfileRemappingFile, |
810 | IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
811 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
812 | |
813 | if (!RunProfileGen) { |
814 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
815 | MPM.addPass( |
816 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
817 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
818 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
819 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
820 | return; |
821 | } |
822 | |
823 | // Perform PGO instrumentation. |
824 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS)); |
825 | |
826 | if (EnablePostPGOLoopRotation) { |
827 | // Disable header duplication in loop rotation at -Oz. |
828 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
829 | Pass: createFunctionToLoopPassAdaptor( |
830 | Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
831 | Level != OptimizationLevel::Oz), |
832 | /*UseMemorySSA=*/false, |
833 | /*UseBlockFrequencyInfo=*/false), |
834 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
835 | } |
836 | |
837 | // Add the profile lowering pass. |
838 | InstrProfOptions Options; |
839 | if (!ProfileFile.empty()) |
840 | Options.InstrProfileOutput = ProfileFile; |
841 | // Do counter promotion at Level greater than O0. |
842 | Options.DoCounterPromotion = true; |
843 | Options.UseBFIInPromotion = IsCS; |
844 | Options.Atomic = AtomicCounterUpdate; |
845 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
846 | } |
847 | |
848 | void PassBuilder::addPGOInstrPassesForO0( |
849 | ModulePassManager &MPM, bool RunProfileGen, bool IsCS, |
850 | bool AtomicCounterUpdate, std::string ProfileFile, |
851 | std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
852 | if (!RunProfileGen) { |
853 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
854 | MPM.addPass( |
855 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
856 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
857 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
858 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
859 | return; |
860 | } |
861 | |
862 | // Perform PGO instrumentation. |
863 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS)); |
864 | // Add the profile lowering pass. |
865 | InstrProfOptions Options; |
866 | if (!ProfileFile.empty()) |
867 | Options.InstrProfileOutput = ProfileFile; |
868 | // Do not do counter promotion at O0. |
869 | Options.DoCounterPromotion = false; |
870 | Options.UseBFIInPromotion = IsCS; |
871 | Options.Atomic = AtomicCounterUpdate; |
872 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
873 | } |
874 | |
875 | static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { |
876 | return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel()); |
877 | } |
878 | |
879 | ModuleInlinerWrapperPass |
880 | PassBuilder::buildInlinerPipeline(OptimizationLevel Level, |
881 | ThinOrFullLTOPhase Phase) { |
882 | InlineParams IP; |
883 | if (PTO.InlinerThreshold == -1) |
884 | IP = getInlineParamsFromOptLevel(Level); |
885 | else |
886 | IP = getInlineParams(Threshold: PTO.InlinerThreshold); |
887 | // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to |
888 | // disable hot callsite inline (as much as possible [1]) because it makes |
889 | // profile annotation in the backend inaccurate. |
890 | // |
891 | // [1] Note the cost of a function could be below zero due to erased |
892 | // prologue / epilogue. |
893 | if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && |
894 | PGOOpt->Action == PGOOptions::SampleUse) |
895 | IP.HotCallSiteThreshold = 0; |
896 | |
897 | if (PGOOpt) |
898 | IP.EnableDeferral = EnablePGOInlineDeferral; |
899 | |
900 | ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, |
901 | InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner}, |
902 | UseInlineAdvisor, MaxDevirtIterations); |
903 | |
904 | // Require the GlobalsAA analysis for the module so we can query it within |
905 | // the CGSCC pipeline. |
906 | if (EnableGlobalAnalyses) { |
907 | MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
908 | // Invalidate AAManager so it can be recreated and pick up the newly |
909 | // available GlobalsAA. |
910 | MIWP.addModulePass( |
911 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
912 | } |
913 | |
914 | // Require the ProfileSummaryAnalysis for the module so we can query it within |
915 | // the inliner pass. |
916 | MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
917 | |
918 | // Now begin the main postorder CGSCC pipeline. |
919 | // FIXME: The current CGSCC pipeline has its origins in the legacy pass |
920 | // manager and trying to emulate its precise behavior. Much of this doesn't |
921 | // make a lot of sense and we should revisit the core CGSCC structure. |
922 | CGSCCPassManager &MainCGPipeline = MIWP.getPM(); |
923 | |
924 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
925 | // generally clean up exception handling overhead. It isn't clear this is |
926 | // valuable as the inliner doesn't currently care whether it is inlining an |
927 | // invoke or a call. |
928 | |
929 | if (AttributorRun & AttributorRunOption::CGSCC) |
930 | MainCGPipeline.addPass(Pass: AttributorCGSCCPass()); |
931 | |
932 | // Deduce function attributes. We do another run of this after the function |
933 | // simplification pipeline, so this only needs to run when it could affect the |
934 | // function simplification pipeline, which is only the case with recursive |
935 | // functions. |
936 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); |
937 | |
938 | // When at O3 add argument promotion to the pass pipeline. |
939 | // FIXME: It isn't at all clear why this should be limited to O3. |
940 | if (Level == OptimizationLevel::O3) |
941 | MainCGPipeline.addPass(Pass: ArgumentPromotionPass()); |
942 | |
943 | // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if |
944 | // there are no OpenMP runtime calls present in the module. |
945 | if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) |
946 | MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass()); |
947 | |
948 | invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level); |
949 | |
950 | // Add the core function simplification pipeline nested inside the |
951 | // CGSCC walk. |
952 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
953 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
954 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); |
955 | |
956 | // Finally, deduce any function attributes based on the fully simplified |
957 | // function. |
958 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass()); |
959 | |
960 | // Mark that the function is fully simplified and that it shouldn't be |
961 | // simplified again if we somehow revisit it due to CGSCC mutations unless |
962 | // it's been modified since. |
963 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
964 | Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); |
965 | |
966 | MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
967 | |
968 | // Make sure we don't affect potential future NoRerun CGSCC adaptors. |
969 | MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor( |
970 | Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); |
971 | |
972 | return MIWP; |
973 | } |
974 | |
975 | ModulePassManager |
976 | PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, |
977 | ThinOrFullLTOPhase Phase) { |
978 | ModulePassManager MPM; |
979 | |
980 | InlineParams IP = getInlineParamsFromOptLevel(Level); |
981 | // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to |
982 | // disable hot callsite inline (as much as possible [1]) because it makes |
983 | // profile annotation in the backend inaccurate. |
984 | // |
985 | // [1] Note the cost of a function could be below zero due to erased |
986 | // prologue / epilogue. |
987 | if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && |
988 | PGOOpt->Action == PGOOptions::SampleUse) |
989 | IP.HotCallSiteThreshold = 0; |
990 | |
991 | if (PGOOpt) |
992 | IP.EnableDeferral = EnablePGOInlineDeferral; |
993 | |
994 | // The inline deferral logic is used to avoid losing some |
995 | // inlining chance in future. It is helpful in SCC inliner, in which |
996 | // inlining is processed in bottom-up order. |
997 | // While in module inliner, the inlining order is a priority-based order |
998 | // by default. The inline deferral is unnecessary there. So we disable the |
999 | // inline deferral logic in module inliner. |
1000 | IP.EnableDeferral = false; |
1001 | |
1002 | MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); |
1003 | |
1004 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1005 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
1006 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1007 | |
1008 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
1009 | Pass: CoroSplitPass(Level != OptimizationLevel::O0))); |
1010 | |
1011 | return MPM; |
1012 | } |
1013 | |
1014 | ModulePassManager |
1015 | PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, |
1016 | ThinOrFullLTOPhase Phase) { |
1017 | assert(Level != OptimizationLevel::O0 && |
1018 | "Should not be used for O0 pipeline" ); |
1019 | |
1020 | assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && |
1021 | "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!" ); |
1022 | |
1023 | ModulePassManager MPM; |
1024 | |
1025 | // Place pseudo probe instrumentation as the first pass of the pipeline to |
1026 | // minimize the impact of optimization changes. |
1027 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1028 | Phase != ThinOrFullLTOPhase::ThinLTOPostLink) |
1029 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
1030 | |
1031 | bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); |
1032 | |
1033 | // In ThinLTO mode, when flattened profile is used, all the available |
1034 | // profile information will be annotated in PreLink phase so there is |
1035 | // no need to load the profile again in PostLink. |
1036 | bool LoadSampleProfile = |
1037 | HasSampleProfile && |
1038 | !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); |
1039 | |
1040 | // During the ThinLTO backend phase we perform early indirect call promotion |
1041 | // here, before globalopt. Otherwise imported available_externally functions |
1042 | // look unreferenced and are removed. If we are going to load the sample |
1043 | // profile then defer until later. |
1044 | // TODO: See if we can move later and consolidate with the location where |
1045 | // we perform ICP when we are loading a sample profile. |
1046 | // TODO: We pass HasSampleProfile (whether there was a sample profile file |
1047 | // passed to the compile) to the SamplePGO flag of ICP. This is used to |
1048 | // determine whether the new direct calls are annotated with prof metadata. |
1049 | // Ideally this should be determined from whether the IR is annotated with |
1050 | // sample profile, and not whether the a sample profile was provided on the |
1051 | // command line. E.g. for flattened profiles where we will not be reloading |
1052 | // the sample profile in the ThinLTO backend, we ideally shouldn't have to |
1053 | // provide the sample profile file. |
1054 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) |
1055 | MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); |
1056 | |
1057 | // Create an early function pass manager to cleanup the output of the |
1058 | // frontend. Not necessary with LTO post link pipelines since the pre link |
1059 | // pipeline already cleaned up the frontend output. |
1060 | if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { |
1061 | // Do basic inference of function attributes from known properties of system |
1062 | // libraries and other oracles. |
1063 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
1064 | MPM.addPass(Pass: CoroEarlyPass()); |
1065 | |
1066 | FunctionPassManager EarlyFPM; |
1067 | // Lower llvm.expect to metadata before attempting transforms. |
1068 | // Compare/branch metadata may alter the behavior of passes like |
1069 | // SimplifyCFG. |
1070 | EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass()); |
1071 | EarlyFPM.addPass(Pass: SimplifyCFGPass()); |
1072 | EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
1073 | EarlyFPM.addPass(Pass: EarlyCSEPass()); |
1074 | if (Level == OptimizationLevel::O3) |
1075 | EarlyFPM.addPass(Pass: CallSiteSplittingPass()); |
1076 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1077 | Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1078 | } |
1079 | |
1080 | if (LoadSampleProfile) { |
1081 | // Annotate sample profile right after early FPM to ensure freshness of |
1082 | // the debug info. |
1083 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
1084 | PGOOpt->ProfileRemappingFile, Phase)); |
1085 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
1086 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
1087 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
1088 | // Do not invoke ICP in the LTOPrelink phase as it makes it hard |
1089 | // for the profile annotation to be accurate in the LTO backend. |
1090 | if (!isLTOPreLink(Phase)) |
1091 | // We perform early indirect call promotion here, before globalopt. |
1092 | // This is important for the ThinLTO backend phase because otherwise |
1093 | // imported available_externally functions look unreferenced and are |
1094 | // removed. |
1095 | MPM.addPass( |
1096 | Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); |
1097 | } |
1098 | |
1099 | // Try to perform OpenMP specific optimizations on the module. This is a |
1100 | // (quick!) no-op if there are no OpenMP runtime calls present in the module. |
1101 | MPM.addPass(Pass: OpenMPOptPass()); |
1102 | |
1103 | if (AttributorRun & AttributorRunOption::MODULE) |
1104 | MPM.addPass(Pass: AttributorPass()); |
1105 | |
1106 | // Lower type metadata and the type.test intrinsic in the ThinLTO |
1107 | // post link pipeline after ICP. This is to enable usage of the type |
1108 | // tests in ICP sequences. |
1109 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
1110 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1111 | |
1112 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level); |
1113 | |
1114 | // Interprocedural constant propagation now that basic cleanup has occurred |
1115 | // and prior to optimizing globals. |
1116 | // FIXME: This position in the pipeline hasn't been carefully considered in |
1117 | // years, it should be re-analyzed. |
1118 | MPM.addPass(Pass: IPSCCPPass( |
1119 | IPSCCPOptions(/*AllowFuncSpec=*/ |
1120 | Level != OptimizationLevel::Os && |
1121 | Level != OptimizationLevel::Oz && |
1122 | !isLTOPreLink(Phase)))); |
1123 | |
1124 | // Attach metadata to indirect call sites indicating the set of functions |
1125 | // they may target at run-time. This should follow IPSCCP. |
1126 | MPM.addPass(Pass: CalledValuePropagationPass()); |
1127 | |
1128 | // Optimize globals to try and fold them into constants. |
1129 | MPM.addPass(Pass: GlobalOptPass()); |
1130 | |
1131 | // Create a small function pass pipeline to cleanup after all the global |
1132 | // optimizations. |
1133 | FunctionPassManager GlobalCleanupPM; |
1134 | // FIXME: Should this instead by a run of SROA? |
1135 | GlobalCleanupPM.addPass(Pass: PromotePass()); |
1136 | GlobalCleanupPM.addPass(Pass: InstCombinePass()); |
1137 | invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level); |
1138 | GlobalCleanupPM.addPass( |
1139 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
1140 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM), |
1141 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1142 | |
1143 | // Invoke the pre-inliner passes for instrumentation PGO or MemProf. |
1144 | if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && |
1145 | (PGOOpt->Action == PGOOptions::IRInstr || |
1146 | PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty())) |
1147 | addPreInlinerPasses(MPM, Level, LTOPhase: Phase); |
1148 | |
1149 | // Add all the requested passes for instrumentation PGO, if requested. |
1150 | if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && |
1151 | (PGOOpt->Action == PGOOptions::IRInstr || |
1152 | PGOOpt->Action == PGOOptions::IRUse)) { |
1153 | addPGOInstrPasses(MPM, Level, |
1154 | /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr, |
1155 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1156 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1157 | FS: PGOOpt->FS); |
1158 | MPM.addPass(Pass: PGOIndirectCallPromotion(false, false)); |
1159 | } |
1160 | if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && |
1161 | PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1162 | MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); |
1163 | |
1164 | if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && |
1165 | !PGOOpt->MemoryProfile.empty()) |
1166 | MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); |
1167 | |
1168 | // Synthesize function entry counts for non-PGO compilation. |
1169 | if (EnableSyntheticCounts && !PGOOpt) |
1170 | MPM.addPass(Pass: SyntheticCountsPropagation()); |
1171 | |
1172 | if (EnablePGOForceFunctionAttrs && PGOOpt) |
1173 | MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); |
1174 | |
1175 | MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); |
1176 | |
1177 | if (EnableModuleInliner) |
1178 | MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase)); |
1179 | else |
1180 | MPM.addPass(Pass: buildInlinerPipeline(Level, Phase)); |
1181 | |
1182 | // Remove any dead arguments exposed by cleanups, constant folding globals, |
1183 | // and argument promotion. |
1184 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
1185 | |
1186 | MPM.addPass(Pass: CoroCleanupPass()); |
1187 | |
1188 | // Optimize globals now that functions are fully simplified. |
1189 | MPM.addPass(Pass: GlobalOptPass()); |
1190 | MPM.addPass(Pass: GlobalDCEPass()); |
1191 | |
1192 | return MPM; |
1193 | } |
1194 | |
1195 | /// TODO: Should LTO cause any differences to this set of passes? |
1196 | void PassBuilder::addVectorPasses(OptimizationLevel Level, |
1197 | FunctionPassManager &FPM, bool IsFullLTO) { |
1198 | FPM.addPass(Pass: LoopVectorizePass( |
1199 | LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); |
1200 | |
1201 | if (EnableInferAlignmentPass) |
1202 | FPM.addPass(Pass: InferAlignmentPass()); |
1203 | if (IsFullLTO) { |
1204 | // The vectorizer may have significantly shortened a loop body; unroll |
1205 | // again. Unroll small loops to hide loop backedge latency and saturate any |
1206 | // parallel execution resources of an out-of-order processor. We also then |
1207 | // need to clean up redundancies and loop invariant code. |
1208 | // FIXME: It would be really good to use a loop-integrated instruction |
1209 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
1210 | // across the loop nests. |
1211 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
1212 | if (EnableUnrollAndJam && PTO.LoopUnrolling) |
1213 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1214 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
1215 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
1216 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
1217 | PTO.ForgetAllSCEVInLoopUnroll))); |
1218 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
1219 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
1220 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
1221 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
1222 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
1223 | // or SimplifyCFG passes scheduled after us, that would cleanup |
1224 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
1225 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
1226 | } |
1227 | |
1228 | if (!IsFullLTO) { |
1229 | // Eliminate loads by forwarding stores from the previous iteration to loads |
1230 | // of the current iteration. |
1231 | FPM.addPass(Pass: LoopLoadEliminationPass()); |
1232 | } |
1233 | // Cleanup after the loop optimization passes. |
1234 | FPM.addPass(Pass: InstCombinePass()); |
1235 | |
1236 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
1237 | ExtraVectorPassManager ; |
1238 | // At higher optimization levels, try to clean up any runtime overlap and |
1239 | // alignment checks inserted by the vectorizer. We want to track correlated |
1240 | // runtime checks for two inner loops in the same outer loop, fold any |
1241 | // common computations, hoist loop-invariant aspects out of any outer loop, |
1242 | // and unswitch the runtime checks if possible. Once hoisted, we may have |
1243 | // dead (or speculatable) control flows or more combining opportunities. |
1244 | ExtraPasses.addPass(Pass: EarlyCSEPass()); |
1245 | ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass()); |
1246 | ExtraPasses.addPass(Pass: InstCombinePass()); |
1247 | LoopPassManager LPM; |
1248 | LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1249 | /*AllowSpeculation=*/true)); |
1250 | LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
1251 | OptimizationLevel::O3)); |
1252 | ExtraPasses.addPass( |
1253 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true, |
1254 | /*UseBlockFrequencyInfo=*/true)); |
1255 | ExtraPasses.addPass( |
1256 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
1257 | ExtraPasses.addPass(Pass: InstCombinePass()); |
1258 | FPM.addPass(Pass: std::move(ExtraPasses)); |
1259 | } |
1260 | |
1261 | // Now that we've formed fast to execute loop structures, we do further |
1262 | // optimizations. These are run afterward as they might block doing complex |
1263 | // analyses and transforms such as what are needed for loop vectorization. |
1264 | |
1265 | // Cleanup after loop vectorization, etc. Simplification passes like CVP and |
1266 | // GVN, loop transforms, and others have already run, so it's now better to |
1267 | // convert to more optimized IR using more aggressive simplify CFG options. |
1268 | // The extra sinking transform can create larger basic blocks, so do this |
1269 | // before SLP vectorization. |
1270 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
1271 | .forwardSwitchCondToPhi(B: true) |
1272 | .convertSwitchRangeToICmp(B: true) |
1273 | .convertSwitchToLookupTable(B: true) |
1274 | .needCanonicalLoops(B: false) |
1275 | .hoistCommonInsts(B: true) |
1276 | .sinkCommonInsts(B: true))); |
1277 | |
1278 | if (IsFullLTO) { |
1279 | FPM.addPass(Pass: SCCPPass()); |
1280 | FPM.addPass(Pass: InstCombinePass()); |
1281 | FPM.addPass(Pass: BDCEPass()); |
1282 | } |
1283 | |
1284 | // Optimize parallel scalar instruction chains into SIMD instructions. |
1285 | if (PTO.SLPVectorization) { |
1286 | FPM.addPass(Pass: SLPVectorizerPass()); |
1287 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
1288 | FPM.addPass(Pass: EarlyCSEPass()); |
1289 | } |
1290 | } |
1291 | // Enhance/cleanup vector code. |
1292 | FPM.addPass(Pass: VectorCombinePass()); |
1293 | |
1294 | if (!IsFullLTO) { |
1295 | FPM.addPass(Pass: InstCombinePass()); |
1296 | // Unroll small loops to hide loop backedge latency and saturate any |
1297 | // parallel execution resources of an out-of-order processor. We also then |
1298 | // need to clean up redundancies and loop invariant code. |
1299 | // FIXME: It would be really good to use a loop-integrated instruction |
1300 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
1301 | // across the loop nests. |
1302 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
1303 | if (EnableUnrollAndJam && PTO.LoopUnrolling) { |
1304 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1305 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
1306 | } |
1307 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
1308 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
1309 | PTO.ForgetAllSCEVInLoopUnroll))); |
1310 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
1311 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
1312 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
1313 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
1314 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
1315 | // or SimplifyCFG passes scheduled after us, that would cleanup |
1316 | // the CFG mess this may have created if allowed to modify CFG, so forbid that. |
1317 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
1318 | } |
1319 | |
1320 | if (EnableInferAlignmentPass) |
1321 | FPM.addPass(Pass: InferAlignmentPass()); |
1322 | FPM.addPass(Pass: InstCombinePass()); |
1323 | |
1324 | // This is needed for two reasons: |
1325 | // 1. It works around problems that instcombine introduces, such as sinking |
1326 | // expensive FP divides into loops containing multiplications using the |
1327 | // divide result. |
1328 | // 2. It helps to clean up some loop-invariant code created by the loop |
1329 | // unroll pass when IsFullLTO=false. |
1330 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1331 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1332 | /*AllowSpeculation=*/true), |
1333 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
1334 | |
1335 | // Now that we've vectorized and unrolled loops, we may have more refined |
1336 | // alignment information, try to re-derive it here. |
1337 | FPM.addPass(Pass: AlignmentFromAssumptionsPass()); |
1338 | } |
1339 | |
1340 | ModulePassManager |
1341 | PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, |
1342 | ThinOrFullLTOPhase LTOPhase) { |
1343 | const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase); |
1344 | ModulePassManager MPM; |
1345 | |
1346 | // Run partial inlining pass to partially inline functions that have |
1347 | // large bodies. |
1348 | if (RunPartialInlining) |
1349 | MPM.addPass(Pass: PartialInlinerPass()); |
1350 | |
1351 | // Remove avail extern fns and globals definitions since we aren't compiling |
1352 | // an object file for later LTO. For LTO we want to preserve these so they |
1353 | // are eligible for inlining at link-time. Note if they are unreferenced they |
1354 | // will be removed by GlobalDCE later, so this only impacts referenced |
1355 | // available externally globals. Eventually they will be suppressed during |
1356 | // codegen, but eliminating here enables more opportunity for GlobalDCE as it |
1357 | // may make globals referenced by available external functions dead and saves |
1358 | // running remaining passes on the eliminated functions. These should be |
1359 | // preserved during prelinking for link-time inlining decisions. |
1360 | if (!LTOPreLink) |
1361 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
1362 | |
1363 | if (EnableOrderFileInstrumentation) |
1364 | MPM.addPass(Pass: InstrOrderFilePass()); |
1365 | |
1366 | // Do RPO function attribute inference across the module to forward-propagate |
1367 | // attributes where applicable. |
1368 | // FIXME: Is this really an optimization rather than a canonicalization? |
1369 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
1370 | |
1371 | // Do a post inline PGO instrumentation and use pass. This is a context |
1372 | // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as |
1373 | // cross-module inline has not been done yet. The context sensitive |
1374 | // instrumentation is after all the inlines are done. |
1375 | if (!LTOPreLink && PGOOpt) { |
1376 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1377 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
1378 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1379 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1380 | FS: PGOOpt->FS); |
1381 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
1382 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
1383 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1384 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1385 | FS: PGOOpt->FS); |
1386 | } |
1387 | |
1388 | // Re-compute GlobalsAA here prior to function passes. This is particularly |
1389 | // useful as the above will have inlined, DCE'ed, and function-attr |
1390 | // propagated everything. We should at this point have a reasonably minimal |
1391 | // and richly annotated call graph. By computing aliasing and mod/ref |
1392 | // information for all local globals here, the late loop passes and notably |
1393 | // the vectorizer will be able to use them to help recognize vectorizable |
1394 | // memory operations. |
1395 | if (EnableGlobalAnalyses) |
1396 | MPM.addPass(Pass: RecomputeGlobalsAAPass()); |
1397 | |
1398 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
1399 | |
1400 | FunctionPassManager OptimizePM; |
1401 | // Scheduling LoopVersioningLICM when inlining is over, because after that |
1402 | // we may see more accurate aliasing. Reason to run this late is that too |
1403 | // early versioning may prevent further inlining due to increase of code |
1404 | // size. Other optimizations which runs later might get benefit of no-alias |
1405 | // assumption in clone loop. |
1406 | if (UseLoopVersioningLICM) { |
1407 | OptimizePM.addPass( |
1408 | Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass())); |
1409 | // LoopVersioningLICM pass might increase new LICM opportunities. |
1410 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1411 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1412 | /*AllowSpeculation=*/true), |
1413 | /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
1414 | } |
1415 | |
1416 | OptimizePM.addPass(Pass: Float2IntPass()); |
1417 | OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass()); |
1418 | |
1419 | if (EnableMatrix) { |
1420 | OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass()); |
1421 | OptimizePM.addPass(Pass: EarlyCSEPass()); |
1422 | } |
1423 | |
1424 | // CHR pass should only be applied with the profile information. |
1425 | // The check is to check the profile summary information in CHR. |
1426 | if (EnableCHR && Level == OptimizationLevel::O3) |
1427 | OptimizePM.addPass(Pass: ControlHeightReductionPass()); |
1428 | |
1429 | // FIXME: We need to run some loop optimizations to re-rotate loops after |
1430 | // simplifycfg and others undo their rotation. |
1431 | |
1432 | // Optimize the loop execution. These passes operate on entire loop nests |
1433 | // rather than on each loop in an inside-out manner, and so they are actually |
1434 | // function passes. |
1435 | |
1436 | invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level); |
1437 | |
1438 | LoopPassManager LPM; |
1439 | // First rotate loops that may have been un-rotated by prior passes. |
1440 | // Disable header duplication at -Oz. |
1441 | LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
1442 | Level != OptimizationLevel::Oz, |
1443 | LTOPreLink)); |
1444 | // Some loops may have become dead by now. Try to delete them. |
1445 | // FIXME: see discussion in https://reviews.llvm.org/D112851, |
1446 | // this may need to be revisited once we run GVN before loop deletion |
1447 | // in the simplification pipeline. |
1448 | LPM.addPass(Pass: LoopDeletionPass()); |
1449 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1450 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); |
1451 | |
1452 | // Distribute loops to allow partial vectorization. I.e. isolate dependences |
1453 | // into separate loop that would otherwise inhibit vectorization. This is |
1454 | // currently only performed for loops marked with the metadata |
1455 | // llvm.loop.distribute=true or when -enable-loop-distribute is specified. |
1456 | OptimizePM.addPass(Pass: LoopDistributePass()); |
1457 | |
1458 | // Populates the VFABI attribute with the scalar-to-vector mappings |
1459 | // from the TargetLibraryInfo. |
1460 | OptimizePM.addPass(Pass: InjectTLIMappings()); |
1461 | |
1462 | addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false); |
1463 | |
1464 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
1465 | // canonicalization pass that enables other optimizations. As a result, |
1466 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
1467 | // result too early. |
1468 | OptimizePM.addPass(Pass: LoopSinkPass()); |
1469 | |
1470 | // And finally clean up LCSSA form before generating code. |
1471 | OptimizePM.addPass(Pass: InstSimplifyPass()); |
1472 | |
1473 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
1474 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
1475 | // flattening of blocks. |
1476 | OptimizePM.addPass(Pass: DivRemPairsPass()); |
1477 | |
1478 | // Try to annotate calls that were created during optimization. |
1479 | OptimizePM.addPass(Pass: TailCallElimPass()); |
1480 | |
1481 | // LoopSink (and other loop passes since the last simplifyCFG) might have |
1482 | // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. |
1483 | OptimizePM.addPass( |
1484 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
1485 | |
1486 | // Add the core optimizing pipeline. |
1487 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM), |
1488 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1489 | |
1490 | invokeOptimizerLastEPCallbacks(MPM, Level); |
1491 | |
1492 | // Split out cold code. Splitting is done late to avoid hiding context from |
1493 | // other optimizations and inadvertently regressing performance. The tradeoff |
1494 | // is that this has a higher code size cost than splitting early. |
1495 | if (EnableHotColdSplit && !LTOPreLink) |
1496 | MPM.addPass(Pass: HotColdSplittingPass()); |
1497 | |
1498 | // Search the code for similar regions of code. If enough similar regions can |
1499 | // be found where extracting the regions into their own function will decrease |
1500 | // the size of the program, we extract the regions, a deduplicate the |
1501 | // structurally similar regions. |
1502 | if (EnableIROutliner) |
1503 | MPM.addPass(Pass: IROutlinerPass()); |
1504 | |
1505 | // Merge functions if requested. |
1506 | if (PTO.MergeFunctions) |
1507 | MPM.addPass(Pass: MergeFunctionsPass()); |
1508 | |
1509 | // Now we need to do some global optimization transforms. |
1510 | // FIXME: It would seem like these should come first in the optimization |
1511 | // pipeline and maybe be the bottom of the canonicalization pipeline? Weird |
1512 | // ordering here. |
1513 | MPM.addPass(Pass: GlobalDCEPass()); |
1514 | MPM.addPass(Pass: ConstantMergePass()); |
1515 | |
1516 | if (PTO.CallGraphProfile && !LTOPreLink) |
1517 | MPM.addPass(Pass: CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || |
1518 | LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink)); |
1519 | |
1520 | // TODO: Relative look table converter pass caused an issue when full lto is |
1521 | // enabled. See https://reviews.llvm.org/D94355 for more details. |
1522 | // Until the issue fixed, disable this pass during pre-linking phase. |
1523 | if (!LTOPreLink) |
1524 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
1525 | |
1526 | return MPM; |
1527 | } |
1528 | |
1529 | ModulePassManager |
1530 | PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, |
1531 | bool LTOPreLink) { |
1532 | if (Level == OptimizationLevel::O0) |
1533 | return buildO0DefaultPipeline(Level, LTOPreLink); |
1534 | |
1535 | ModulePassManager MPM; |
1536 | |
1537 | // Convert @llvm.global.annotations to !annotation metadata. |
1538 | MPM.addPass(Pass: Annotation2MetadataPass()); |
1539 | |
1540 | // Force any function attributes we want the rest of the pipeline to observe. |
1541 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
1542 | |
1543 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
1544 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
1545 | |
1546 | // Apply module pipeline start EP callback. |
1547 | invokePipelineStartEPCallbacks(MPM, Level); |
1548 | |
1549 | const ThinOrFullLTOPhase LTOPhase = LTOPreLink |
1550 | ? ThinOrFullLTOPhase::FullLTOPreLink |
1551 | : ThinOrFullLTOPhase::None; |
1552 | // Add the core simplification pipeline. |
1553 | MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase: LTOPhase)); |
1554 | |
1555 | // Now add the optimization pipeline. |
1556 | MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase)); |
1557 | |
1558 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1559 | PGOOpt->Action == PGOOptions::SampleUse) |
1560 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
1561 | |
1562 | // Emit annotation remarks. |
1563 | addAnnotationRemarksPass(MPM); |
1564 | |
1565 | if (LTOPreLink) |
1566 | addRequiredLTOPreLinkPasses(MPM); |
1567 | return MPM; |
1568 | } |
1569 | |
1570 | ModulePassManager |
1571 | PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, |
1572 | bool EmitSummary) { |
1573 | ModulePassManager MPM; |
1574 | if (ThinLTO) |
1575 | MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level)); |
1576 | else |
1577 | MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level)); |
1578 | MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary)); |
1579 | |
1580 | // Use the ThinLTO post-link pipeline with sample profiling |
1581 | if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
1582 | MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); |
1583 | else { |
1584 | // otherwise, just use module optimization |
1585 | MPM.addPass( |
1586 | Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None)); |
1587 | // Emit annotation remarks. |
1588 | addAnnotationRemarksPass(MPM); |
1589 | } |
1590 | return MPM; |
1591 | } |
1592 | |
1593 | ModulePassManager |
1594 | PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
1595 | if (Level == OptimizationLevel::O0) |
1596 | return buildO0DefaultPipeline(Level, /*LTOPreLink*/true); |
1597 | |
1598 | ModulePassManager MPM; |
1599 | |
1600 | // Convert @llvm.global.annotations to !annotation metadata. |
1601 | MPM.addPass(Pass: Annotation2MetadataPass()); |
1602 | |
1603 | // Force any function attributes we want the rest of the pipeline to observe. |
1604 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
1605 | |
1606 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
1607 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
1608 | |
1609 | // Apply module pipeline start EP callback. |
1610 | invokePipelineStartEPCallbacks(MPM, Level); |
1611 | |
1612 | // If we are planning to perform ThinLTO later, we don't bloat the code with |
1613 | // unrolling/vectorization/... now. Just simplify the module as much as we |
1614 | // can. |
1615 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
1616 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink)); |
1617 | |
1618 | // Run partial inlining pass to partially inline functions that have |
1619 | // large bodies. |
1620 | // FIXME: It isn't clear whether this is really the right place to run this |
1621 | // in ThinLTO. Because there is another canonicalization and simplification |
1622 | // phase that will run after the thin link, running this here ends up with |
1623 | // less information than will be available later and it may grow functions in |
1624 | // ways that aren't beneficial. |
1625 | if (RunPartialInlining) |
1626 | MPM.addPass(Pass: PartialInlinerPass()); |
1627 | |
1628 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1629 | PGOOpt->Action == PGOOptions::SampleUse) |
1630 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
1631 | |
1632 | // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual |
1633 | // optimization is going to be done in PostLink stage, but clang can't add |
1634 | // callbacks there in case of in-process ThinLTO called by linker. |
1635 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
1636 | invokeOptimizerLastEPCallbacks(MPM, Level); |
1637 | |
1638 | // Emit annotation remarks. |
1639 | addAnnotationRemarksPass(MPM); |
1640 | |
1641 | addRequiredLTOPreLinkPasses(MPM); |
1642 | |
1643 | return MPM; |
1644 | } |
1645 | |
1646 | ModulePassManager PassBuilder::buildThinLTODefaultPipeline( |
1647 | OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { |
1648 | ModulePassManager MPM; |
1649 | |
1650 | if (ImportSummary) { |
1651 | // For ThinLTO we must apply the context disambiguation decisions early, to |
1652 | // ensure we can correctly match the callsites to summary data. |
1653 | if (EnableMemProfContextDisambiguation) |
1654 | MPM.addPass(Pass: MemProfContextDisambiguation(ImportSummary)); |
1655 | |
1656 | // These passes import type identifier resolutions for whole-program |
1657 | // devirtualization and CFI. They must run early because other passes may |
1658 | // disturb the specific instruction patterns that these passes look for, |
1659 | // creating dependencies on resolutions that may not appear in the summary. |
1660 | // |
1661 | // For example, GVN may transform the pattern assume(type.test) appearing in |
1662 | // two basic blocks into assume(phi(type.test, type.test)), which would |
1663 | // transform a dependency on a WPD resolution into a dependency on a type |
1664 | // identifier resolution for CFI. |
1665 | // |
1666 | // Also, WPD has access to more precise information than ICP and can |
1667 | // devirtualize more effectively, so it should operate on the IR first. |
1668 | // |
1669 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
1670 | // metadata and intrinsics. |
1671 | MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary)); |
1672 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary)); |
1673 | } |
1674 | |
1675 | if (Level == OptimizationLevel::O0) { |
1676 | // Run a second time to clean up any type tests left behind by WPD for use |
1677 | // in ICP. |
1678 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1679 | // Drop available_externally and unreferenced globals. This is necessary |
1680 | // with ThinLTO in order to avoid leaving undefined references to dead |
1681 | // globals in the object file. |
1682 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
1683 | MPM.addPass(Pass: GlobalDCEPass()); |
1684 | return MPM; |
1685 | } |
1686 | |
1687 | // Add the core simplification pipeline. |
1688 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
1689 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
1690 | |
1691 | // Now add the optimization pipeline. |
1692 | MPM.addPass(Pass: buildModuleOptimizationPipeline( |
1693 | Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
1694 | |
1695 | // Emit annotation remarks. |
1696 | addAnnotationRemarksPass(MPM); |
1697 | |
1698 | return MPM; |
1699 | } |
1700 | |
1701 | ModulePassManager |
1702 | PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
1703 | // FIXME: We should use a customized pre-link pipeline! |
1704 | return buildPerModuleDefaultPipeline(Level, |
1705 | /* LTOPreLink */ true); |
1706 | } |
1707 | |
1708 | ModulePassManager |
1709 | PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, |
1710 | ModuleSummaryIndex *ExportSummary) { |
1711 | ModulePassManager MPM; |
1712 | |
1713 | invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); |
1714 | |
1715 | // Create a function that performs CFI checks for cross-DSO calls with targets |
1716 | // in the current module. |
1717 | MPM.addPass(Pass: CrossDSOCFIPass()); |
1718 | |
1719 | if (Level == OptimizationLevel::O0) { |
1720 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
1721 | // metadata and intrinsics. |
1722 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
1723 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
1724 | // Run a second time to clean up any type tests left behind by WPD for use |
1725 | // in ICP. |
1726 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1727 | |
1728 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
1729 | |
1730 | // Emit annotation remarks. |
1731 | addAnnotationRemarksPass(MPM); |
1732 | |
1733 | return MPM; |
1734 | } |
1735 | |
1736 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
1737 | // Load sample profile before running the LTO optimization pipeline. |
1738 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
1739 | PGOOpt->ProfileRemappingFile, |
1740 | ThinOrFullLTOPhase::FullLTOPostLink)); |
1741 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
1742 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
1743 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
1744 | } |
1745 | |
1746 | // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. |
1747 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
1748 | |
1749 | // Remove unused virtual tables to improve the quality of code generated by |
1750 | // whole-program devirtualization and bitset lowering. |
1751 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
1752 | |
1753 | // Do basic inference of function attributes from known properties of system |
1754 | // libraries and other oracles. |
1755 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
1756 | |
1757 | if (Level.getSpeedupLevel() > 1) { |
1758 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1759 | Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1760 | |
1761 | // Indirect call promotion. This should promote all the targets that are |
1762 | // left by the earlier promotion pass that promotes intra-module targets. |
1763 | // This two-step promotion is to save the compile time. For LTO, it should |
1764 | // produce the same result as if we only do promotion here. |
1765 | MPM.addPass(Pass: PGOIndirectCallPromotion( |
1766 | true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
1767 | |
1768 | // Propagate constants at call sites into the functions they call. This |
1769 | // opens opportunities for globalopt (and inlining) by substituting function |
1770 | // pointers passed as arguments to direct uses of functions. |
1771 | MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ |
1772 | Level != OptimizationLevel::Os && |
1773 | Level != OptimizationLevel::Oz))); |
1774 | |
1775 | // Attach metadata to indirect call sites indicating the set of functions |
1776 | // they may target at run-time. This should follow IPSCCP. |
1777 | MPM.addPass(Pass: CalledValuePropagationPass()); |
1778 | } |
1779 | |
1780 | // Now deduce any function attributes based in the current code. |
1781 | MPM.addPass( |
1782 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
1783 | |
1784 | // Do RPO function attribute inference across the module to forward-propagate |
1785 | // attributes where applicable. |
1786 | // FIXME: Is this really an optimization rather than a canonicalization? |
1787 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
1788 | |
1789 | // Use in-range annotations on GEP indices to split globals where beneficial. |
1790 | MPM.addPass(Pass: GlobalSplitPass()); |
1791 | |
1792 | // Run whole program optimization of virtual call when the list of callees |
1793 | // is fixed. |
1794 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
1795 | |
1796 | // Stop here at -O1. |
1797 | if (Level == OptimizationLevel::O1) { |
1798 | // The LowerTypeTestsPass needs to run to lower type metadata and the |
1799 | // type.test intrinsics. The pass does nothing if CFI is disabled. |
1800 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
1801 | // Run a second time to clean up any type tests left behind by WPD for use |
1802 | // in ICP (which is performed earlier than this in the regular LTO |
1803 | // pipeline). |
1804 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1805 | |
1806 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
1807 | |
1808 | // Emit annotation remarks. |
1809 | addAnnotationRemarksPass(MPM); |
1810 | |
1811 | return MPM; |
1812 | } |
1813 | |
1814 | // Optimize globals to try and fold them into constants. |
1815 | MPM.addPass(Pass: GlobalOptPass()); |
1816 | |
1817 | // Promote any localized globals to SSA registers. |
1818 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass())); |
1819 | |
1820 | // Linking modules together can lead to duplicate global constant, only |
1821 | // keep one copy of each constant. |
1822 | MPM.addPass(Pass: ConstantMergePass()); |
1823 | |
1824 | // Remove unused arguments from functions. |
1825 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
1826 | |
1827 | // Reduce the code after globalopt and ipsccp. Both can open up significant |
1828 | // simplification opportunities, and both can propagate functions through |
1829 | // function pointers. When this happens, we often have to resolve varargs |
1830 | // calls, etc, so let instcombine do this. |
1831 | FunctionPassManager PeepholeFPM; |
1832 | PeepholeFPM.addPass(Pass: InstCombinePass()); |
1833 | if (Level.getSpeedupLevel() > 1) |
1834 | PeepholeFPM.addPass(Pass: AggressiveInstCombinePass()); |
1835 | invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level); |
1836 | |
1837 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM), |
1838 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1839 | |
1840 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
1841 | // generally clean up exception handling overhead. It isn't clear this is |
1842 | // valuable as the inliner doesn't currently care whether it is inlining an |
1843 | // invoke or a call. |
1844 | // Run the inliner now. |
1845 | if (EnableModuleInliner) { |
1846 | MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level), |
1847 | UseInlineAdvisor, |
1848 | ThinOrFullLTOPhase::FullLTOPostLink)); |
1849 | } else { |
1850 | MPM.addPass(Pass: ModuleInlinerWrapperPass( |
1851 | getInlineParamsFromOptLevel(Level), |
1852 | /* MandatoryFirst */ true, |
1853 | InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink, |
1854 | .Pass: InlinePass::CGSCCInliner})); |
1855 | } |
1856 | |
1857 | // Perform context disambiguation after inlining, since that would reduce the |
1858 | // amount of additional cloning required to distinguish the allocation |
1859 | // contexts. |
1860 | if (EnableMemProfContextDisambiguation) |
1861 | MPM.addPass(Pass: MemProfContextDisambiguation()); |
1862 | |
1863 | // Optimize globals again after we ran the inliner. |
1864 | MPM.addPass(Pass: GlobalOptPass()); |
1865 | |
1866 | // Run the OpenMPOpt pass again after global optimizations. |
1867 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
1868 | |
1869 | // Garbage collect dead functions. |
1870 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
1871 | |
1872 | // If we didn't decide to inline a function, check to see if we can |
1873 | // transform it to pass arguments by value instead of by reference. |
1874 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: ArgumentPromotionPass())); |
1875 | |
1876 | FunctionPassManager FPM; |
1877 | // The IPO Passes may leave cruft around. Clean up after them. |
1878 | FPM.addPass(Pass: InstCombinePass()); |
1879 | invokePeepholeEPCallbacks(FPM, Level); |
1880 | |
1881 | if (EnableConstraintElimination) |
1882 | FPM.addPass(Pass: ConstraintEliminationPass()); |
1883 | |
1884 | FPM.addPass(Pass: JumpThreadingPass()); |
1885 | |
1886 | // Do a post inline PGO instrumentation and use pass. This is a context |
1887 | // sensitive PGO pass. |
1888 | if (PGOOpt) { |
1889 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1890 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
1891 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1892 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1893 | FS: PGOOpt->FS); |
1894 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
1895 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
1896 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1897 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1898 | FS: PGOOpt->FS); |
1899 | } |
1900 | |
1901 | // Break up allocas |
1902 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
1903 | |
1904 | // LTO provides additional opportunities for tailcall elimination due to |
1905 | // link-time inlining, and visibility of nocapture attribute. |
1906 | FPM.addPass(Pass: TailCallElimPass()); |
1907 | |
1908 | // Run a few AA driver optimizations here and now to cleanup the code. |
1909 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM), |
1910 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1911 | |
1912 | MPM.addPass( |
1913 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
1914 | |
1915 | // Require the GlobalsAA analysis for the module so we can query it within |
1916 | // MainFPM. |
1917 | if (EnableGlobalAnalyses) { |
1918 | MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
1919 | // Invalidate AAManager so it can be recreated and pick up the newly |
1920 | // available GlobalsAA. |
1921 | MPM.addPass( |
1922 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
1923 | } |
1924 | |
1925 | FunctionPassManager MainFPM; |
1926 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1927 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1928 | /*AllowSpeculation=*/true), |
1929 | /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
1930 | |
1931 | if (RunNewGVN) |
1932 | MainFPM.addPass(Pass: NewGVNPass()); |
1933 | else |
1934 | MainFPM.addPass(Pass: GVNPass()); |
1935 | |
1936 | // Remove dead memcpy()'s. |
1937 | MainFPM.addPass(Pass: MemCpyOptPass()); |
1938 | |
1939 | // Nuke dead stores. |
1940 | MainFPM.addPass(Pass: DSEPass()); |
1941 | MainFPM.addPass(Pass: MoveAutoInitPass()); |
1942 | MainFPM.addPass(Pass: MergedLoadStoreMotionPass()); |
1943 | |
1944 | LoopPassManager LPM; |
1945 | if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) |
1946 | LPM.addPass(Pass: LoopFlattenPass()); |
1947 | LPM.addPass(Pass: IndVarSimplifyPass()); |
1948 | LPM.addPass(Pass: LoopDeletionPass()); |
1949 | // FIXME: Add loop interchange. |
1950 | |
1951 | // Unroll small loops and perform peeling. |
1952 | LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
1953 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
1954 | PTO.ForgetAllSCEVInLoopUnroll)); |
1955 | // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. |
1956 | // *All* loop passes must preserve it, in order to be able to use it. |
1957 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1958 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); |
1959 | |
1960 | MainFPM.addPass(Pass: LoopDistributePass()); |
1961 | |
1962 | addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true); |
1963 | |
1964 | // Run the OpenMPOpt CGSCC pass again late. |
1965 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
1966 | Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); |
1967 | |
1968 | invokePeepholeEPCallbacks(FPM&: MainFPM, Level); |
1969 | MainFPM.addPass(Pass: JumpThreadingPass()); |
1970 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM), |
1971 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1972 | |
1973 | // Lower type metadata and the type.test intrinsic. This pass supports |
1974 | // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs |
1975 | // to be run at link time if CFI is enabled. This pass does nothing if |
1976 | // CFI is disabled. |
1977 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
1978 | // Run a second time to clean up any type tests left behind by WPD for use |
1979 | // in ICP (which is performed earlier than this in the regular LTO pipeline). |
1980 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1981 | |
1982 | // Enable splitting late in the FullLTO post-link pipeline. |
1983 | if (EnableHotColdSplit) |
1984 | MPM.addPass(Pass: HotColdSplittingPass()); |
1985 | |
1986 | // Add late LTO optimization passes. |
1987 | FunctionPassManager LateFPM; |
1988 | |
1989 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
1990 | // canonicalization pass that enables other optimizations. As a result, |
1991 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
1992 | // result too early. |
1993 | LateFPM.addPass(Pass: LoopSinkPass()); |
1994 | |
1995 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
1996 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
1997 | // flattening of blocks. |
1998 | LateFPM.addPass(Pass: DivRemPairsPass()); |
1999 | |
2000 | // Delete basic blocks, which optimization passes may have killed. |
2001 | LateFPM.addPass(Pass: SimplifyCFGPass( |
2002 | SimplifyCFGOptions().convertSwitchRangeToICmp(B: true).hoistCommonInsts( |
2003 | B: true))); |
2004 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM))); |
2005 | |
2006 | // Drop bodies of available externally objects to improve GlobalDCE. |
2007 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
2008 | |
2009 | // Now that we have optimized the program, discard unreachable functions. |
2010 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
2011 | |
2012 | if (PTO.MergeFunctions) |
2013 | MPM.addPass(Pass: MergeFunctionsPass()); |
2014 | |
2015 | if (PTO.CallGraphProfile) |
2016 | MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true)); |
2017 | |
2018 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
2019 | |
2020 | // Emit annotation remarks. |
2021 | addAnnotationRemarksPass(MPM); |
2022 | |
2023 | return MPM; |
2024 | } |
2025 | |
2026 | ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, |
2027 | bool LTOPreLink) { |
2028 | assert(Level == OptimizationLevel::O0 && |
2029 | "buildO0DefaultPipeline should only be used with O0" ); |
2030 | |
2031 | ModulePassManager MPM; |
2032 | |
2033 | // Perform pseudo probe instrumentation in O0 mode. This is for the |
2034 | // consistency between different build modes. For example, a LTO build can be |
2035 | // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in |
2036 | // the postlink will require pseudo probe instrumentation in the prelink. |
2037 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling) |
2038 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
2039 | |
2040 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || |
2041 | PGOOpt->Action == PGOOptions::IRUse)) |
2042 | addPGOInstrPassesForO0( |
2043 | MPM, |
2044 | /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr), |
2045 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile, |
2046 | ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS); |
2047 | |
2048 | invokePipelineStartEPCallbacks(MPM, Level); |
2049 | |
2050 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
2051 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
2052 | |
2053 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level); |
2054 | |
2055 | // Build a minimal pipeline based on the semantics required by LLVM, |
2056 | // which is just that always inlining occurs. Further, disable generating |
2057 | // lifetime intrinsics to avoid enabling further optimizations during |
2058 | // code generation. |
2059 | MPM.addPass(Pass: AlwaysInlinerPass( |
2060 | /*InsertLifetimeIntrinsics=*/false)); |
2061 | |
2062 | if (PTO.MergeFunctions) |
2063 | MPM.addPass(Pass: MergeFunctionsPass()); |
2064 | |
2065 | if (EnableMatrix) |
2066 | MPM.addPass( |
2067 | Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true))); |
2068 | |
2069 | if (!CGSCCOptimizerLateEPCallbacks.empty()) { |
2070 | CGSCCPassManager CGPM; |
2071 | invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level); |
2072 | if (!CGPM.isEmpty()) |
2073 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
2074 | } |
2075 | if (!LateLoopOptimizationsEPCallbacks.empty()) { |
2076 | LoopPassManager LPM; |
2077 | invokeLateLoopOptimizationsEPCallbacks(LPM, Level); |
2078 | if (!LPM.isEmpty()) { |
2079 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
2080 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
2081 | } |
2082 | } |
2083 | if (!LoopOptimizerEndEPCallbacks.empty()) { |
2084 | LoopPassManager LPM; |
2085 | invokeLoopOptimizerEndEPCallbacks(LPM, Level); |
2086 | if (!LPM.isEmpty()) { |
2087 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
2088 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
2089 | } |
2090 | } |
2091 | if (!ScalarOptimizerLateEPCallbacks.empty()) { |
2092 | FunctionPassManager FPM; |
2093 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
2094 | if (!FPM.isEmpty()) |
2095 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
2096 | } |
2097 | |
2098 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
2099 | |
2100 | if (!VectorizerStartEPCallbacks.empty()) { |
2101 | FunctionPassManager FPM; |
2102 | invokeVectorizerStartEPCallbacks(FPM, Level); |
2103 | if (!FPM.isEmpty()) |
2104 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
2105 | } |
2106 | |
2107 | ModulePassManager CoroPM; |
2108 | CoroPM.addPass(Pass: CoroEarlyPass()); |
2109 | CGSCCPassManager CGPM; |
2110 | CGPM.addPass(Pass: CoroSplitPass()); |
2111 | CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
2112 | CoroPM.addPass(Pass: CoroCleanupPass()); |
2113 | CoroPM.addPass(Pass: GlobalDCEPass()); |
2114 | MPM.addPass(Pass: CoroConditionalWrapper(std::move(CoroPM))); |
2115 | |
2116 | invokeOptimizerLastEPCallbacks(MPM, Level); |
2117 | |
2118 | if (LTOPreLink) |
2119 | addRequiredLTOPreLinkPasses(MPM); |
2120 | |
2121 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
2122 | |
2123 | return MPM; |
2124 | } |
2125 | |
2126 | AAManager PassBuilder::buildDefaultAAPipeline() { |
2127 | AAManager AA; |
2128 | |
2129 | // The order in which these are registered determines their priority when |
2130 | // being queried. |
2131 | |
2132 | // First we register the basic alias analysis that provides the majority of |
2133 | // per-function local AA logic. This is a stateless, on-demand local set of |
2134 | // AA techniques. |
2135 | AA.registerFunctionAnalysis<BasicAA>(); |
2136 | |
2137 | // Next we query fast, specialized alias analyses that wrap IR-embedded |
2138 | // information about aliasing. |
2139 | AA.registerFunctionAnalysis<ScopedNoAliasAA>(); |
2140 | AA.registerFunctionAnalysis<TypeBasedAA>(); |
2141 | |
2142 | // Add support for querying global aliasing information when available. |
2143 | // Because the `AAManager` is a function analysis and `GlobalsAA` is a module |
2144 | // analysis, all that the `AAManager` can do is query for any *cached* |
2145 | // results from `GlobalsAA` through a readonly proxy. |
2146 | if (EnableGlobalAnalyses) |
2147 | AA.registerModuleAnalysis<GlobalsAA>(); |
2148 | |
2149 | // Add target-specific alias analyses. |
2150 | if (TM) |
2151 | TM->registerDefaultAliasAnalyses(AA); |
2152 | |
2153 | return AA; |
2154 | } |
2155 | |