1 | //===- LoopVectorize.h ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops |
10 | // and generates target-independent LLVM-IR. |
11 | // The vectorizer uses the TargetTransformInfo analysis to estimate the costs |
12 | // of instructions in order to estimate the profitability of vectorization. |
13 | // |
14 | // The loop vectorizer combines consecutive loop iterations into a single |
15 | // 'wide' iteration. After this transformation the index is incremented |
16 | // by the SIMD vector width, and not by one. |
17 | // |
18 | // This pass has four parts: |
19 | // 1. The main loop pass that drives the different parts. |
20 | // 2. LoopVectorizationLegality - A unit that checks for the legality |
21 | // of the vectorization. |
22 | // 3. InnerLoopVectorizer - A unit that performs the actual |
23 | // widening of instructions. |
24 | // 4. LoopVectorizationCostModel - A unit that checks for the profitability |
25 | // of vectorization. It decides on the optimal vector width, which |
26 | // can be one, if vectorization is not profitable. |
27 | // |
28 | // There is a development effort going on to migrate loop vectorizer to the |
29 | // VPlan infrastructure and to introduce outer loop vectorization support (see |
30 | // docs/VectorizationPlan.rst and |
31 | // http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this |
32 | // purpose, we temporarily introduced the VPlan-native vectorization path: an |
33 | // alternative vectorization path that is natively implemented on top of the |
34 | // VPlan infrastructure. See EnableVPlanNativePath for enabling. |
35 | // |
36 | //===----------------------------------------------------------------------===// |
37 | // |
38 | // The reduction-variable vectorization is based on the paper: |
39 | // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. |
40 | // |
41 | // Variable uniformity checks are inspired by: |
42 | // Karrenberg, R. and Hack, S. Whole Function Vectorization. |
43 | // |
44 | // The interleaved access vectorization is based on the paper: |
45 | // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved |
46 | // Data for SIMD |
47 | // |
48 | // Other ideas/concepts are from: |
49 | // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. |
50 | // |
51 | // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of |
52 | // Vectorizing Compilers. |
53 | // |
54 | //===----------------------------------------------------------------------===// |
55 | |
56 | #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
57 | #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
58 | |
59 | #include "llvm/IR/PassManager.h" |
60 | #include "llvm/Support/CommandLine.h" |
61 | #include <functional> |
62 | |
63 | namespace llvm { |
64 | |
65 | class AssumptionCache; |
66 | class BlockFrequencyInfo; |
67 | class DemandedBits; |
68 | class DominatorTree; |
69 | class Function; |
70 | class Loop; |
71 | class LoopAccessInfoManager; |
72 | class LoopInfo; |
73 | class ; |
74 | class ProfileSummaryInfo; |
75 | class ScalarEvolution; |
76 | class TargetLibraryInfo; |
77 | class TargetTransformInfo; |
78 | |
79 | extern cl::opt<bool> EnableLoopInterleaving; |
80 | extern cl::opt<bool> EnableLoopVectorization; |
81 | |
82 | /// A marker to determine if extra passes after loop vectorization should be |
83 | /// run. |
84 | struct |
85 | : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> { |
86 | static AnalysisKey ; |
87 | struct { |
88 | bool (Function &F, const PreservedAnalyses &PA, |
89 | FunctionAnalysisManager::Invalidator &) { |
90 | // Check whether the analysis has been explicitly invalidated. Otherwise, |
91 | // it remains preserved. |
92 | auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>(); |
93 | return !PAC.preservedWhenStateless(); |
94 | } |
95 | }; |
96 | |
97 | Result (Function &F, FunctionAnalysisManager &FAM) { return Result(); } |
98 | }; |
99 | |
100 | /// A pass manager to run a set of extra function simplification passes after |
101 | /// vectorization, if requested. LoopVectorize caches the |
102 | /// ShouldRunExtraVectorPasses analysis to request extra simplifications, if |
103 | /// they could be beneficial. |
104 | struct : public FunctionPassManager { |
105 | PreservedAnalyses (Function &F, FunctionAnalysisManager &AM) { |
106 | auto PA = PreservedAnalyses::all(); |
107 | if (AM.getCachedResult<ShouldRunExtraVectorPasses>(IR&: F)) |
108 | PA.intersect(Arg: FunctionPassManager::run(IR&: F, AM)); |
109 | PA.abandon<ShouldRunExtraVectorPasses>(); |
110 | return PA; |
111 | } |
112 | }; |
113 | |
/// Options accepted by the LoopVectorizePass constructor.
struct LoopVectorizeOptions {
  /// When true, interleaving is restricted to loops that explicitly request
  /// it. When false, every loop is a candidate for interleaving.
  bool InterleaveOnlyWhenForced;

  /// When true, vectorization is restricted to loops that explicitly request
  /// it. When false, every loop is a candidate for vectorization.
  bool VectorizeOnlyWhenForced;

  /// Creating the pass with no arguments currently behaves as if
  /// EnableLoopInterleaving = true and EnableLoopVectorization = true. The
  /// interleaving default is therefore consistent with the cl::opt flag, while
  /// the vectorization default is not.
  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
  /// set to true, and the corresponding change to account for this be made in
  /// opt.cpp. The default values used here will then become:
  ///   InterleaveOnlyWhenForced(!EnableLoopInterleaving)
  ///   VectorizeOnlyWhenForced(!EnableLoopVectorization).
  LoopVectorizeOptions() : LoopVectorizeOptions(false, false) {}

  /// Builds an option set with both restrictions given explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced{InterleaveOnlyWhenForced},
        VectorizeOnlyWhenForced{VectorizeOnlyWhenForced} {}

  /// Overwrites InterleaveOnlyWhenForced with \p Value.
  /// \returns *this, so that setter calls can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Overwrites VectorizeOnlyWhenForced with \p Value.
  /// \returns *this, so that setter calls can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};
149 | |
/// Plain record of the changes that were made; returned by
/// LoopVectorizePass::runImpl.
struct LoopVectorizeResult {
  /// Stores the constructor's MadeAnyChange argument.
  bool MadeAnyChange;

  /// Stores the constructor's MadeCFGChange argument.
  bool MadeCFGChange;

  /// Records both flags exactly as given.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};
158 | |
159 | /// The LoopVectorize Pass. |
160 | struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> { |
161 | private: |
162 | /// If false, consider all loops for interleaving. |
163 | /// If true, only loops that explicitly request interleaving are considered. |
164 | bool InterleaveOnlyWhenForced; |
165 | |
166 | /// If false, consider all loops for vectorization. |
167 | /// If true, only loops that explicitly request vectorization are considered. |
168 | bool VectorizeOnlyWhenForced; |
169 | |
170 | public: |
171 | LoopVectorizePass(LoopVectorizeOptions Opts = {}); |
172 | |
173 | ScalarEvolution *SE; |
174 | LoopInfo *LI; |
175 | TargetTransformInfo *TTI; |
176 | DominatorTree *DT; |
177 | BlockFrequencyInfo *BFI; |
178 | TargetLibraryInfo *TLI; |
179 | DemandedBits *DB; |
180 | AssumptionCache *AC; |
181 | LoopAccessInfoManager *LAIs; |
182 | OptimizationRemarkEmitter *ORE; |
183 | ProfileSummaryInfo *PSI; |
184 | |
185 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
186 | void printPipeline(raw_ostream &OS, |
187 | function_ref<StringRef(StringRef)> MapClassName2PassName); |
188 | |
189 | // Shim for old PM. |
190 | LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, |
191 | TargetTransformInfo &TTI_, DominatorTree &DT_, |
192 | BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_, |
193 | DemandedBits &DB_, AssumptionCache &AC_, |
194 | LoopAccessInfoManager &LAIs_, |
195 | OptimizationRemarkEmitter &ORE_, |
196 | ProfileSummaryInfo *PSI_); |
197 | |
198 | bool processLoop(Loop *L); |
199 | }; |
200 | |
201 | /// Reports a vectorization failure: print \p DebugMsg for debugging |
202 | /// purposes along with the corresponding optimization remark \p RemarkName. |
203 | /// If \p I is passed, it is an instruction that prevents vectorization. |
204 | /// Otherwise, the loop \p TheLoop is used for the location of the remark. |
205 | void (const StringRef DebugMsg, |
206 | const StringRef OREMsg, const StringRef ORETag, |
207 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); |
208 | |
209 | /// Reports an informative message: print \p Msg for debugging purposes as well |
210 | /// as an optimization remark. Uses either \p I as location of the remark, or |
211 | /// otherwise \p TheLoop. |
212 | void (const StringRef OREMsg, const StringRef ORETag, |
213 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, |
214 | Instruction *I = nullptr); |
215 | |
216 | } // end namespace llvm |
217 | |
218 | #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
219 | |