1//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file defines the LoopVectorizationLegality class. Original code
11/// in Loop Vectorizer has been moved out to its own file for modularity
12/// and reusability.
13///
14/// Currently, it works for innermost loop vectorization. Extending this to
15/// outer loop vectorization is a TODO item.
16///
17/// Also provides:
18/// 1) LoopVectorizeHints class which keeps a number of loop annotations
19/// locally for easy look up. It has the ability to write them back as
20/// loop metadata, upon request.
21/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22/// of reporting useful failure to vectorize message.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28
29#include "llvm/ADT/MapVector.h"
30#include "llvm/Analysis/LoopAccessAnalysis.h"
31#include "llvm/Support/TypeSize.h"
32#include "llvm/Transforms/Utils/LoopUtils.h"
33
34namespace llvm {
35class AssumptionCache;
36class BasicBlock;
37class BlockFrequencyInfo;
38class DemandedBits;
39class DominatorTree;
40class Function;
41class Loop;
42class LoopInfo;
43class Metadata;
44class OptimizationRemarkEmitter;
45class PredicatedScalarEvolution;
46class ProfileSummaryInfo;
47class TargetLibraryInfo;
48class TargetTransformInfo;
49class Type;
50
51/// Utility class for getting and setting loop vectorizer hints in the form
52/// of loop metadata.
53/// This class keeps a number of loop annotations locally (as member variables)
54/// and can, upon request, write them back as metadata on the loop. It will
55/// initially scan the loop for existing metadata, and will update the local
56/// values based on information in the loop.
57/// We cannot write all values to metadata, as the mere presence of some info,
58/// for example 'force', means a decision has been made. So, we need to be
59/// careful NOT to add them if the user hasn't specifically asked so.
60class LoopVectorizeHints {
61 enum HintKind {
62 HK_WIDTH,
63 HK_INTERLEAVE,
64 HK_FORCE,
65 HK_ISVECTORIZED,
66 HK_PREDICATE,
67 HK_SCALABLE
68 };
69
70 /// Hint - associates name and validation with the hint value.
71 struct Hint {
72 const char *Name;
73 unsigned Value; // This may have to change for non-numeric values.
74 HintKind Kind;
75
76 Hint(const char *Name, unsigned Value, HintKind Kind)
77 : Name(Name), Value(Value), Kind(Kind) {}
78
79 bool validate(unsigned Val);
80 };
81
82 /// Vectorization width.
83 Hint Width;
84
85 /// Vectorization interleave factor.
86 Hint Interleave;
87
88 /// Vectorization forced
89 Hint Force;
90
91 /// Already Vectorized
92 Hint IsVectorized;
93
94 /// Vector Predicate
95 Hint Predicate;
96
97 /// Says whether we should use fixed width or scalable vectorization.
98 Hint Scalable;
99
100 /// Return the loop metadata prefix.
101 static StringRef Prefix() { return "llvm.loop."; }
102
103 /// True if there is any unsafe math in the loop.
104 bool PotentiallyUnsafe = false;
105
106public:
107 enum ForceKind {
108 FK_Undefined = -1, ///< Not selected.
109 FK_Disabled = 0, ///< Forcing disabled.
110 FK_Enabled = 1, ///< Forcing enabled.
111 };
112
113 enum ScalableForceKind {
114 /// Not selected.
115 SK_Unspecified = -1,
116 /// Disables vectorization with scalable vectors.
117 SK_FixedWidthOnly = 0,
118 /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
119 /// scalable vectors when the cost-model is inconclusive. This is the
120 /// default when the scalable.enable hint is enabled through a pragma.
121 SK_PreferScalable = 1
122 };
123
124 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
125 OptimizationRemarkEmitter &ORE,
126 const TargetTransformInfo *TTI = nullptr);
127
128 /// Mark the loop L as already vectorized by setting the width to 1.
129 void setAlreadyVectorized();
130
131 bool allowVectorization(Function *F, Loop *L,
132 bool VectorizeOnlyWhenForced) const;
133
134 /// Dumps all the hint information.
135 void emitRemarkWithHints() const;
136
137 ElementCount getWidth() const {
138 return ElementCount::get(MinVal: Width.Value, Scalable: (ScalableForceKind)Scalable.Value ==
139 SK_PreferScalable);
140 }
141
142 unsigned getInterleave() const {
143 if (Interleave.Value)
144 return Interleave.Value;
145 // If interleaving is not explicitly set, assume that if we do not want
146 // unrolling, we also don't want any interleaving.
147 if (llvm::hasUnrollTransformation(L: TheLoop) & TM_Disable)
148 return 1;
149 return 0;
150 }
151 unsigned getIsVectorized() const { return IsVectorized.Value; }
152 unsigned getPredicate() const { return Predicate.Value; }
153 enum ForceKind getForce() const {
154 if ((ForceKind)Force.Value == FK_Undefined &&
155 hasDisableAllTransformsHint(L: TheLoop))
156 return FK_Disabled;
157 return (ForceKind)Force.Value;
158 }
159
160 /// \return true if scalable vectorization has been explicitly disabled.
161 bool isScalableVectorizationDisabled() const {
162 return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
163 }
164
165 /// If hints are provided that force vectorization, use the AlwaysPrint
166 /// pass name to force the frontend to print the diagnostic.
167 const char *vectorizeAnalysisPassName() const;
168
169 /// When enabling loop hints are provided we allow the vectorizer to change
170 /// the order of operations that is given by the scalar loop. This is not
171 /// enabled by default because can be unsafe or inefficient. For example,
172 /// reordering floating-point operations will change the way round-off
173 /// error accumulates in the loop.
174 bool allowReordering() const;
175
176 bool isPotentiallyUnsafe() const {
177 // Avoid FP vectorization if the target is unsure about proper support.
178 // This may be related to the SIMD unit in the target not handling
179 // IEEE 754 FP ops properly, or bad single-to-double promotions.
180 // Otherwise, a sequence of vectorized loops, even without reduction,
181 // could lead to different end results on the destination vectors.
182 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
183 }
184
185 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
186
187private:
188 /// Find hints specified in the loop metadata and update local values.
189 void getHintsFromMetadata();
190
191 /// Checks string hint with one operand and set value if valid.
192 void setHint(StringRef Name, Metadata *Arg);
193
194 /// The loop these hints belong to.
195 const Loop *TheLoop;
196
197 /// Interface to emit optimization remarks.
198 OptimizationRemarkEmitter &ORE;
199};
200
201/// This holds vectorization requirements that must be verified late in
202/// the process. The requirements are set by legalize and costmodel. Once
203/// vectorization has been determined to be possible and profitable the
204/// requirements can be verified by looking for metadata or compiler options.
205/// For example, some loops require FP commutativity which is only allowed if
206/// vectorization is explicitly specified or if the fast-math compiler option
207/// has been provided.
208/// Late evaluation of these requirements allows helpful diagnostics to be
209/// composed that tells the user what need to be done to vectorize the loop. For
210/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
211/// evaluation should be used only when diagnostics can generated that can be
212/// followed by a non-expert user.
213class LoopVectorizationRequirements {
214public:
215 /// Track the 1st floating-point instruction that can not be reassociated.
216 void addExactFPMathInst(Instruction *I) {
217 if (I && !ExactFPMathInst)
218 ExactFPMathInst = I;
219 }
220
221 Instruction *getExactFPInst() { return ExactFPMathInst; }
222
223private:
224 Instruction *ExactFPMathInst = nullptr;
225};
226
227/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
228/// to what vectorization factor.
229/// This class does not look at the profitability of vectorization, only the
230/// legality. This class has two main kinds of checks:
231/// * Memory checks - The code in canVectorizeMemory checks if vectorization
232/// will change the order of memory accesses in a way that will change the
233/// correctness of the program.
234/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
235/// checks for a number of different conditions, such as the availability of a
236/// single induction variable, that all types are supported and vectorize-able,
237/// etc. This code reflects the capabilities of InnerLoopVectorizer.
238/// This class is also used by InnerLoopVectorizer for identifying
239/// induction variable and the different reduction variables.
240class LoopVectorizationLegality {
241public:
242 LoopVectorizationLegality(
243 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
244 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
245 LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
246 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
247 AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
248 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
249 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI),
250 PSI(PSI) {}
251
252 /// ReductionList contains the reduction descriptors for all
253 /// of the reductions that were found in the loop.
254 using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>;
255
256 /// InductionList saves induction variables and maps them to the
257 /// induction descriptor.
258 using InductionList = MapVector<PHINode *, InductionDescriptor>;
259
260 /// RecurrenceSet contains the phi nodes that are recurrences other than
261 /// inductions and reductions.
262 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
263
264 /// Returns true if it is legal to vectorize this loop.
265 /// This does not mean that it is profitable to vectorize this
266 /// loop, only that it is legal to do so.
267 /// Temporarily taking UseVPlanNativePath parameter. If true, take
268 /// the new code path being implemented for outer loop vectorization
269 /// (should be functional for inner loop vectorization) based on VPlan.
270 /// If false, good old LV code.
271 bool canVectorize(bool UseVPlanNativePath);
272
273 /// Returns true if it is legal to vectorize the FP math operations in this
274 /// loop. Vectorizing is legal if we allow reordering of FP operations, or if
275 /// we can use in-order reductions.
276 bool canVectorizeFPMath(bool EnableStrictReductions);
277
278 /// Return true if we can vectorize this loop while folding its tail by
279 /// masking, and mark all respective loads/stores for masking.
280 /// This object's state is only modified iff this function returns true.
281 bool prepareToFoldTailByMasking();
282
283 /// Returns the primary induction variable.
284 PHINode *getPrimaryInduction() { return PrimaryInduction; }
285
286 /// Returns the reduction variables found in the loop.
287 const ReductionList &getReductionVars() const { return Reductions; }
288
289 /// Returns the induction variables found in the loop.
290 const InductionList &getInductionVars() const { return Inductions; }
291
292 /// Return the fixed-order recurrences found in the loop.
293 RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
294
295 /// Returns the widest induction type.
296 Type *getWidestInductionType() { return WidestIndTy; }
297
298 /// Returns True if given store is a final invariant store of one of the
299 /// reductions found in the loop.
300 bool isInvariantStoreOfReduction(StoreInst *SI);
301
302 /// Returns True if given address is invariant and is used to store recurrent
303 /// expression
304 bool isInvariantAddressOfReduction(Value *V);
305
306 /// Returns True if V is a Phi node of an induction variable in this loop.
307 bool isInductionPhi(const Value *V) const;
308
309 /// Returns a pointer to the induction descriptor, if \p Phi is an integer or
310 /// floating point induction.
311 const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
312
313 /// Returns a pointer to the induction descriptor, if \p Phi is pointer
314 /// induction.
315 const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
316
317 /// Returns True if V is a cast that is part of an induction def-use chain,
318 /// and had been proven to be redundant under a runtime guard (in other
319 /// words, the cast has the same SCEV expression as the induction phi).
320 bool isCastedInductionVariable(const Value *V) const;
321
322 /// Returns True if V can be considered as an induction variable in this
323 /// loop. V can be the induction phi, or some redundant cast in the def-use
324 /// chain of the inducion phi.
325 bool isInductionVariable(const Value *V) const;
326
327 /// Returns True if PN is a reduction variable in this loop.
328 bool isReductionVariable(PHINode *PN) const { return Reductions.count(Key: PN); }
329
330 /// Returns True if Phi is a fixed-order recurrence in this loop.
331 bool isFixedOrderRecurrence(const PHINode *Phi) const;
332
333 /// Return true if the block BB needs to be predicated in order for the loop
334 /// to be vectorized.
335 bool blockNeedsPredication(BasicBlock *BB) const;
336
337 /// Check if this pointer is consecutive when vectorizing. This happens
338 /// when the last index of the GEP is the induction variable, or that the
339 /// pointer itself is an induction variable.
340 /// This check allows us to vectorize A[idx] into a wide load/store.
341 /// Returns:
342 /// 0 - Stride is unknown or non-consecutive.
343 /// 1 - Address is consecutive.
344 /// -1 - Address is consecutive, and decreasing.
345 /// NOTE: This method must only be used before modifying the original scalar
346 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
347 int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
348
349 /// Returns true if value V is uniform across \p VF lanes, when \p VF is
350 /// provided, and otherwise if \p V is invariant across all loop iterations.
351 bool isInvariant(Value *V) const;
352
353 /// Returns true if value V is uniform across \p VF lanes, when \p VF is
354 /// provided, and otherwise if \p V is invariant across all loop iterations.
355 bool isUniform(Value *V, ElementCount VF) const;
356
357 /// A uniform memory op is a load or store which accesses the same memory
358 /// location on all \p VF lanes, if \p VF is provided and otherwise if the
359 /// memory location is invariant.
360 bool isUniformMemOp(Instruction &I, ElementCount VF) const;
361
362 /// Returns the information that we collected about runtime memory check.
363 const RuntimePointerChecking *getRuntimePointerChecking() const {
364 return LAI->getRuntimePointerChecking();
365 }
366
367 const LoopAccessInfo *getLAI() const { return LAI; }
368
369 bool isSafeForAnyVectorWidth() const {
370 return LAI->getDepChecker().isSafeForAnyVectorWidth();
371 }
372
373 uint64_t getMaxSafeVectorWidthInBits() const {
374 return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
375 }
376
377 /// Returns true if vector representation of the instruction \p I
378 /// requires mask.
379 bool isMaskRequired(const Instruction *I) const {
380 return MaskedOp.contains(Ptr: I);
381 }
382
383 /// Returns true if there is at least one function call in the loop which
384 /// has a vectorized variant available.
385 bool hasVectorCallVariants() const { return VecCallVariantsFound; }
386
387 unsigned getNumStores() const { return LAI->getNumStores(); }
388 unsigned getNumLoads() const { return LAI->getNumLoads(); }
389
390 PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
391 return &PSE;
392 }
393
394 Loop *getLoop() const { return TheLoop; }
395
396 LoopInfo *getLoopInfo() const { return LI; }
397
398 AssumptionCache *getAssumptionCache() const { return AC; }
399
400 ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); }
401
402 DominatorTree *getDominatorTree() const { return DT; }
403
404private:
405 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
406 /// its nested loops are considered legal for vectorization. These legal
407 /// checks are common for inner and outer loop vectorization.
408 /// Temporarily taking UseVPlanNativePath parameter. If true, take
409 /// the new code path being implemented for outer loop vectorization
410 /// (should be functional for inner loop vectorization) based on VPlan.
411 /// If false, good old LV code.
412 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
413
414 /// Set up outer loop inductions by checking Phis in outer loop header for
415 /// supported inductions (int inductions). Return false if any of these Phis
416 /// is not a supported induction or if we fail to find an induction.
417 bool setupOuterLoopInductions();
418
419 /// Return true if the pre-header, exiting and latch blocks of \p Lp
420 /// (non-recursive) are considered legal for vectorization.
421 /// Temporarily taking UseVPlanNativePath parameter. If true, take
422 /// the new code path being implemented for outer loop vectorization
423 /// (should be functional for inner loop vectorization) based on VPlan.
424 /// If false, good old LV code.
425 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
426
427 /// Check if a single basic block loop is vectorizable.
428 /// At this point we know that this is a loop with a constant trip count
429 /// and we only need to check individual instructions.
430 bool canVectorizeInstrs();
431
432 /// When we vectorize loops we may change the order in which
433 /// we read and write from memory. This method checks if it is
434 /// legal to vectorize the code, considering only memory constrains.
435 /// Returns true if the loop is vectorizable
436 bool canVectorizeMemory();
437
438 /// Return true if we can vectorize this loop using the IF-conversion
439 /// transformation.
440 bool canVectorizeWithIfConvert();
441
442 /// Return true if we can vectorize this outer loop. The method performs
443 /// specific checks for outer loop vectorization.
444 bool canVectorizeOuterLoop();
445
446 /// Return true if all of the instructions in the block can be speculatively
447 /// executed, and record the loads/stores that require masking.
448 /// \p SafePtrs is a list of addresses that are known to be legal and we know
449 /// that we can read from them without segfault.
450 /// \p MaskedOp is a list of instructions that have to be transformed into
451 /// calls to the appropriate masked intrinsic when the loop is vectorized
452 /// or dropped if the instruction is a conditional assume intrinsic.
453 bool
454 blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
455 SmallPtrSetImpl<const Instruction *> &MaskedOp) const;
456
457 /// Updates the vectorization state by adding \p Phi to the inductions list.
458 /// This can set \p Phi as the main induction of the loop if \p Phi is a
459 /// better choice for the main induction than the existing one.
460 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
461 SmallPtrSetImpl<Value *> &AllowedExit);
462
463 /// The loop that we evaluate.
464 Loop *TheLoop;
465
466 /// Loop Info analysis.
467 LoopInfo *LI;
468
469 /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
470 /// Applies dynamic knowledge to simplify SCEV expressions in the context
471 /// of existing SCEV assumptions. The analysis will also add a minimal set
472 /// of new predicates if this is required to enable vectorization and
473 /// unrolling.
474 PredicatedScalarEvolution &PSE;
475
476 /// Target Transform Info.
477 TargetTransformInfo *TTI;
478
479 /// Target Library Info.
480 TargetLibraryInfo *TLI;
481
482 /// Dominator Tree.
483 DominatorTree *DT;
484
485 // LoopAccess analysis.
486 LoopAccessInfoManager &LAIs;
487
488 const LoopAccessInfo *LAI = nullptr;
489
490 /// Interface to emit optimization remarks.
491 OptimizationRemarkEmitter *ORE;
492
493 // --- vectorization state --- //
494
495 /// Holds the primary induction variable. This is the counter of the
496 /// loop.
497 PHINode *PrimaryInduction = nullptr;
498
499 /// Holds the reduction variables.
500 ReductionList Reductions;
501
502 /// Holds all of the induction variables that we found in the loop.
503 /// Notice that inductions don't need to start at zero and that induction
504 /// variables can be pointers.
505 InductionList Inductions;
506
507 /// Holds all the casts that participate in the update chain of the induction
508 /// variables, and that have been proven to be redundant (possibly under a
509 /// runtime guard). These casts can be ignored when creating the vectorized
510 /// loop body.
511 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
512
513 /// Holds the phi nodes that are fixed-order recurrences.
514 RecurrenceSet FixedOrderRecurrences;
515
516 /// Holds the widest induction type encountered.
517 Type *WidestIndTy = nullptr;
518
519 /// Allowed outside users. This holds the variables that can be accessed from
520 /// outside the loop.
521 SmallPtrSet<Value *, 4> AllowedExit;
522
523 /// Vectorization requirements that will go through late-evaluation.
524 LoopVectorizationRequirements *Requirements;
525
526 /// Used to emit an analysis of any legality issues.
527 LoopVectorizeHints *Hints;
528
529 /// The demanded bits analysis is used to compute the minimum type size in
530 /// which a reduction can be computed.
531 DemandedBits *DB;
532
533 /// The assumption cache analysis is used to compute the minimum type size in
534 /// which a reduction can be computed.
535 AssumptionCache *AC;
536
537 /// While vectorizing these instructions we have to generate a
538 /// call to the appropriate masked intrinsic or drop them in case of
539 /// conditional assumes.
540 SmallPtrSet<const Instruction *, 8> MaskedOp;
541
542 /// BFI and PSI are used to check for profile guided size optimizations.
543 BlockFrequencyInfo *BFI;
544 ProfileSummaryInfo *PSI;
545
546 /// If we discover function calls within the loop which have a valid
547 /// vectorized variant, record that fact so that LoopVectorize can
548 /// (potentially) make a better decision on the maximum VF and enable
549 /// the use of those function variants.
550 bool VecCallVariantsFound = false;
551};
552
553} // namespace llvm
554
555#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
556

// Source: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h