1//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file defines the LoopVectorizationLegality class. Original code
11/// in Loop Vectorizer has been moved out to its own file for modularity
12/// and reusability.
13///
14/// Currently, it works for innermost loop vectorization. Extending this to
15/// outer loop vectorization is a TODO item.
16///
17/// Also provides:
18/// 1) LoopVectorizeHints class which keeps a number of loop annotations
19/// locally for easy look up. It has the ability to write them back as
20/// loop metadata, upon request.
21/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22/// of reporting useful failure to vectorize message.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28
29#include "llvm/ADT/MapVector.h"
30#include "llvm/Analysis/LoopAccessAnalysis.h"
31#include "llvm/Support/TypeSize.h"
32#include "llvm/Transforms/Utils/LoopUtils.h"
33
34namespace llvm {
35class AssumptionCache;
36class BasicBlock;
37class BlockFrequencyInfo;
38class DemandedBits;
39class DominatorTree;
40class Function;
41class Loop;
42class LoopInfo;
43class Metadata;
44class OptimizationRemarkEmitter;
45class PredicatedScalarEvolution;
46class ProfileSummaryInfo;
47class TargetLibraryInfo;
48class TargetTransformInfo;
49class Type;
50
51/// Utility class for getting and setting loop vectorizer hints in the form
52/// of loop metadata.
53/// This class keeps a number of loop annotations locally (as member variables)
54/// and can, upon request, write them back as metadata on the loop. It will
55/// initially scan the loop for existing metadata, and will update the local
56/// values based on information in the loop.
57/// We cannot write all values to metadata, as the mere presence of some info,
58/// for example 'force', means a decision has been made. So, we need to be
59/// careful NOT to add them if the user hasn't specifically asked so.
60class LoopVectorizeHints {
61 enum HintKind {
62 HK_WIDTH,
63 HK_INTERLEAVE,
64 HK_FORCE,
65 HK_ISVECTORIZED,
66 HK_PREDICATE,
67 HK_SCALABLE
68 };
69
70 /// Hint - associates name and validation with the hint value.
71 struct Hint {
72 const char *Name;
73 unsigned Value; // This may have to change for non-numeric values.
74 HintKind Kind;
75
76 Hint(const char *Name, unsigned Value, HintKind Kind)
77 : Name(Name), Value(Value), Kind(Kind) {}
78
79 bool validate(unsigned Val);
80 };
81
82 /// Vectorization width.
83 Hint Width;
84
85 /// Vectorization interleave factor.
86 Hint Interleave;
87
88 /// Vectorization forced
89 Hint Force;
90
91 /// Already Vectorized
92 Hint IsVectorized;
93
94 /// Vector Predicate
95 Hint Predicate;
96
97 /// Says whether we should use fixed width or scalable vectorization.
98 Hint Scalable;
99
100 /// Return the loop metadata prefix.
101 static StringRef Prefix() { return "llvm.loop."; }
102
103 /// True if there is any unsafe math in the loop.
104 bool PotentiallyUnsafe = false;
105
106public:
107 enum ForceKind {
108 FK_Undefined = -1, ///< Not selected.
109 FK_Disabled = 0, ///< Forcing disabled.
110 FK_Enabled = 1, ///< Forcing enabled.
111 };
112
113 enum ScalableForceKind {
114 /// Not selected.
115 SK_Unspecified = -1,
116 /// Disables vectorization with scalable vectors.
117 SK_FixedWidthOnly = 0,
118 /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
119 /// scalable vectors when the cost-model is inconclusive. This is the
120 /// default when the scalable.enable hint is enabled through a pragma.
121 SK_PreferScalable = 1
122 };
123
124 LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
125 OptimizationRemarkEmitter &ORE,
126 const TargetTransformInfo *TTI = nullptr);
127
128 /// Mark the loop L as already vectorized by setting the width to 1.
129 void setAlreadyVectorized();
130
131 bool allowVectorization(Function *F, Loop *L,
132 bool VectorizeOnlyWhenForced) const;
133
134 /// Dumps all the hint information.
135 void emitRemarkWithHints() const;
136
137 ElementCount getWidth() const {
138 return ElementCount::get(MinVal: Width.Value, Scalable: (ScalableForceKind)Scalable.Value ==
139 SK_PreferScalable);
140 }
141
142 unsigned getInterleave() const {
143 if (Interleave.Value)
144 return Interleave.Value;
145 // If interleaving is not explicitly set, assume that if we do not want
146 // unrolling, we also don't want any interleaving.
147 if (llvm::hasUnrollTransformation(L: TheLoop) & TM_Disable)
148 return 1;
149 return 0;
150 }
151 unsigned getIsVectorized() const { return IsVectorized.Value; }
152 unsigned getPredicate() const { return Predicate.Value; }
153 enum ForceKind getForce() const {
154 if ((ForceKind)Force.Value == FK_Undefined &&
155 hasDisableAllTransformsHint(L: TheLoop))
156 return FK_Disabled;
157 return (ForceKind)Force.Value;
158 }
159
160 /// \return true if scalable vectorization has been explicitly disabled.
161 bool isScalableVectorizationDisabled() const {
162 return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
163 }
164
165 /// If hints are provided that force vectorization, use the AlwaysPrint
166 /// pass name to force the frontend to print the diagnostic.
167 const char *vectorizeAnalysisPassName() const;
168
169 /// When enabling loop hints are provided we allow the vectorizer to change
170 /// the order of operations that is given by the scalar loop. This is not
171 /// enabled by default because can be unsafe or inefficient. For example,
172 /// reordering floating-point operations will change the way round-off
173 /// error accumulates in the loop.
174 bool allowReordering() const;
175
176 bool isPotentiallyUnsafe() const {
177 // Avoid FP vectorization if the target is unsure about proper support.
178 // This may be related to the SIMD unit in the target not handling
179 // IEEE 754 FP ops properly, or bad single-to-double promotions.
180 // Otherwise, a sequence of vectorized loops, even without reduction,
181 // could lead to different end results on the destination vectors.
182 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
183 }
184
185 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
186
187private:
188 /// Find hints specified in the loop metadata and update local values.
189 void getHintsFromMetadata();
190
191 /// Checks string hint with one operand and set value if valid.
192 void setHint(StringRef Name, Metadata *Arg);
193
194 /// The loop these hints belong to.
195 const Loop *TheLoop;
196
197 /// Interface to emit optimization remarks.
198 OptimizationRemarkEmitter &ORE;
199};
200
201/// This holds vectorization requirements that must be verified late in
202/// the process. The requirements are set by legalize and costmodel. Once
203/// vectorization has been determined to be possible and profitable the
204/// requirements can be verified by looking for metadata or compiler options.
205/// For example, some loops require FP commutativity which is only allowed if
206/// vectorization is explicitly specified or if the fast-math compiler option
207/// has been provided.
208/// Late evaluation of these requirements allows helpful diagnostics to be
209/// composed that tells the user what need to be done to vectorize the loop. For
210/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
211/// evaluation should be used only when diagnostics can generated that can be
212/// followed by a non-expert user.
213class LoopVectorizationRequirements {
214public:
215 /// Track the 1st floating-point instruction that can not be reassociated.
216 void addExactFPMathInst(Instruction *I) {
217 if (I && !ExactFPMathInst)
218 ExactFPMathInst = I;
219 }
220
221 Instruction *getExactFPInst() { return ExactFPMathInst; }
222
223private:
224 Instruction *ExactFPMathInst = nullptr;
225};
226
227/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
228/// to what vectorization factor.
229/// This class does not look at the profitability of vectorization, only the
230/// legality. This class has two main kinds of checks:
231/// * Memory checks - The code in canVectorizeMemory checks if vectorization
232/// will change the order of memory accesses in a way that will change the
233/// correctness of the program.
234/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
235/// checks for a number of different conditions, such as the availability of a
236/// single induction variable, that all types are supported and vectorize-able,
237/// etc. This code reflects the capabilities of InnerLoopVectorizer.
238/// This class is also used by InnerLoopVectorizer for identifying
239/// induction variable and the different reduction variables.
240class LoopVectorizationLegality {
241public:
242 LoopVectorizationLegality(
243 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
244 TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
245 LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
246 LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
247 AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
248 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
249 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI),
250 PSI(PSI) {}
251
252 /// ReductionList contains the reduction descriptors for all
253 /// of the reductions that were found in the loop.
254 using ReductionList = MapVector<PHINode *, RecurrenceDescriptor>;
255
256 /// InductionList saves induction variables and maps them to the
257 /// induction descriptor.
258 using InductionList = MapVector<PHINode *, InductionDescriptor>;
259
260 /// RecurrenceSet contains the phi nodes that are recurrences other than
261 /// inductions and reductions.
262 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
263
264 /// Returns true if it is legal to vectorize this loop.
265 /// This does not mean that it is profitable to vectorize this
266 /// loop, only that it is legal to do so.
267 /// Temporarily taking UseVPlanNativePath parameter. If true, take
268 /// the new code path being implemented for outer loop vectorization
269 /// (should be functional for inner loop vectorization) based on VPlan.
270 /// If false, good old LV code.
271 bool canVectorize(bool UseVPlanNativePath);
272
273 /// Returns true if it is legal to vectorize the FP math operations in this
274 /// loop. Vectorizing is legal if we allow reordering of FP operations, or if
275 /// we can use in-order reductions.
276 bool canVectorizeFPMath(bool EnableStrictReductions);
277
278 /// Return true if we can vectorize this loop while folding its tail by
279 /// masking, and mark all respective loads/stores for masking.
280 /// This object's state is only modified iff this function returns true.
281 bool prepareToFoldTailByMasking();
282
283 /// Returns the primary induction variable.
284 PHINode *getPrimaryInduction() { return PrimaryInduction; }
285
286 /// Returns the reduction variables found in the loop.
287 const ReductionList &getReductionVars() const { return Reductions; }
288
289 /// Returns the induction variables found in the loop.
290 const InductionList &getInductionVars() const { return Inductions; }
291
292 /// Return the fixed-order recurrences found in the loop.
293 RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
294
295 /// Returns the widest induction type.
296 Type *getWidestInductionType() { return WidestIndTy; }
297
298 /// Returns True if given store is a final invariant store of one of the
299 /// reductions found in the loop.
300 bool isInvariantStoreOfReduction(StoreInst *SI);
301
302 /// Returns True if given address is invariant and is used to store recurrent
303 /// expression
304 bool isInvariantAddressOfReduction(Value *V);
305
306 /// Returns True if V is a Phi node of an induction variable in this loop.
307 bool isInductionPhi(const Value *V) const;
308
309 /// Returns a pointer to the induction descriptor, if \p Phi is an integer or
310 /// floating point induction.
311 const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
312
313 /// Returns a pointer to the induction descriptor, if \p Phi is pointer
314 /// induction.
315 const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
316
317 /// Returns True if V is a cast that is part of an induction def-use chain,
318 /// and had been proven to be redundant under a runtime guard (in other
319 /// words, the cast has the same SCEV expression as the induction phi).
320 bool isCastedInductionVariable(const Value *V) const;
321
322 /// Returns True if V can be considered as an induction variable in this
323 /// loop. V can be the induction phi, or some redundant cast in the def-use
324 /// chain of the inducion phi.
325 bool isInductionVariable(const Value *V) const;
326
327 /// Returns True if PN is a reduction variable in this loop.
328 bool isReductionVariable(PHINode *PN) const { return Reductions.count(Key: PN); }
329
330 /// Returns True if Phi is a fixed-order recurrence in this loop.
331 bool isFixedOrderRecurrence(const PHINode *Phi) const;
332
333 /// Return true if the block BB needs to be predicated in order for the loop
334 /// to be vectorized.
335 bool blockNeedsPredication(BasicBlock *BB) const;
336
337 /// Check if this pointer is consecutive when vectorizing. This happens
338 /// when the last index of the GEP is the induction variable, or that the
339 /// pointer itself is an induction variable.
340 /// This check allows us to vectorize A[idx] into a wide load/store.
341 /// Returns:
342 /// 0 - Stride is unknown or non-consecutive.
343 /// 1 - Address is consecutive.
344 /// -1 - Address is consecutive, and decreasing.
345 /// NOTE: This method must only be used before modifying the original scalar
346 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
347 int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
348
349 /// Returns true if value V is uniform across \p VF lanes, when \p VF is
350 /// provided, and otherwise if \p V is invariant across all loop iterations.
351 bool isInvariant(Value *V) const;
352
353 /// Returns true if value V is uniform across \p VF lanes, when \p VF is
354 /// provided, and otherwise if \p V is invariant across all loop iterations.
355 bool isUniform(Value *V, ElementCount VF) const;
356
357 /// A uniform memory op is a load or store which accesses the same memory
358 /// location on all \p VF lanes, if \p VF is provided and otherwise if the
359 /// memory location is invariant.
360 bool isUniformMemOp(Instruction &I, ElementCount VF) const;
361
362 /// Returns the information that we collected about runtime memory check.
363 const RuntimePointerChecking *getRuntimePointerChecking() const {
364 return LAI->getRuntimePointerChecking();
365 }
366
367 const LoopAccessInfo *getLAI() const { return LAI; }
368
369 bool isSafeForAnyVectorWidth() const {
370 return LAI->getDepChecker().isSafeForAnyVectorWidth();
371 }
372
373 uint64_t getMaxSafeVectorWidthInBits() const {
374 return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
375 }
376
377 /// Returns true if vector representation of the instruction \p I
378 /// requires mask.
379 bool isMaskRequired(const Instruction *I) const {
380 return MaskedOp.contains(Ptr: I);
381 }
382
383 /// Returns true if there is at least one function call in the loop which
384 /// has a vectorized variant available.
385 bool hasVectorCallVariants() const { return VecCallVariantsFound; }
386
387 unsigned getNumStores() const { return LAI->getNumStores(); }
388 unsigned getNumLoads() const { return LAI->getNumLoads(); }
389
390 PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
391 return &PSE;
392 }
393
394 Loop *getLoop() const { return TheLoop; }
395
396 LoopInfo *getLoopInfo() const { return LI; }
397
398 AssumptionCache *getAssumptionCache() const { return AC; }
399
400 ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); }
401
402 DominatorTree *getDominatorTree() const { return DT; }
403
404private:
405 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
406 /// its nested loops are considered legal for vectorization. These legal
407 /// checks are common for inner and outer loop vectorization.
408 /// Temporarily taking UseVPlanNativePath parameter. If true, take
409 /// the new code path being implemented for outer loop vectorization
410 /// (should be functional for inner loop vectorization) based on VPlan.
411 /// If false, good old LV code.
412 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
413
414 /// Set up outer loop inductions by checking Phis in outer loop header for
415 /// supported inductions (int inductions). Return false if any of these Phis
416 /// is not a supported induction or if we fail to find an induction.
417 bool setupOuterLoopInductions();
418
419 /// Return true if the pre-header, exiting and latch blocks of \p Lp
420 /// (non-recursive) are considered legal for vectorization.
421 /// Temporarily taking UseVPlanNativePath parameter. If true, take
422 /// the new code path being implemented for outer loop vectorization
423 /// (should be functional for inner loop vectorization) based on VPlan.
424 /// If false, good old LV code.
425 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
426
427 /// Check if a single basic block loop is vectorizable.
428 /// At this point we know that this is a loop with a constant trip count
429 /// and we only need to check individual instructions.
430 bool canVectorizeInstrs();
431
432 /// When we vectorize loops we may change the order in which
433 /// we read and write from memory. This method checks if it is
434 /// legal to vectorize the code, considering only memory constrains.
435 /// Returns true if the loop is vectorizable
436 bool canVectorizeMemory();
437
438 /// Return true if we can vectorize this loop using the IF-conversion
439 /// transformation.
440 bool canVectorizeWithIfConvert();
441
442 /// Return true if we can vectorize this outer loop. The method performs
443 /// specific checks for outer loop vectorization.
444 bool canVectorizeOuterLoop();
445
446 /// Return true if all of the instructions in the block can be speculatively
447 /// executed, and record the loads/stores that require masking.
448 /// \p SafePtrs is a list of addresses that are known to be legal and we know
449 /// that we can read from them without segfault.
450 /// \p MaskedOp is a list of instructions that have to be transformed into
451 /// calls to the appropriate masked intrinsic when the loop is vectorized
452 /// or dropped if the instruction is a conditional assume intrinsic.
453 bool
454 blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
455 SmallPtrSetImpl<const Instruction *> &MaskedOp) const;
456
457 /// Updates the vectorization state by adding \p Phi to the inductions list.
458 /// This can set \p Phi as the main induction of the loop if \p Phi is a
459 /// better choice for the main induction than the existing one.
460 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
461 SmallPtrSetImpl<Value *> &AllowedExit);
462
463 /// The loop that we evaluate.
464 Loop *TheLoop;
465
466 /// Loop Info analysis.
467 LoopInfo *LI;
468
469 /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
470 /// Applies dynamic knowledge to simplify SCEV expressions in the context
471 /// of existing SCEV assumptions. The analysis will also add a minimal set
472 /// of new predicates if this is required to enable vectorization and
473 /// unrolling.
474 PredicatedScalarEvolution &PSE;
475
476 /// Target Transform Info.
477 TargetTransformInfo *TTI;
478
479 /// Target Library Info.
480 TargetLibraryInfo *TLI;
481
482 /// Dominator Tree.
483 DominatorTree *DT;
484
485 // LoopAccess analysis.
486 LoopAccessInfoManager &LAIs;
487
488 const LoopAccessInfo *LAI = nullptr;
489
490 /// Interface to emit optimization remarks.
491 OptimizationRemarkEmitter *ORE;
492
493 // --- vectorization state --- //
494
495 /// Holds the primary induction variable. This is the counter of the
496 /// loop.
497 PHINode *PrimaryInduction = nullptr;
498
499 /// Holds the reduction variables.
500 ReductionList Reductions;
501
502 /// Holds all of the induction variables that we found in the loop.
503 /// Notice that inductions don't need to start at zero and that induction
504 /// variables can be pointers.
505 InductionList Inductions;
506
507 /// Holds all the casts that participate in the update chain of the induction
508 /// variables, and that have been proven to be redundant (possibly under a
509 /// runtime guard). These casts can be ignored when creating the vectorized
510 /// loop body.
511 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
512
513 /// Holds the phi nodes that are fixed-order recurrences.
514 RecurrenceSet FixedOrderRecurrences;
515
516 /// Holds the widest induction type encountered.
517 Type *WidestIndTy = nullptr;
518
519 /// Allowed outside users. This holds the variables that can be accessed from
520 /// outside the loop.
521 SmallPtrSet<Value *, 4> AllowedExit;
522
523 /// Vectorization requirements that will go through late-evaluation.
524 LoopVectorizationRequirements *Requirements;
525
526 /// Used to emit an analysis of any legality issues.
527 LoopVectorizeHints *Hints;
528
529 /// The demanded bits analysis is used to compute the minimum type size in
530 /// which a reduction can be computed.
531 DemandedBits *DB;
532
533 /// The assumption cache analysis is used to compute the minimum type size in
534 /// which a reduction can be computed.
535 AssumptionCache *AC;
536
537 /// While vectorizing these instructions we have to generate a
538 /// call to the appropriate masked intrinsic or drop them in case of
539 /// conditional assumes.
540 SmallPtrSet<const Instruction *, 8> MaskedOp;
541
542 /// BFI and PSI are used to check for profile guided size optimizations.
543 BlockFrequencyInfo *BFI;
544 ProfileSummaryInfo *PSI;
545
546 /// If we discover function calls within the loop which have a valid
547 /// vectorized variant, record that fact so that LoopVectorize can
548 /// (potentially) make a better decision on the maximum VF and enable
549 /// the use of those function variants.
550 bool VecCallVariantsFound = false;
551};
552
553} // namespace llvm
554
555#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
556

// Source: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h