1 | //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file contains the declarations of the Vectorization Plan base classes: |
11 | /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual |
12 | /// VPBlockBase, together implementing a Hierarchical CFG; |
13 | /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained |
14 | /// within VPBasicBlocks; |
15 | /// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that |
16 | /// also inherit from VPValue. |
17 | /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned |
18 | /// instruction; |
19 | /// 5. The VPlan class holding a candidate for vectorization; |
20 | /// 6. The VPlanPrinter class providing a way to print a plan in dot format; |
21 | /// These are documented in docs/VectorizationPlan.rst. |
22 | // |
23 | //===----------------------------------------------------------------------===// |
24 | |
25 | #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
26 | #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
27 | |
28 | #include "VPlanAnalysis.h" |
29 | #include "VPlanValue.h" |
30 | #include "llvm/ADT/DenseMap.h" |
31 | #include "llvm/ADT/MapVector.h" |
32 | #include "llvm/ADT/SmallBitVector.h" |
33 | #include "llvm/ADT/SmallPtrSet.h" |
34 | #include "llvm/ADT/SmallVector.h" |
35 | #include "llvm/ADT/Twine.h" |
36 | #include "llvm/ADT/ilist.h" |
37 | #include "llvm/ADT/ilist_node.h" |
38 | #include "llvm/Analysis/IVDescriptors.h" |
39 | #include "llvm/Analysis/LoopInfo.h" |
40 | #include "llvm/Analysis/VectorUtils.h" |
41 | #include "llvm/IR/DebugLoc.h" |
42 | #include "llvm/IR/FMF.h" |
43 | #include "llvm/IR/Operator.h" |
44 | #include <algorithm> |
45 | #include <cassert> |
46 | #include <cstddef> |
47 | #include <string> |
48 | |
49 | namespace llvm { |
50 | |
51 | class BasicBlock; |
52 | class DominatorTree; |
53 | class InnerLoopVectorizer; |
54 | class IRBuilderBase; |
55 | class LoopInfo; |
56 | class raw_ostream; |
57 | class RecurrenceDescriptor; |
58 | class SCEV; |
59 | class Type; |
60 | class VPBasicBlock; |
61 | class VPRegionBlock; |
62 | class VPlan; |
63 | class VPReplicateRecipe; |
64 | class VPlanSlp; |
65 | class Value; |
66 | class LoopVersioning; |
67 | |
68 | namespace Intrinsic { |
69 | typedef unsigned ID; |
70 | } |
71 | |
72 | /// Returns a calculation for the total number of elements for a given \p VF. |
73 | /// For fixed width vectors this value is a constant, whereas for scalable |
74 | /// vectors it is an expression determined at runtime. |
75 | Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); |
76 | |
77 | /// Return a value for Step multiplied by VF. |
78 | Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, |
79 | int64_t Step); |
80 | |
81 | const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, |
82 | Loop *CurLoop = nullptr); |
83 | |
84 | /// A range of powers-of-2 vectorization factors with fixed start and |
85 | /// adjustable end. The range includes start and excludes end, e.g.,: |
86 | /// [1, 16) = {1, 2, 4, 8} |
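///
/// Illustrative sketch of intended use (the callback name is an assumption,
/// not part of this header): the iterator defined below visits each
/// power-of-2 VF in the range.
/// \code
///   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
///   for (ElementCount VF : Range)
///     buildVPlanFor(VF); // Hypothetical callback; visits VFs 1, 2, 4 and 8.
/// \endcode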
87 | struct VFRange { |
88 | // A power of 2. |
89 | const ElementCount Start; |
90 | |
91 | // A power of 2. If End <= Start range is empty. |
92 | ElementCount End; |
93 | |
94 | bool isEmpty() const { |
95 | return End.getKnownMinValue() <= Start.getKnownMinValue(); |
96 | } |
97 | |
  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }
107 | |
108 | /// Iterator to iterate over vectorization factors in a VFRange. |
109 | class iterator |
110 | : public iterator_facade_base<iterator, std::forward_iterator_tag, |
111 | ElementCount> { |
112 | ElementCount VF; |
113 | |
114 | public: |
115 | iterator(ElementCount VF) : VF(VF) {} |
116 | |
117 | bool operator==(const iterator &Other) const { return VF == Other.VF; } |
118 | |
119 | ElementCount operator*() const { return VF; } |
120 | |
121 | iterator &operator++() { |
122 | VF *= 2; |
123 | return *this; |
124 | } |
125 | }; |
126 | |
127 | iterator begin() { return iterator(Start); } |
128 | iterator end() { |
129 | assert(isPowerOf2_32(End.getKnownMinValue())); |
130 | return iterator(End); |
131 | } |
132 | }; |
133 | |
134 | using VPlanPtr = std::unique_ptr<VPlan>; |
135 | |
136 | /// In what follows, the term "input IR" refers to code that is fed into the |
137 | /// vectorizer whereas the term "output IR" refers to code that is generated by |
138 | /// the vectorizer. |
139 | |
140 | /// VPLane provides a way to access lanes in both fixed width and scalable |
141 | /// vectors, where for the latter the lane index sometimes needs calculating |
142 | /// as a runtime expression. |
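///
/// For example (illustrative only), getLastLaneForVF below returns
/// VPLane(3, Kind::First) for a fixed VF of 4, i.e. simply lane 3, whereas
/// for a scalable VF of vscale x 4 it returns VPLane(3, Kind::ScalableLast),
/// which denotes the runtime lane ((vscale - 1) * 4) + 3.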
143 | class VPLane { |
144 | public: |
145 | /// Kind describes how to interpret Lane. |
146 | enum class Kind : uint8_t { |
147 | /// For First, Lane is the index into the first N elements of a |
148 | /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>. |
149 | First, |
150 | /// For ScalableLast, Lane is the offset from the start of the last |
151 | /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For |
152 | /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of |
153 | /// 1 corresponds to `((vscale - 1) * N) + 1`, etc. |
154 | ScalableLast |
155 | }; |
156 | |
157 | private: |
158 | /// in [0..VF) |
159 | unsigned Lane; |
160 | |
161 | /// Indicates how the Lane should be interpreted, as described above. |
162 | Kind LaneKind; |
163 | |
164 | public: |
165 | VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {} |
166 | |
167 | static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); } |
168 | |
169 | static VPLane getLastLaneForVF(const ElementCount &VF) { |
170 | unsigned LaneOffset = VF.getKnownMinValue() - 1; |
171 | Kind LaneKind; |
172 | if (VF.isScalable()) |
173 | // In this case 'LaneOffset' refers to the offset from the start of the |
174 | // last subvector with VF.getKnownMinValue() elements. |
175 | LaneKind = VPLane::Kind::ScalableLast; |
176 | else |
177 | LaneKind = VPLane::Kind::First; |
178 | return VPLane(LaneOffset, LaneKind); |
179 | } |
180 | |
181 | /// Returns a compile-time known value for the lane index and asserts if the |
182 | /// lane can only be calculated at runtime. |
183 | unsigned getKnownLane() const { |
184 | assert(LaneKind == Kind::First); |
185 | return Lane; |
186 | } |
187 | |
188 | /// Returns an expression describing the lane index that can be used at |
189 | /// runtime. |
190 | Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const; |
191 | |
192 | /// Returns the Kind of lane offset. |
193 | Kind getKind() const { return LaneKind; } |
194 | |
195 | /// Returns true if this is the first lane of the whole vector. |
196 | bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; } |
197 | |
198 | /// Maps the lane to a cache index based on \p VF. |
199 | unsigned mapToCacheIndex(const ElementCount &VF) const { |
200 | switch (LaneKind) { |
201 | case VPLane::Kind::ScalableLast: |
202 | assert(VF.isScalable() && Lane < VF.getKnownMinValue()); |
203 | return VF.getKnownMinValue() + Lane; |
204 | default: |
205 | assert(Lane < VF.getKnownMinValue()); |
206 | return Lane; |
207 | } |
208 | } |
209 | |
  /// Returns the maximum number of lanes that we are able to consider
211 | /// caching for \p VF. |
212 | static unsigned getNumCachedLanes(const ElementCount &VF) { |
213 | return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1); |
214 | } |
215 | }; |
216 | |
217 | /// VPIteration represents a single point in the iteration space of the output |
218 | /// (vectorized and/or unrolled) IR loop. |
219 | struct VPIteration { |
220 | /// in [0..UF) |
221 | unsigned Part; |
222 | |
223 | VPLane Lane; |
224 | |
225 | VPIteration(unsigned Part, unsigned Lane, |
226 | VPLane::Kind Kind = VPLane::Kind::First) |
227 | : Part(Part), Lane(Lane, Kind) {} |
228 | |
229 | VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {} |
230 | |
231 | bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); } |
232 | }; |
233 | |
234 | /// VPTransformState holds information passed down when "executing" a VPlan, |
235 | /// needed for generating the output IR. |
236 | struct VPTransformState { |
237 | VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, |
238 | DominatorTree *DT, IRBuilderBase &Builder, |
239 | InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx); |
240 | |
241 | /// The chosen Vectorization and Unroll Factors of the loop being vectorized. |
242 | ElementCount VF; |
243 | unsigned UF; |
244 | |
245 | /// Hold the indices to generate specific scalar instructions. Null indicates |
246 | /// that all instances are to be generated, using either scalar or vector |
247 | /// instructions. |
248 | std::optional<VPIteration> Instance; |
249 | |
250 | struct DataState { |
251 | /// A type for vectorized values in the new loop. Each value from the |
252 | /// original loop, when vectorized, is represented by UF vector values in |
253 | /// the new unrolled loop, where UF is the unroll factor. |
254 | typedef SmallVector<Value *, 2> PerPartValuesTy; |
255 | |
256 | DenseMap<VPValue *, PerPartValuesTy> PerPartOutput; |
257 | |
258 | using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>; |
259 | DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars; |
260 | } Data; |
261 | |
  /// Get the generated vector Value for a given VPValue \p Def and a given \p
  /// Part if \p IsScalar is false, otherwise return the generated scalar
  /// for \p Part. \see set.
265 | Value *get(VPValue *Def, unsigned Part, bool IsScalar = false); |
266 | |
267 | /// Get the generated Value for a given VPValue and given Part and Lane. |
268 | Value *get(VPValue *Def, const VPIteration &Instance); |
269 | |
  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }
275 | |
  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }
285 | |
286 | /// Set the generated vector Value for a given VPValue and a given Part, if \p |
287 | /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0). |
  void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPIteration(Part, 0));
      return;
    }
    assert((VF.isScalar() || V->getType()->isVectorTy()) &&
           "scalar values must be stored as (Part, 0)");
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
301 | |
302 | /// Reset an existing vector value for \p Def and a given \p Part. |
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }
309 | |
310 | /// Set the generated scalar \p V for \p Def and the given \p Instance. |
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    if (PerPartVec.size() <= Instance.Part)
      PerPartVec.resize(Instance.Part + 1);
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }
323 | |
324 | /// Reset an existing scalar value for \p Def and a given \p Instance. |
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
335 | } |
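
  // Illustrative sketch (not part of the API; Def, V0 and V1 are assumed to
  // be values available to the caller): recipes record and look up generated
  // values per unroll part and, for scalars, additionally per lane:
  //   State.set(Def, V0, /*Part=*/0);               // vector value for part 0
  //   State.set(Def, V1, VPIteration(0, 3));        // scalar for part 0, lane 3
  //   Value *W = State.get(Def, /*Part=*/0);        // retrieves V0
  //   Value *S = State.get(Def, VPIteration(0, 3)); // retrieves V1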
336 | |
337 | /// Add additional metadata to \p To that was not present on \p Orig. |
338 | /// |
339 | /// Currently this is used to add the noalias annotations based on the |
340 | /// inserted memchecks. Use this for instructions that are *cloned* into the |
341 | /// vector loop. |
342 | void addNewMetadata(Instruction *To, const Instruction *Orig); |
343 | |
344 | /// Add metadata from one instruction to another. |
345 | /// |
346 | /// This includes both the original MDs from \p From and additional ones (\see |
347 | /// addNewMetadata). Use this for *newly created* instructions in the vector |
348 | /// loop. |
349 | void addMetadata(Value *To, Instruction *From); |
350 | |
351 | /// Set the debug location in the builder using the debug location \p DL. |
352 | void setDebugLocFrom(DebugLoc DL); |
353 | |
354 | /// Construct the vector value of a scalarized value \p V one lane at a time. |
355 | void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance); |
356 | |
357 | /// Hold state information used when constructing the CFG of the output IR, |
358 | /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. |
359 | struct CFGState { |
360 | /// The previous VPBasicBlock visited. Initially set to null. |
361 | VPBasicBlock *PrevVPBB = nullptr; |
362 | |
363 | /// The previous IR BasicBlock created or used. Initially set to the new |
364 | /// header BasicBlock. |
365 | BasicBlock *PrevBB = nullptr; |
366 | |
367 | /// The last IR BasicBlock in the output IR. Set to the exit block of the |
368 | /// vector loop. |
369 | BasicBlock *ExitBB = nullptr; |
370 | |
371 | /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case |
372 | /// of replication, maps the BasicBlock of the last replica created. |
373 | SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB; |
374 | |
375 | CFGState() = default; |
376 | |
377 | /// Returns the BasicBlock* mapped to the pre-header of the loop region |
378 | /// containing \p R. |
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
380 | } CFG; |
381 | |
382 | /// Hold a pointer to LoopInfo to register new basic blocks in the loop. |
383 | LoopInfo *LI; |
384 | |
385 | /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. |
386 | DominatorTree *DT; |
387 | |
388 | /// Hold a reference to the IRBuilder used to generate output IR code. |
389 | IRBuilderBase &Builder; |
390 | |
391 | /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. |
392 | InnerLoopVectorizer *ILV; |
393 | |
  /// Pointer to the VPlan for which code is generated.
395 | VPlan *Plan; |
396 | |
397 | /// The loop object for the current parent region, or nullptr. |
398 | Loop *CurrentVectorLoop = nullptr; |
399 | |
400 | /// LoopVersioning. It's only set up (non-null) if memchecks were |
401 | /// used. |
402 | /// |
403 | /// This is currently only used to add no-alias metadata based on the |
  /// memchecks. The actual versioning is performed manually.
405 | LoopVersioning *LVer = nullptr; |
406 | |
407 | /// Map SCEVs to their expanded values. Populated when executing |
408 | /// VPExpandSCEVRecipes. |
409 | DenseMap<const SCEV *, Value *> ExpandedSCEVs; |
410 | |
411 | /// VPlan-based type analysis. |
412 | VPTypeAnalysis TypeAnalysis; |
413 | }; |
414 | |
415 | /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. |
416 | /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. |
417 | class VPBlockBase { |
418 | friend class VPBlockUtils; |
419 | |
420 | const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). |
421 | |
422 | /// An optional name for the block. |
423 | std::string Name; |
424 | |
425 | /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if |
426 | /// it is a topmost VPBlockBase. |
427 | VPRegionBlock *Parent = nullptr; |
428 | |
429 | /// List of predecessor blocks. |
430 | SmallVector<VPBlockBase *, 1> Predecessors; |
431 | |
432 | /// List of successor blocks. |
433 | SmallVector<VPBlockBase *, 1> Successors; |
434 | |
435 | /// VPlan containing the block. Can only be set on the entry block of the |
436 | /// plan. |
437 | VPlan *Plan = nullptr; |
438 | |
439 | /// Add \p Successor as the last successor to this block. |
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }
444 | |
445 | /// Add \p Predecessor as the last predecessor to this block. |
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }
450 | |
451 | /// Remove \p Predecessor from the predecessors of this block. |
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }
457 | |
458 | /// Remove \p Successor from the successors of this block. |
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }
464 | |
465 | protected: |
466 | VPBlockBase(const unsigned char SC, const std::string &N) |
467 | : SubclassID(SC), Name(N) {} |
468 | |
469 | public: |
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
474 | using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; |
475 | |
476 | using VPBlocksTy = SmallVectorImpl<VPBlockBase *>; |
477 | |
478 | virtual ~VPBlockBase() = default; |
479 | |
480 | const std::string &getName() const { return Name; } |
481 | |
482 | void setName(const Twine &newName) { Name = newName.str(); } |
483 | |
484 | /// \return an ID for the concrete type of this object. |
485 | /// This is used to implement the classof checks. This should not be used |
486 | /// for any other purpose, as the values may change as LLVM evolves. |
487 | unsigned getVPBlockID() const { return SubclassID; } |
488 | |
489 | VPRegionBlock *getParent() { return Parent; } |
490 | const VPRegionBlock *getParent() const { return Parent; } |
491 | |
492 | /// \return A pointer to the plan containing the current block. |
493 | VPlan *getPlan(); |
494 | const VPlan *getPlan() const; |
495 | |
496 | /// Sets the pointer of the plan containing the block. The block must be the |
497 | /// entry block into the VPlan. |
498 | void setPlan(VPlan *ParentPlan); |
499 | |
500 | void setParent(VPRegionBlock *P) { Parent = P; } |
501 | |
502 | /// \return the VPBasicBlock that is the entry of this VPBlockBase, |
503 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this |
504 | /// VPBlockBase is a VPBasicBlock, it is returned. |
505 | const VPBasicBlock *getEntryBasicBlock() const; |
506 | VPBasicBlock *getEntryBasicBlock(); |
507 | |
  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
509 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this |
510 | /// VPBlockBase is a VPBasicBlock, it is returned. |
511 | const VPBasicBlock *getExitingBasicBlock() const; |
512 | VPBasicBlock *getExitingBasicBlock(); |
513 | |
514 | const VPBlocksTy &getSuccessors() const { return Successors; } |
515 | VPBlocksTy &getSuccessors() { return Successors; } |
516 | |
517 | iterator_range<VPBlockBase **> successors() { return Successors; } |
518 | |
519 | const VPBlocksTy &getPredecessors() const { return Predecessors; } |
520 | VPBlocksTy &getPredecessors() { return Predecessors; } |
521 | |
522 | /// \return the successor of this VPBlockBase if it has a single successor. |
523 | /// Otherwise return a null pointer. |
524 | VPBlockBase *getSingleSuccessor() const { |
525 | return (Successors.size() == 1 ? *Successors.begin() : nullptr); |
526 | } |
527 | |
528 | /// \return the predecessor of this VPBlockBase if it has a single |
529 | /// predecessor. Otherwise return a null pointer. |
530 | VPBlockBase *getSinglePredecessor() const { |
531 | return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); |
532 | } |
533 | |
534 | size_t getNumSuccessors() const { return Successors.size(); } |
535 | size_t getNumPredecessors() const { return Predecessors.size(); } |
536 | |
537 | /// An Enclosing Block of a block B is any block containing B, including B |
538 | /// itself. \return the closest enclosing block starting from "this", which |
539 | /// has successors. \return the root enclosing block if all enclosing blocks |
540 | /// have no successors. |
541 | VPBlockBase *getEnclosingBlockWithSuccessors(); |
542 | |
543 | /// \return the closest enclosing block starting from "this", which has |
544 | /// predecessors. \return the root enclosing block if all enclosing blocks |
545 | /// have no predecessors. |
546 | VPBlockBase *getEnclosingBlockWithPredecessors(); |
547 | |
548 | /// \return the successors either attached directly to this VPBlockBase or, if |
549 | /// this VPBlockBase is the exit block of a VPRegionBlock and has no |
550 | /// successors of its own, search recursively for the first enclosing |
551 | /// VPRegionBlock that has successors and return them. If no such |
552 | /// VPRegionBlock exists, return the (empty) successors of the topmost |
553 | /// VPBlockBase reached. |
554 | const VPBlocksTy &getHierarchicalSuccessors() { |
555 | return getEnclosingBlockWithSuccessors()->getSuccessors(); |
556 | } |
557 | |
558 | /// \return the hierarchical successor of this VPBlockBase if it has a single |
559 | /// hierarchical successor. Otherwise return a null pointer. |
560 | VPBlockBase *getSingleHierarchicalSuccessor() { |
561 | return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); |
562 | } |
563 | |
564 | /// \return the predecessors either attached directly to this VPBlockBase or, |
565 | /// if this VPBlockBase is the entry block of a VPRegionBlock and has no |
566 | /// predecessors of its own, search recursively for the first enclosing |
567 | /// VPRegionBlock that has predecessors and return them. If no such |
568 | /// VPRegionBlock exists, return the (empty) predecessors of the topmost |
569 | /// VPBlockBase reached. |
570 | const VPBlocksTy &getHierarchicalPredecessors() { |
571 | return getEnclosingBlockWithPredecessors()->getPredecessors(); |
572 | } |
573 | |
574 | /// \return the hierarchical predecessor of this VPBlockBase if it has a |
575 | /// single hierarchical predecessor. Otherwise return a null pointer. |
576 | VPBlockBase *getSingleHierarchicalPredecessor() { |
577 | return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); |
578 | } |
579 | |
580 | /// Set a given VPBlockBase \p Successor as the single successor of this |
581 | /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. |
582 | /// This VPBlockBase must have no successors. |
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }
589 | |
590 | /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two |
591 | /// successors of this VPBlockBase. This VPBlockBase is not added as |
592 | /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no |
593 | /// successors. |
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }
599 | |
600 | /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. |
601 | /// This VPBlockBase must have no predecessors. This VPBlockBase is not added |
602 | /// as successor of any VPBasicBlock in \p NewPreds. |
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
607 | } |
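
  // Illustrative sketch (Pred and NewBlock are assumed blocks, not part of
  // this header): the setters above only update one side of an edge, so a
  // caller wiring in a new block typically updates both directions, e.g.
  //   Pred->setOneSuccessor(NewBlock);
  //   NewBlock->setPredecessors({Pred});
  // In-tree transforms usually go through the VPBlockUtils helpers instead.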
608 | |
  /// Remove all the predecessors of this block.
610 | void clearPredecessors() { Predecessors.clear(); } |
611 | |
612 | /// Remove all the successors of this block. |
613 | void clearSuccessors() { Successors.clear(); } |
614 | |
  /// The method which generates the output IR that corresponds to this
616 | /// VPBlockBase, thereby "executing" the VPlan. |
617 | virtual void execute(VPTransformState *State) = 0; |
618 | |
619 | /// Delete all blocks reachable from a given VPBlockBase, inclusive. |
620 | static void deleteCFG(VPBlockBase *Entry); |
621 | |
622 | /// Return true if it is legal to hoist instructions into this block. |
623 | bool isLegalToHoistInto() { |
624 | // There are currently no constraints that prevent an instruction to be |
625 | // hoisted into a VPBlockBase. |
626 | return true; |
627 | } |
628 | |
629 | /// Replace all operands of VPUsers in the block with \p NewValue and also |
630 | /// replaces all uses of VPValues defined in the block with NewValue. |
631 | virtual void dropAllReferences(VPValue *NewValue) = 0; |
632 | |
633 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
634 | void printAsOperand(raw_ostream &OS, bool PrintType) const { |
635 | OS << getName(); |
636 | } |
637 | |
638 | /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines |
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
641 | /// |
642 | /// Note that the numbering is applied to the whole VPlan, so printing |
643 | /// individual blocks is consistent with the whole VPlan printing. |
644 | virtual void print(raw_ostream &O, const Twine &Indent, |
645 | VPSlotTracker &SlotTracker) const = 0; |
646 | |
647 | /// Print plain-text dump of this VPlan to \p O. |
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }
652 | |
653 | /// Print the successors of this block to \p O, prefixing all lines with \p |
654 | /// Indent. |
655 | void printSuccessors(raw_ostream &O, const Twine &Indent) const; |
656 | |
657 | /// Dump this VPBlockBase to dbgs(). |
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
659 | #endif |
660 | |
  /// Clone the current block and its recipes without updating the operands of
662 | /// the cloned recipes, including all blocks in the single-entry single-exit |
663 | /// region for VPRegionBlocks. |
664 | virtual VPBlockBase *clone() = 0; |
665 | }; |
666 | |
667 | /// A value that is used outside the VPlan. The operand of the user needs to be |
668 | /// added to the associated LCSSA phi node. |
669 | class VPLiveOut : public VPUser { |
670 | PHINode *Phi; |
671 | |
672 | public: |
673 | VPLiveOut(PHINode *Phi, VPValue *Op) |
674 | : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} |
675 | |
676 | static inline bool classof(const VPUser *U) { |
677 | return U->getVPUserID() == VPUser::VPUserID::LiveOut; |
678 | } |
679 | |
680 | /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply |
681 | /// means we need to add the appropriate incoming value from the middle |
682 | /// block as exiting edges from the scalar epilogue loop (if present) are |
683 | /// already in place, and we exit the vector loop exclusively to the middle |
684 | /// block. |
685 | void fixPhi(VPlan &Plan, VPTransformState &State); |
686 | |
687 | /// Returns true if the VPLiveOut uses scalars of operand \p Op. |
688 | bool usesScalars(const VPValue *Op) const override { |
689 | assert(is_contained(operands(), Op) && |
           "Op must be an operand of the recipe");
691 | return true; |
692 | } |
693 | |
694 | PHINode *getPhi() const { return Phi; } |
695 | |
696 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
697 | /// Print the VPLiveOut to \p O. |
698 | void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; |
699 | #endif |
700 | }; |
701 | |
702 | /// VPRecipeBase is a base class modeling a sequence of one or more output IR |
703 | /// instructions. VPRecipeBase owns the VPValues it defines through VPDef |
704 | /// and is responsible for deleting its defined values. Single-value |
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
706 | /// VPRecipeBase and VPValue separately. |
707 | class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, |
708 | public VPDef, |
709 | public VPUser { |
710 | friend VPBasicBlock; |
711 | friend class VPBlockUtils; |
712 | |
713 | /// Each VPRecipe belongs to a single VPBasicBlock. |
714 | VPBasicBlock *Parent = nullptr; |
715 | |
716 | /// The debug location for the recipe. |
717 | DebugLoc DL; |
718 | |
719 | public: |
720 | VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands, |
721 | DebugLoc DL = {}) |
722 | : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {} |
723 | |
724 | template <typename IterT> |
725 | VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands, |
726 | DebugLoc DL = {}) |
727 | : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {} |
728 | virtual ~VPRecipeBase() = default; |
729 | |
730 | /// Clone the current recipe. |
731 | virtual VPRecipeBase *clone() = 0; |
732 | |
733 | /// \return the VPBasicBlock which this VPRecipe belongs to. |
734 | VPBasicBlock *getParent() { return Parent; } |
735 | const VPBasicBlock *getParent() const { return Parent; } |
736 | |
737 | /// The method which generates the output IR instructions that correspond to |
738 | /// this VPRecipe, thereby "executing" the VPlan. |
739 | virtual void execute(VPTransformState &State) = 0; |
740 | |
741 | /// Insert an unlinked recipe into a basic block immediately before |
742 | /// the specified recipe. |
743 | void insertBefore(VPRecipeBase *InsertPos); |
744 | /// Insert an unlinked recipe into \p BB immediately before the insertion |
  /// point \p IP.
746 | void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP); |
747 | |
748 | /// Insert an unlinked Recipe into a basic block immediately after |
749 | /// the specified Recipe. |
750 | void insertAfter(VPRecipeBase *InsertPos); |
751 | |
752 | /// Unlink this recipe from its current VPBasicBlock and insert it into |
753 | /// the VPBasicBlock that MovePos lives in, right after MovePos. |
754 | void moveAfter(VPRecipeBase *MovePos); |
755 | |
756 | /// Unlink this recipe and insert into BB before I. |
757 | /// |
758 | /// \pre I is a valid iterator into BB. |
759 | void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I); |
760 | |
761 | /// This method unlinks 'this' from the containing basic block, but does not |
762 | /// delete it. |
763 | void removeFromParent(); |
764 | |
765 | /// This method unlinks 'this' from the containing basic block and deletes it. |
766 | /// |
767 | /// \returns an iterator pointing to the element after the erased one |
768 | iplist<VPRecipeBase>::iterator eraseFromParent(); |
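
  // Illustrative sketch (R and Anchor are assumed recipes in the same VPlan):
  // a transform relocating a recipe can detach and re-insert it explicitly,
  //   R->removeFromParent();
  //   R->insertAfter(Anchor);
  // which is equivalent to the single call R->moveAfter(Anchor).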
769 | |
770 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
771 | static inline bool classof(const VPDef *D) { |
772 | // All VPDefs are also VPRecipeBases. |
773 | return true; |
774 | } |
775 | |
776 | static inline bool classof(const VPUser *U) { |
777 | return U->getVPUserID() == VPUser::VPUserID::Recipe; |
778 | } |
779 | |
780 | /// Returns true if the recipe may have side-effects. |
781 | bool mayHaveSideEffects() const; |
782 | |
783 | /// Returns true for PHI-like recipes. |
784 | bool isPhi() const { |
785 | return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC; |
786 | } |
787 | |
788 | /// Returns true if the recipe may read from memory. |
789 | bool mayReadFromMemory() const; |
790 | |
791 | /// Returns true if the recipe may write to memory. |
792 | bool mayWriteToMemory() const; |
793 | |
794 | /// Returns true if the recipe may read from or write to memory. |
795 | bool mayReadOrWriteMemory() const { |
796 | return mayReadFromMemory() || mayWriteToMemory(); |
797 | } |
798 | |
799 | /// Returns the debug location of the recipe. |
800 | DebugLoc getDebugLoc() const { return DL; } |
801 | }; |
802 | |
803 | // Helper macro to define common classof implementations for recipes. |
804 | #define VP_CLASSOF_IMPL(VPDefID) \ |
805 | static inline bool classof(const VPDef *D) { \ |
806 | return D->getVPDefID() == VPDefID; \ |
807 | } \ |
808 | static inline bool classof(const VPValue *V) { \ |
809 | auto *R = V->getDefiningRecipe(); \ |
810 | return R && R->getVPDefID() == VPDefID; \ |
811 | } \ |
812 | static inline bool classof(const VPUser *U) { \ |
813 | auto *R = dyn_cast<VPRecipeBase>(U); \ |
814 | return R && R->getVPDefID() == VPDefID; \ |
815 | } \ |
816 | static inline bool classof(const VPRecipeBase *R) { \ |
817 | return R->getVPDefID() == VPDefID; \ |
818 | } \ |
819 | static inline bool classof(const VPSingleDefRecipe *R) { \ |
820 | return R->getVPDefID() == VPDefID; \ |
821 | } |
822 | |
/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
825 | /// Note that VPRecipeBase must be inherited from before VPValue. |
826 | class VPSingleDefRecipe : public VPRecipeBase, public VPValue { |
827 | public: |
828 | template <typename IterT> |
829 | VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {}) |
830 | : VPRecipeBase(SC, Operands, DL), VPValue(this) {} |
831 | |
832 | VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, |
833 | DebugLoc DL = {}) |
834 | : VPRecipeBase(SC, Operands, DL), VPValue(this) {} |
835 | |
836 | template <typename IterT> |
837 | VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, |
838 | DebugLoc DL = {}) |
839 | : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {} |
840 | |
841 | static inline bool classof(const VPRecipeBase *R) { |
842 | switch (R->getVPDefID()) { |
843 | case VPRecipeBase::VPDerivedIVSC: |
844 | case VPRecipeBase::VPExpandSCEVSC: |
845 | case VPRecipeBase::VPInstructionSC: |
846 | case VPRecipeBase::VPReductionSC: |
847 | case VPRecipeBase::VPReplicateSC: |
848 | case VPRecipeBase::VPScalarIVStepsSC: |
849 | case VPRecipeBase::VPVectorPointerSC: |
850 | case VPRecipeBase::VPWidenCallSC: |
851 | case VPRecipeBase::VPWidenCanonicalIVSC: |
852 | case VPRecipeBase::VPWidenCastSC: |
853 | case VPRecipeBase::VPWidenGEPSC: |
854 | case VPRecipeBase::VPWidenSC: |
855 | case VPRecipeBase::VPWidenSelectSC: |
856 | case VPRecipeBase::VPBlendSC: |
857 | case VPRecipeBase::VPPredInstPHISC: |
858 | case VPRecipeBase::VPCanonicalIVPHISC: |
859 | case VPRecipeBase::VPActiveLaneMaskPHISC: |
860 | case VPRecipeBase::VPFirstOrderRecurrencePHISC: |
861 | case VPRecipeBase::VPWidenPHISC: |
862 | case VPRecipeBase::VPWidenIntOrFpInductionSC: |
863 | case VPRecipeBase::VPWidenPointerInductionSC: |
864 | case VPRecipeBase::VPReductionPHISC: |
865 | case VPRecipeBase::VPScalarCastSC: |
866 | return true; |
867 | case VPRecipeBase::VPInterleaveSC: |
868 | case VPRecipeBase::VPBranchOnMaskSC: |
869 | case VPRecipeBase::VPWidenLoadEVLSC: |
870 | case VPRecipeBase::VPWidenLoadSC: |
871 | case VPRecipeBase::VPWidenStoreEVLSC: |
872 | case VPRecipeBase::VPWidenStoreSC: |
873 | // TODO: Widened stores don't define a value, but widened loads do. Split |
874 | // the recipes to be able to make widened loads VPSingleDefRecipes. |
875 | return false; |
876 | } |
  llvm_unreachable("Unhandled VPDefID");
878 | } |
879 | |
880 | static inline bool classof(const VPUser *U) { |
    auto *R = dyn_cast<VPRecipeBase>(U);
882 | return R && classof(R); |
883 | } |
884 | |
885 | virtual VPSingleDefRecipe *clone() override = 0; |
886 | |
887 | /// Returns the underlying instruction. |
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }
894 | }; |
895 | |
/// Class to record LLVM IR flags for a recipe, along with the recipe itself.
897 | class VPRecipeWithIRFlags : public VPSingleDefRecipe { |
898 | enum class OperationType : unsigned char { |
899 | Cmp, |
900 | OverflowingBinOp, |
901 | DisjointOp, |
902 | PossiblyExactOp, |
903 | GEPOp, |
904 | FPMathOp, |
905 | NonNegOp, |
906 | Other |
907 | }; |
908 | |
909 | public: |
910 | struct WrapFlagsTy { |
911 | char HasNUW : 1; |
912 | char HasNSW : 1; |
913 | |
914 | WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {} |
915 | }; |
916 | |
917 | struct DisjointFlagsTy { |
918 | char IsDisjoint : 1; |
919 | DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {} |
920 | }; |
921 | |
922 | protected: |
923 | struct GEPFlagsTy { |
924 | char IsInBounds : 1; |
925 | GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {} |
926 | }; |
927 | |
928 | private: |
929 | struct ExactFlagsTy { |
930 | char IsExact : 1; |
931 | }; |
932 | struct NonNegFlagsTy { |
933 | char NonNeg : 1; |
934 | }; |
935 | struct FastMathFlagsTy { |
936 | char AllowReassoc : 1; |
937 | char NoNaNs : 1; |
938 | char NoInfs : 1; |
939 | char NoSignedZeros : 1; |
940 | char AllowReciprocal : 1; |
941 | char AllowContract : 1; |
942 | char ApproxFunc : 1; |
943 | |
944 | FastMathFlagsTy(const FastMathFlags &FMF); |
945 | }; |
946 | |
947 | OperationType OpType; |
948 | |
949 | union { |
950 | CmpInst::Predicate CmpPredicate; |
951 | WrapFlagsTy WrapFlags; |
952 | DisjointFlagsTy DisjointFlags; |
953 | ExactFlagsTy ExactFlags; |
954 | GEPFlagsTy GEPFlags; |
955 | NonNegFlagsTy NonNegFlags; |
956 | FastMathFlagsTy FMFs; |
957 | unsigned AllFlags; |
958 | }; |
959 | |
960 | protected: |
961 | void transferFlags(VPRecipeWithIRFlags &Other) { |
962 | OpType = Other.OpType; |
963 | AllFlags = Other.AllFlags; |
964 | } |
965 | |
966 | public: |
967 | template <typename IterT> |
968 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {}) |
969 | : VPSingleDefRecipe(SC, Operands, DL) { |
970 | OpType = OperationType::Other; |
971 | AllFlags = 0; |
972 | } |
973 | |
974 | template <typename IterT> |
975 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I) |
976 | : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) { |
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
996 | OpType = OperationType::FPMathOp; |
997 | FMFs = Op->getFastMathFlags(); |
998 | } else { |
999 | OpType = OperationType::Other; |
1000 | AllFlags = 0; |
1001 | } |
1002 | } |
1003 | |
1004 | template <typename IterT> |
1005 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, |
1006 | CmpInst::Predicate Pred, DebugLoc DL = {}) |
1007 | : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp), |
1008 | CmpPredicate(Pred) {} |
1009 | |
1010 | template <typename IterT> |
1011 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, |
1012 | WrapFlagsTy WrapFlags, DebugLoc DL = {}) |
1013 | : VPSingleDefRecipe(SC, Operands, DL), |
1014 | OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {} |
1015 | |
1016 | template <typename IterT> |
1017 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, |
1018 | FastMathFlags FMFs, DebugLoc DL = {}) |
1019 | : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp), |
1020 | FMFs(FMFs) {} |
1021 | |
1022 | template <typename IterT> |
1023 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, |
1024 | DisjointFlagsTy DisjointFlags, DebugLoc DL = {}) |
1025 | : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp), |
1026 | DisjointFlags(DisjointFlags) {} |
1027 | |
1028 | protected: |
1029 | template <typename IterT> |
1030 | VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, |
1031 | GEPFlagsTy GEPFlags, DebugLoc DL = {}) |
1032 | : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp), |
1033 | GEPFlags(GEPFlags) {} |
1034 | |
1035 | public: |
1036 | static inline bool classof(const VPRecipeBase *R) { |
1037 | return R->getVPDefID() == VPRecipeBase::VPInstructionSC || |
1038 | R->getVPDefID() == VPRecipeBase::VPWidenSC || |
1039 | R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || |
1040 | R->getVPDefID() == VPRecipeBase::VPWidenCastSC || |
1041 | R->getVPDefID() == VPRecipeBase::VPReplicateSC || |
1042 | R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; |
1043 | } |
1044 | |
1045 | static inline bool classof(const VPUser *U) { |
    auto *R = dyn_cast<VPRecipeBase>(U);
1047 | return R && classof(R); |
1048 | } |
1049 | |
1050 | /// Drop all poison-generating flags. |
1051 | void dropPoisonGeneratingFlags() { |
1052 | // NOTE: This needs to be kept in-sync with |
1053 | // Instruction::dropPoisonGeneratingFlags. |
1054 | switch (OpType) { |
1055 | case OperationType::OverflowingBinOp: |
1056 | WrapFlags.HasNUW = false; |
1057 | WrapFlags.HasNSW = false; |
1058 | break; |
1059 | case OperationType::DisjointOp: |
1060 | DisjointFlags.IsDisjoint = false; |
1061 | break; |
1062 | case OperationType::PossiblyExactOp: |
1063 | ExactFlags.IsExact = false; |
1064 | break; |
1065 | case OperationType::GEPOp: |
1066 | GEPFlags.IsInBounds = false; |
1067 | break; |
1068 | case OperationType::FPMathOp: |
1069 | FMFs.NoNaNs = false; |
1070 | FMFs.NoInfs = false; |
1071 | break; |
1072 | case OperationType::NonNegOp: |
1073 | NonNegFlags.NonNeg = false; |
1074 | break; |
1075 | case OperationType::Cmp: |
1076 | case OperationType::Other: |
1077 | break; |
1078 | } |
1079 | } |
1080 | |
1081 | /// Set the IR flags for \p I. |
1082 | void setFlags(Instruction *I) const { |
1083 | switch (OpType) { |
1084 | case OperationType::OverflowingBinOp: |
1085 | I->setHasNoUnsignedWrap(WrapFlags.HasNUW); |
1086 | I->setHasNoSignedWrap(WrapFlags.HasNSW); |
1087 | break; |
1088 | case OperationType::DisjointOp: |
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1090 | break; |
1091 | case OperationType::PossiblyExactOp: |
1092 | I->setIsExact(ExactFlags.IsExact); |
1093 | break; |
1094 | case OperationType::GEPOp: |
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
1096 | break; |
1097 | case OperationType::FPMathOp: |
1098 | I->setHasAllowReassoc(FMFs.AllowReassoc); |
1099 | I->setHasNoNaNs(FMFs.NoNaNs); |
1100 | I->setHasNoInfs(FMFs.NoInfs); |
1101 | I->setHasNoSignedZeros(FMFs.NoSignedZeros); |
1102 | I->setHasAllowReciprocal(FMFs.AllowReciprocal); |
1103 | I->setHasAllowContract(FMFs.AllowContract); |
1104 | I->setHasApproxFunc(FMFs.ApproxFunc); |
1105 | break; |
1106 | case OperationType::NonNegOp: |
1107 | I->setNonNeg(NonNegFlags.NonNeg); |
1108 | break; |
1109 | case OperationType::Cmp: |
1110 | case OperationType::Other: |
1111 | break; |
1112 | } |
1113 | } |
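
  // Illustrative sketch (Builder, A and B are assumed to be available in a
  // recipe's execute(); this is not the actual implementation): the recorded
  // flags are typically re-applied to newly generated IR via setFlags, e.g.
  //   Value *V = Builder.CreateAdd(A, B);
  //   if (auto *I = dyn_cast<Instruction>(V))
  //     setFlags(I);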
1114 | |
1115 | CmpInst::Predicate getPredicate() const { |
1116 | assert(OpType == OperationType::Cmp && |
           "recipe doesn't have a compare predicate");
1118 | return CmpPredicate; |
1119 | } |
1120 | |
1121 | bool isInBounds() const { |
1122 | assert(OpType == OperationType::GEPOp && |
           "recipe doesn't have inbounds flag");
1124 | return GEPFlags.IsInBounds; |
1125 | } |
1126 | |
1127 | /// Returns true if the recipe has fast-math flags. |
1128 | bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; } |
1129 | |
1130 | FastMathFlags getFastMathFlags() const; |
1131 | |
1132 | bool hasNoUnsignedWrap() const { |
1133 | assert(OpType == OperationType::OverflowingBinOp && |
           "recipe doesn't have a NUW flag");
1135 | return WrapFlags.HasNUW; |
1136 | } |
1137 | |
1138 | bool hasNoSignedWrap() const { |
1139 | assert(OpType == OperationType::OverflowingBinOp && |
           "recipe doesn't have a NSW flag");
1141 | return WrapFlags.HasNSW; |
1142 | } |
1143 | |
1144 | bool isDisjoint() const { |
1145 | assert(OpType == OperationType::DisjointOp && |
           "recipe cannot have a disjoint flag");
1147 | return DisjointFlags.IsDisjoint; |
1148 | } |
1149 | |
1150 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1151 | void printFlags(raw_ostream &O) const; |
1152 | #endif |
1153 | }; |
1154 | |
1155 | /// This is a concrete Recipe that models a single VPlan-level instruction. |
/// While, as with any Recipe, it may generate a sequence of IR instructions
/// when executed, these instructions would always form a single-def expression
/// as the VPInstruction is also a single def-use vertex.
1159 | class VPInstruction : public VPRecipeWithIRFlags { |
1160 | friend class VPlanSlp; |
1161 | |
1162 | public: |
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1164 | enum { |
1165 | FirstOrderRecurrenceSplice = |
1166 | Instruction::OtherOpsEnd + 1, // Combines the incoming and previous |
1167 | // values of a first-order recurrence. |
1168 | Not, |
1169 | SLPLoad, |
1170 | SLPStore, |
1171 | ActiveLaneMask, |
1172 | ExplicitVectorLength, |
1173 | CalculateTripCountMinusVF, |
1174 | // Increment the canonical IV separately for each unrolled part. |
1175 | CanonicalIVIncrementForPart, |
1176 | BranchOnCount, |
1177 | BranchOnCond, |
1178 | ComputeReductionResult, |
1179 | // Add an offset in bytes (second operand) to a base pointer (first |
1180 | // operand). Only generates scalar values (either for the first lane only or |
1181 | // for all lanes, depending on its uses). |
1182 | PtrAdd, |
1183 | }; |
1184 | |
1185 | private: |
1186 | typedef unsigned char OpcodeTy; |
1187 | OpcodeTy Opcode; |
1188 | |
1189 | /// An optional name that can be used for the generated IR instruction. |
1190 | const std::string Name; |
1191 | |
1192 | /// Returns true if this VPInstruction generates scalar values for all lanes. |
1193 | /// Most VPInstructions generate a single value per part, either vector or |
1194 | /// scalar. VPReplicateRecipe takes care of generating multiple (scalar) |
1195 | /// values per all lanes, stemming from an original ingredient. This method |
1196 | /// identifies the (rare) cases of VPInstructions that do so as well, w/o an |
1197 | /// underlying ingredient. |
1198 | bool doesGeneratePerAllLanes() const; |
1199 | |
1200 | /// Returns true if we can generate a scalar for the first lane only if |
1201 | /// needed. |
1202 | bool canGenerateScalarForFirstLane() const; |
1203 | |
1204 | /// Utility methods serving execute(): generates a single instance of the |
1205 | /// modeled instruction for a given part. \returns the generated value for \p |
1206 | /// Part. In some cases an existing value is returned rather than a generated |
1207 | /// one. |
1208 | Value *generatePerPart(VPTransformState &State, unsigned Part); |
1209 | |
1210 | /// Utility methods serving execute(): generates a scalar single instance of |
1211 | /// the modeled instruction for a given lane. \returns the scalar generated |
1212 | /// value for lane \p Lane. |
1213 | Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); |
1214 | |
1215 | #if !defined(NDEBUG) |
1216 | /// Return true if the VPInstruction is a floating point math operation, i.e. |
1217 | /// has fast-math flags. |
1218 | bool isFPMathOp() const; |
1219 | #endif |
1220 | |
1221 | public: |
1222 | VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL, |
                const Twine &Name = "")
1224 | : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), |
1225 | Opcode(Opcode), Name(Name.str()) {} |
1226 | |
1227 | VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, |
                DebugLoc DL = {}, const Twine &Name = "")
1229 | : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {} |
1230 | |
1231 | VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A, |
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1233 | |
1234 | VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, |
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1236 | : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL), |
1237 | Opcode(Opcode), Name(Name.str()) {} |
1238 | |
1239 | VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, |
1240 | DisjointFlagsTy DisjointFlag, DebugLoc DL = {}, |
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
        Opcode(Opcode), Name(Name.str()) {
    assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1245 | } |
1246 | |
1247 | VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, |
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1249 | |
1250 | VP_CLASSOF_IMPL(VPDef::VPInstructionSC) |
1251 | |
1252 | VPInstruction *clone() override { |
1253 | SmallVector<VPValue *, 2> Operands(operands()); |
1254 | auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name); |
    New->transferFlags(*this);
1256 | return New; |
1257 | } |
1258 | |
1259 | unsigned getOpcode() const { return Opcode; } |
1260 | |
1261 | /// Generate the instruction. |
1262 | /// TODO: We currently execute only per-part unless a specific instance is |
1263 | /// provided. |
1264 | void execute(VPTransformState &State) override; |
1265 | |
1266 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1267 | /// Print the VPInstruction to \p O. |
1268 | void print(raw_ostream &O, const Twine &Indent, |
1269 | VPSlotTracker &SlotTracker) const override; |
1270 | |
1271 | /// Print the VPInstruction to dbgs() (for debugging). |
1272 | LLVM_DUMP_METHOD void dump() const; |
1273 | #endif |
1274 | |
1275 | /// Return true if this instruction may modify memory. |
1276 | bool mayWriteToMemory() const { |
1277 | // TODO: we can use attributes of the called function to rule out memory |
1278 | // modifications. |
1279 | return Opcode == Instruction::Store || Opcode == Instruction::Call || |
1280 | Opcode == Instruction::Invoke || Opcode == SLPStore; |
1281 | } |
1282 | |
1283 | bool hasResult() const { |
1284 | // CallInst may or may not have a result, depending on the called function. |
    // Conservatively assume calls have results for now.
1286 | switch (getOpcode()) { |
1287 | case Instruction::Ret: |
1288 | case Instruction::Br: |
1289 | case Instruction::Store: |
1290 | case Instruction::Switch: |
1291 | case Instruction::IndirectBr: |
1292 | case Instruction::Resume: |
1293 | case Instruction::CatchRet: |
1294 | case Instruction::Unreachable: |
1295 | case Instruction::Fence: |
1296 | case Instruction::AtomicRMW: |
1297 | case VPInstruction::BranchOnCond: |
1298 | case VPInstruction::BranchOnCount: |
1299 | return false; |
1300 | default: |
1301 | return true; |
1302 | } |
1303 | } |
1304 | |
1305 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1306 | bool onlyFirstLaneUsed(const VPValue *Op) const override; |
1307 | |
1308 | /// Returns true if the recipe only uses the first part of operand \p Op. |
1309 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
1310 | assert(is_contained(operands(), Op) && |
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
1313 | return false; |
1314 | switch (getOpcode()) { |
1315 | default: |
1316 | return false; |
1317 | case VPInstruction::BranchOnCount: |
1318 | case VPInstruction::CanonicalIVIncrementForPart: |
1319 | return true; |
1320 | }; |
    llvm_unreachable("switch should return");
1322 | } |
1323 | }; |
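
// Illustrative sketch (A and B are assumed VPValues and DL a DebugLoc; not
// part of this header): transforms create VPInstructions directly from the
// constructors above, e.g.
//   auto *Add = new VPInstruction(Instruction::Add, {A, B}, DL, "add");
//   auto *Cmp = new VPInstruction(Instruction::ICmp, CmpInst::ICMP_ULT,
//                                 Add, B, DL);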
1324 | |
/// VPWidenRecipe is a recipe for producing a copy of vector type of its
1326 | /// ingredient. This recipe covers most of the traditional vectorization cases |
1327 | /// where each ingredient transforms into a vectorized version of itself. |
1328 | class VPWidenRecipe : public VPRecipeWithIRFlags { |
1329 | unsigned Opcode; |
1330 | |
1331 | public: |
1332 | template <typename IterT> |
1333 | VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) |
1334 | : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), |
1335 | Opcode(I.getOpcode()) {} |
1336 | |
1337 | ~VPWidenRecipe() override = default; |
1338 | |
1339 | VPWidenRecipe *clone() override { |
1340 | auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands()); |
    R->transferFlags(*this);
1342 | return R; |
1343 | } |
1344 | |
1345 | VP_CLASSOF_IMPL(VPDef::VPWidenSC) |
1346 | |
1347 | /// Produce widened copies of all Ingredients. |
1348 | void execute(VPTransformState &State) override; |
1349 | |
1350 | unsigned getOpcode() const { return Opcode; } |
1351 | |
1352 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1353 | /// Print the recipe. |
1354 | void print(raw_ostream &O, const Twine &Indent, |
1355 | VPSlotTracker &SlotTracker) const override; |
1356 | #endif |
1357 | }; |
1358 | |
1359 | /// VPWidenCastRecipe is a recipe to create vector cast instructions. |
1360 | class VPWidenCastRecipe : public VPRecipeWithIRFlags { |
1361 | /// Cast instruction opcode. |
1362 | Instruction::CastOps Opcode; |
1363 | |
1364 | /// Result type for the cast. |
1365 | Type *ResultTy; |
1366 | |
1367 | public: |
1368 | VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, |
1369 | CastInst &UI) |
1370 | : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode), |
1371 | ResultTy(ResultTy) { |
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
    assert(UI.getType() == ResultTy &&
           "result type of underlying cast doesn't match");
1376 | } |
1377 | |
1378 | VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) |
1379 | : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode), |
1380 | ResultTy(ResultTy) {} |
1381 | |
1382 | ~VPWidenCastRecipe() override = default; |
1383 | |
1384 | VPWidenCastRecipe *clone() override { |
1385 | if (auto *UV = getUnderlyingValue()) |
1386 | return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy, |
1387 | *cast<CastInst>(Val: UV)); |
1388 | |
1389 | return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy); |
1390 | } |
1391 | |
1392 | VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) |
1393 | |
1394 | /// Produce widened copies of the cast. |
1395 | void execute(VPTransformState &State) override; |
1396 | |
1397 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1398 | /// Print the recipe. |
1399 | void print(raw_ostream &O, const Twine &Indent, |
1400 | VPSlotTracker &SlotTracker) const override; |
1401 | #endif |
1402 | |
1403 | Instruction::CastOps getOpcode() const { return Opcode; } |
1404 | |
1405 | /// Returns the result type of the cast. |
1406 | Type *getResultType() const { return ResultTy; } |
1407 | }; |
1408 | |
1409 | /// VPScalarCastRecipe is a recipe to create scalar cast instructions. |
1410 | class VPScalarCastRecipe : public VPSingleDefRecipe { |
1411 | Instruction::CastOps Opcode; |
1412 | |
1413 | Type *ResultTy; |
1414 | |
1415 | Value *generate(VPTransformState &State, unsigned Part); |
1416 | |
1417 | public: |
1418 | VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) |
1419 | : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode), |
1420 | ResultTy(ResultTy) {} |
1421 | |
1422 | ~VPScalarCastRecipe() override = default; |
1423 | |
1424 | VPScalarCastRecipe *clone() override { |
1425 | return new VPScalarCastRecipe(Opcode, getOperand(N: 0), ResultTy); |
1426 | } |
1427 | |
1428 | VP_CLASSOF_IMPL(VPDef::VPScalarCastSC) |
1429 | |
1430 | void execute(VPTransformState &State) override; |
1431 | |
1432 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1433 | void print(raw_ostream &O, const Twine &Indent, |
1434 | VPSlotTracker &SlotTracker) const override; |
1435 | #endif |
1436 | |
1437 | /// Returns the result type of the cast. |
1438 | Type *getResultType() const { return ResultTy; } |
1439 | |
1440 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1441 | // At the moment, only uniform codegen is implemented. |
1442 | assert(is_contained(operands(), Op) && |
1443 | "Op must be an operand of the recipe" ); |
1444 | return true; |
1445 | } |
1446 | }; |
1447 | |
1448 | /// A recipe for widening Call instructions. |
1449 | class VPWidenCallRecipe : public VPSingleDefRecipe { |
1450 | /// ID of the vector intrinsic to call when widening the call. If set to
1451 | /// Intrinsic::not_intrinsic, a library call will be used instead. |
1452 | Intrinsic::ID VectorIntrinsicID; |
1453 | /// If this recipe represents a library call, Variant stores a pointer to |
1454 | /// the chosen function. There is a 1:1 mapping between a given VF and the |
1455 | /// chosen vectorized variant, so there will be a different vplan for each |
1456 | /// VF with a valid variant. |
1457 | Function *Variant; |
1458 | |
1459 | public: |
1460 | template <typename IterT> |
1461 | VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments, |
1462 | Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {}, |
1463 | Function *Variant = nullptr) |
1464 | : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL), |
1465 | VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {} |
1466 | |
1467 | ~VPWidenCallRecipe() override = default; |
1468 | |
1469 | VPWidenCallRecipe *clone() override { |
1470 | return new VPWidenCallRecipe(*cast<CallInst>(Val: getUnderlyingInstr()), |
1471 | operands(), VectorIntrinsicID, getDebugLoc(), |
1472 | Variant); |
1473 | } |
1474 | |
1475 | VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) |
1476 | |
1477 | /// Produce a widened version of the call instruction. |
1478 | void execute(VPTransformState &State) override; |
1479 | |
1480 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1481 | /// Print the recipe. |
1482 | void print(raw_ostream &O, const Twine &Indent, |
1483 | VPSlotTracker &SlotTracker) const override; |
1484 | #endif |
1485 | }; |
1486 | |
1487 | /// A recipe for widening select instructions. |
1488 | struct VPWidenSelectRecipe : public VPSingleDefRecipe { |
1489 | template <typename IterT> |
1490 | VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands) |
1491 | : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I, |
1492 | I.getDebugLoc()) {} |
1493 | |
1494 | ~VPWidenSelectRecipe() override = default; |
1495 | |
1496 | VPWidenSelectRecipe *clone() override { |
1497 | return new VPWidenSelectRecipe(*cast<SelectInst>(Val: getUnderlyingInstr()), |
1498 | operands()); |
1499 | } |
1500 | |
1501 | VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) |
1502 | |
1503 | /// Produce a widened version of the select instruction. |
1504 | void execute(VPTransformState &State) override; |
1505 | |
1506 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1507 | /// Print the recipe. |
1508 | void print(raw_ostream &O, const Twine &Indent, |
1509 | VPSlotTracker &SlotTracker) const override; |
1510 | #endif |
1511 | |
1512 | VPValue *getCond() const { |
1513 | return getOperand(N: 0); |
1514 | } |
1515 | |
1516 | bool isInvariantCond() const { |
1517 | return getCond()->isDefinedOutsideVectorRegions(); |
1518 | } |
1519 | }; |
1520 | |
1521 | /// A recipe for handling GEP instructions. |
1522 | class VPWidenGEPRecipe : public VPRecipeWithIRFlags { |
1523 | bool isPointerLoopInvariant() const { |
1524 | return getOperand(N: 0)->isDefinedOutsideVectorRegions(); |
1525 | } |
1526 | |
1527 | bool isIndexLoopInvariant(unsigned I) const { |
1528 | return getOperand(N: I + 1)->isDefinedOutsideVectorRegions(); |
1529 | } |
1530 | |
1531 | bool areAllOperandsInvariant() const { |
1532 | return all_of(Range: operands(), P: [](VPValue *Op) { |
1533 | return Op->isDefinedOutsideVectorRegions(); |
1534 | }); |
1535 | } |
1536 | |
1537 | public: |
1538 | template <typename IterT> |
1539 | VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands) |
1540 | : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {} |
1541 | |
1542 | ~VPWidenGEPRecipe() override = default; |
1543 | |
1544 | VPWidenGEPRecipe *clone() override { |
1545 | return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()), |
1546 | operands()); |
1547 | } |
1548 | |
1549 | VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) |
1550 | |
1551 | /// Generate the gep nodes. |
1552 | void execute(VPTransformState &State) override; |
1553 | |
1554 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1555 | /// Print the recipe. |
1556 | void print(raw_ostream &O, const Twine &Indent, |
1557 | VPSlotTracker &SlotTracker) const override; |
1558 | #endif |
1559 | }; |
1560 | |
1561 | /// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1562 | /// all parts. If IsReverse is true, compute pointers for accessing the input in |
1563 | /// reverse order per part. |
1564 | class VPVectorPointerRecipe : public VPRecipeWithIRFlags { |
1565 | Type *IndexedTy; |
1566 | bool IsReverse; |
1567 | |
1568 | public: |
1569 | VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, |
1570 | bool IsInBounds, DebugLoc DL) |
1571 | : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), |
1572 | GEPFlagsTy(IsInBounds), DL), |
1573 | IndexedTy(IndexedTy), IsReverse(IsReverse) {} |
1574 | |
1575 | VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) |
1576 | |
1577 | void execute(VPTransformState &State) override; |
1578 | |
1579 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1580 | assert(is_contained(operands(), Op) && |
1581 | "Op must be an operand of the recipe" ); |
1582 | return true; |
1583 | } |
1584 | |
1585 | VPVectorPointerRecipe *clone() override { |
1586 | return new VPVectorPointerRecipe(getOperand(N: 0), IndexedTy, IsReverse, |
1587 | isInBounds(), getDebugLoc()); |
1588 | } |
1589 | |
1590 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1591 | /// Print the recipe. |
1592 | void print(raw_ostream &O, const Twine &Indent, |
1593 | VPSlotTracker &SlotTracker) const override; |
1594 | #endif |
1595 | }; |
1596 | |
1597 | /// A pure virtual base class for all recipes modeling header phis, including |
1598 | /// phis for first order recurrences, pointer inductions and reductions. The |
1599 | /// start value is the first operand of the recipe and the incoming value from |
1600 | /// the backedge is the second operand. |
1601 | /// |
1602 | /// Inductions are modeled using the following sub-classes: |
1603 | /// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop, |
1604 | /// starting at a specified value (zero for the main vector loop, the resume |
1605 | /// value for the epilogue vector loop) and stepping by 1. The induction |
1606 | /// controls exiting of the vector loop by comparing against the vector trip |
1607 | /// count. Produces a single scalar PHI for the induction value per |
1608 | /// iteration. |
1609 | /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and |
1610 | /// floating point inductions with arbitrary start and step values. Produces |
1611 | /// a vector PHI per-part. |
1612 | /// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding |
1613 | /// value of an IV with different start and step values. Produces a single |
1614 | ///    scalar value per iteration.
1615 | /// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a |
1616 | /// canonical or derived induction. |
1617 | /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a |
1618 | /// pointer induction. Produces either a vector PHI per-part or scalar values |
1619 | /// per-lane based on the canonical induction. |
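     | ///
     | /// For example, when vectorizing a loop over `i` that also uses the derived
     | /// induction `j = i * 4 + 3`, the exit of the vector loop is controlled via a
     | /// VPCanonicalIVPHIRecipe, vector values of `i` are produced by a
     | /// VPWidenIntOrFpInductionRecipe, and scalar per-lane values of `j` can be
     | /// produced by a VPDerivedIVRecipe followed by a VPScalarIVStepsRecipe.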
1620 | class VPHeaderPHIRecipe : public VPSingleDefRecipe {
1621 | protected: |
1622 | VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1623 | VPValue *Start = nullptr, DebugLoc DL = {}) |
1624 | : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) { |
1625 | if (Start) |
1626 | addOperand(Operand: Start); |
1627 | } |
1628 | |
1629 | public: |
1630 | ~VPHeaderPHIRecipe() override = default;
1631 | |
1632 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
1633 | static inline bool classof(const VPRecipeBase *B) {
1634 | return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && |
1635 | B->getVPDefID() <= VPDef::VPLastHeaderPHISC; |
1636 | } |
1637 | static inline bool classof(const VPValue *V) {
1638 | auto *B = V->getDefiningRecipe(); |
1639 | return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC && |
1640 | B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC; |
1641 | } |
1642 | |
1643 | /// Generate the phi nodes. |
1644 | void execute(VPTransformState &State) override = 0;
1645 | |
1646 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1647 | /// Print the recipe. |
1648 | void print(raw_ostream &O, const Twine &Indent,
1649 | VPSlotTracker &SlotTracker) const override = 0; |
1650 | #endif |
1651 | |
1652 | /// Returns the start value of the phi, if one is set. |
1653 | VPValue *getStartValue() {
1654 | return getNumOperands() == 0 ? nullptr : getOperand(N: 0); |
1655 | } |
1656 | VPValue *getStartValue() const {
1657 | return getNumOperands() == 0 ? nullptr : getOperand(N: 0); |
1658 | } |
1659 | |
1660 | /// Update the start value of the recipe. |
1661 | void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }
1662 | |
1663 | /// Returns the incoming value from the loop backedge. |
1664 | virtual VPValue *getBackedgeValue() {
1665 | return getOperand(N: 1); |
1666 | } |
1667 | |
1668 | /// Returns the backedge value as a recipe. The backedge value is guaranteed |
1669 | /// to be a recipe. |
1670 | virtual VPRecipeBase &getBackedgeRecipe() {
1671 | return *getBackedgeValue()->getDefiningRecipe(); |
1672 | } |
1673 | }; |
1674 | |
1675 | /// A recipe for handling phi nodes of integer and floating-point inductions, |
1676 | /// producing their vector values. |
1677 | class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { |
1678 | PHINode *IV; |
1679 | TruncInst *Trunc; |
1680 | const InductionDescriptor &IndDesc; |
1681 | |
1682 | public: |
1683 | VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, |
1684 | const InductionDescriptor &IndDesc) |
1685 | : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV), |
1686 | Trunc(nullptr), IndDesc(IndDesc) { |
1687 | addOperand(Operand: Step); |
1688 | } |
1689 | |
1690 | VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, |
1691 | const InductionDescriptor &IndDesc, |
1692 | TruncInst *Trunc) |
1693 | : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start), |
1694 | IV(IV), Trunc(Trunc), IndDesc(IndDesc) { |
1695 | addOperand(Operand: Step); |
1696 | } |
1697 | |
1698 | ~VPWidenIntOrFpInductionRecipe() override = default; |
1699 | |
1700 | VPWidenIntOrFpInductionRecipe *clone() override { |
1701 | return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(), |
1702 | getStepValue(), IndDesc, Trunc); |
1703 | } |
1704 | |
1705 | VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) |
1706 | |
1707 | /// Generate the vectorized and scalarized versions of the phi node as |
1708 | /// needed by their users. |
1709 | void execute(VPTransformState &State) override; |
1710 | |
1711 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1712 | /// Print the recipe. |
1713 | void print(raw_ostream &O, const Twine &Indent, |
1714 | VPSlotTracker &SlotTracker) const override; |
1715 | #endif |
1716 | |
1717 | VPValue *getBackedgeValue() override { |
1718 | // TODO: All operands of base recipe must exist and be at same index in |
1719 | // derived recipe. |
1720 | llvm_unreachable( |
1721 | "VPWidenIntOrFpInductionRecipe generates its own backedge value" ); |
1722 | } |
1723 | |
1724 | VPRecipeBase &getBackedgeRecipe() override { |
1725 | // TODO: All operands of base recipe must exist and be at same index in |
1726 | // derived recipe. |
1727 | llvm_unreachable( |
1728 | "VPWidenIntOrFpInductionRecipe generates its own backedge value" ); |
1729 | } |
1730 | |
1731 | /// Returns the step value of the induction. |
1732 | VPValue *getStepValue() { return getOperand(N: 1); } |
1733 | const VPValue *getStepValue() const { return getOperand(N: 1); } |
1734 | |
1735 | /// Returns the first defined value as a TruncInst, if it is one, and nullptr
1736 | /// otherwise. |
1737 | TruncInst *getTruncInst() { return Trunc; } |
1738 | const TruncInst *getTruncInst() const { return Trunc; } |
1739 | |
1740 | PHINode *getPHINode() { return IV; } |
1741 | |
1742 | /// Returns the induction descriptor for the recipe. |
1743 | const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } |
1744 | |
1745 | /// Returns true if the induction is canonical, i.e. starting at 0 and |
1746 | /// incremented by UF * VF (= the original IV is incremented by 1). |
1747 | bool isCanonical() const; |
1748 | |
1749 | /// Returns the scalar type of the induction. |
1750 | Type *getScalarType() const { |
1751 | return Trunc ? Trunc->getType() : IV->getType(); |
1752 | } |
1753 | }; |
1754 | |
1755 | class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { |
1756 | const InductionDescriptor &IndDesc; |
1757 | |
1758 | bool IsScalarAfterVectorization; |
1759 | |
1760 | public: |
1761 | /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value
1762 | /// \p Start.
1763 | VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, |
1764 | const InductionDescriptor &IndDesc, |
1765 | bool IsScalarAfterVectorization) |
1766 | : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi), |
1767 | IndDesc(IndDesc), |
1768 | IsScalarAfterVectorization(IsScalarAfterVectorization) { |
1769 | addOperand(Operand: Start); |
1770 | addOperand(Operand: Step); |
1771 | } |
1772 | |
1773 | ~VPWidenPointerInductionRecipe() override = default; |
1774 | |
1775 | VPWidenPointerInductionRecipe *clone() override { |
1776 | return new VPWidenPointerInductionRecipe( |
1777 | cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1), |
1778 | IndDesc, IsScalarAfterVectorization); |
1779 | } |
1780 | |
1781 | VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) |
1782 | |
1783 | /// Generate vector values for the pointer induction. |
1784 | void execute(VPTransformState &State) override; |
1785 | |
1786 | /// Returns true if only scalar values will be generated. |
1787 | bool onlyScalarsGenerated(bool IsScalable); |
1788 | |
1789 | /// Returns the induction descriptor for the recipe. |
1790 | const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } |
1791 | |
1792 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1793 | /// Print the recipe. |
1794 | void print(raw_ostream &O, const Twine &Indent, |
1795 | VPSlotTracker &SlotTracker) const override; |
1796 | #endif |
1797 | }; |
1798 | |
1799 | /// A recipe for handling phis that are widened in the vector loop. |
1800 | /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are |
1801 | /// managed in the recipe directly. |
1802 | class VPWidenPHIRecipe : public VPSingleDefRecipe { |
1803 | /// List of incoming blocks. Only used in the VPlan native path. |
1804 | SmallVector<VPBasicBlock *, 2> IncomingBlocks; |
1805 | |
1806 | public: |
1807 | /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. |
1808 | VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) |
1809 | : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) { |
1810 | if (Start) |
1811 | addOperand(Operand: Start); |
1812 | } |
1813 | |
1814 | VPWidenPHIRecipe *clone() override { |
1815 | llvm_unreachable("cloning not implemented yet" ); |
1816 | } |
1817 | |
1818 | ~VPWidenPHIRecipe() override = default; |
1819 | |
1820 | VP_CLASSOF_IMPL(VPDef::VPWidenPHISC) |
1821 | |
1822 | /// Generate the phi/select nodes. |
1823 | void execute(VPTransformState &State) override; |
1824 | |
1825 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1826 | /// Print the recipe. |
1827 | void print(raw_ostream &O, const Twine &Indent, |
1828 | VPSlotTracker &SlotTracker) const override; |
1829 | #endif |
1830 | |
1831 | /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. |
1832 | void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { |
1833 | addOperand(Operand: IncomingV); |
1834 | IncomingBlocks.push_back(Elt: IncomingBlock); |
1835 | } |
1836 | |
1837 | /// Returns the \p I th incoming VPBasicBlock. |
1838 | VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } |
1839 | |
1840 | /// Returns the \p I th incoming VPValue. |
1841 | VPValue *getIncomingValue(unsigned I) { return getOperand(N: I); } |
1842 | }; |
1843 | |
1844 | /// A recipe for handling first-order recurrence phis. The start value is the |
1845 | /// first operand of the recipe and the incoming value from the backedge is the |
1846 | /// second operand. |
1847 | struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { |
1848 | VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) |
1849 | : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {} |
1850 | |
1851 | VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC) |
1852 | |
1853 | static inline bool classof(const VPHeaderPHIRecipe *R) {
1854 | return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC; |
1855 | } |
1856 | |
1857 | VPFirstOrderRecurrencePHIRecipe *clone() override { |
1858 | return new VPFirstOrderRecurrencePHIRecipe( |
1859 | cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0)); |
1860 | } |
1861 | |
1862 | void execute(VPTransformState &State) override; |
1863 | |
1864 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1865 | /// Print the recipe. |
1866 | void print(raw_ostream &O, const Twine &Indent, |
1867 | VPSlotTracker &SlotTracker) const override; |
1868 | #endif |
1869 | }; |
1870 | |
1871 | /// A recipe for handling reduction phis. The start value is the first operand |
1872 | /// of the recipe and the incoming value from the backedge is the second |
1873 | /// operand. |
1874 | class VPReductionPHIRecipe : public VPHeaderPHIRecipe { |
1875 | /// Descriptor for the reduction. |
1876 | const RecurrenceDescriptor &RdxDesc; |
1877 | |
1878 | /// The phi is part of an in-loop reduction. |
1879 | bool IsInLoop; |
1880 | |
1881 | /// The phi is part of an ordered reduction. Requires IsInLoop to be true. |
1882 | bool IsOrdered; |
1883 | |
1884 | public: |
1885 | /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by
1886 | /// \p RdxDesc.
1887 | VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, |
1888 | VPValue &Start, bool IsInLoop = false, |
1889 | bool IsOrdered = false) |
1890 | : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), |
1891 | RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) { |
1892 | assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop" ); |
1893 | } |
1894 | |
1895 | ~VPReductionPHIRecipe() override = default; |
1896 | |
1897 | VPReductionPHIRecipe *clone() override { |
1898 | auto *R = |
1899 | new VPReductionPHIRecipe(cast<PHINode>(Val: getUnderlyingInstr()), RdxDesc, |
1900 | *getOperand(N: 0), IsInLoop, IsOrdered); |
1901 | R->addOperand(Operand: getBackedgeValue()); |
1902 | return R; |
1903 | } |
1904 | |
1905 | VP_CLASSOF_IMPL(VPDef::VPReductionPHISC) |
1906 | |
1907 | static inline bool classof(const VPHeaderPHIRecipe *R) {
1908 | return R->getVPDefID() == VPDef::VPReductionPHISC; |
1909 | } |
1910 | |
1911 | /// Generate the phi/select nodes. |
1912 | void execute(VPTransformState &State) override; |
1913 | |
1914 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1915 | /// Print the recipe. |
1916 | void print(raw_ostream &O, const Twine &Indent, |
1917 | VPSlotTracker &SlotTracker) const override; |
1918 | #endif |
1919 | |
1920 | const RecurrenceDescriptor &getRecurrenceDescriptor() const { |
1921 | return RdxDesc; |
1922 | } |
1923 | |
1924 | /// Returns true, if the phi is part of an ordered reduction. |
1925 | bool isOrdered() const { return IsOrdered; } |
1926 | |
1927 | /// Returns true, if the phi is part of an in-loop reduction. |
1928 | bool isInLoop() const { return IsInLoop; } |
1929 | }; |
1930 | |
1931 | /// A recipe for vectorizing a phi-node as a sequence of mask-based select |
1932 | /// instructions. |
1933 | class VPBlendRecipe : public VPSingleDefRecipe { |
1934 | public: |
1935 | /// The blend operation is a User of the incoming values and of their |
1936 | /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first |
1937 | /// incoming value does not have a mask associated. |
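     | /// For example, a blend of three incoming values has the five operands
     | /// [I0, I1, M1, I2, M2]: getIncomingValue(0) returns I0 (which has no mask),
     | /// while getIncomingValue(2) returns I2 and getMask(2) returns M2.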
1938 | VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) |
1939 | : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) { |
1940 | assert((Operands.size() + 1) % 2 == 0 && |
1941 | "Expected an odd number of operands" ); |
1942 | } |
1943 | |
1944 | VPBlendRecipe *clone() override { |
1945 | SmallVector<VPValue *> Ops(operands()); |
1946 | return new VPBlendRecipe(cast<PHINode>(Val: getUnderlyingValue()), Ops); |
1947 | } |
1948 | |
1949 | VP_CLASSOF_IMPL(VPDef::VPBlendSC) |
1950 | |
1951 | /// Return the number of incoming values, taking into account that the first |
1952 | /// incoming value has no mask. |
1953 | unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; } |
1954 | |
1955 | /// Return incoming value number \p Idx. |
1956 | VPValue *getIncomingValue(unsigned Idx) const { |
1957 | return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - 1); |
1958 | } |
1959 | |
1960 | /// Return mask number \p Idx. |
1961 | VPValue *getMask(unsigned Idx) const { |
1962 | assert(Idx > 0 && "First index has no mask associated." ); |
1963 | return getOperand(N: Idx * 2); |
1964 | } |
1965 | |
1966 | /// Generate the phi/select nodes. |
1967 | void execute(VPTransformState &State) override; |
1968 | |
1969 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1970 | /// Print the recipe. |
1971 | void print(raw_ostream &O, const Twine &Indent, |
1972 | VPSlotTracker &SlotTracker) const override; |
1973 | #endif |
1974 | |
1975 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1976 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1977 | assert(is_contained(operands(), Op) && |
1978 | "Op must be an operand of the recipe" ); |
1979 | // Recursing through Blend recipes only, must terminate at header phi's the |
1980 | // latest. |
1981 | return all_of(Range: users(), |
1982 | P: [this](VPUser *U) { return U->onlyFirstLaneUsed(Op: this); }); |
1983 | } |
1984 | }; |
1985 | |
1986 | /// VPInterleaveRecipe is a recipe for transforming an interleave group of load |
1987 | /// or stores into one wide load/store and shuffles. The first operand of a |
1988 | /// VPInterleave recipe is the address, followed by the stored values, followed |
1989 | /// by an optional mask. |
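     | ///
     | /// For example, a masked group storing two members has the operands
     | /// [Addr, S0, S1, Mask], while a load group only has [Addr] plus an optional
     | /// mask and instead defines one VPValue per non-void loaded member.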
1990 | class VPInterleaveRecipe : public VPRecipeBase { |
1991 | const InterleaveGroup<Instruction> *IG; |
1992 | |
1993 | /// Indicates if the interleave group is in a conditional block and requires a |
1994 | /// mask. |
1995 | bool HasMask = false; |
1996 | |
1997 | /// Indicates if gaps between members of the group need to be masked out or if |
1998 | /// unused gaps can be loaded speculatively.
1999 | bool NeedsMaskForGaps = false; |
2000 | |
2001 | public: |
2002 | VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, |
2003 | ArrayRef<VPValue *> StoredValues, VPValue *Mask, |
2004 | bool NeedsMaskForGaps) |
2005 | : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG), |
2006 | NeedsMaskForGaps(NeedsMaskForGaps) { |
2007 | for (unsigned i = 0; i < IG->getFactor(); ++i) |
2008 | if (Instruction *I = IG->getMember(Index: i)) { |
2009 | if (I->getType()->isVoidTy()) |
2010 | continue; |
2011 | new VPValue(I, this); |
2012 | } |
2013 | |
2014 | for (auto *SV : StoredValues) |
2015 | addOperand(Operand: SV); |
2016 | if (Mask) { |
2017 | HasMask = true; |
2018 | addOperand(Operand: Mask); |
2019 | } |
2020 | } |
2021 | ~VPInterleaveRecipe() override = default; |
2022 | |
2023 | VPInterleaveRecipe *clone() override { |
2024 | return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), |
2025 | NeedsMaskForGaps); |
2026 | } |
2027 | |
2028 | VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) |
2029 | |
2030 | /// Return the address accessed by this recipe. |
2031 | VPValue *getAddr() const { |
2032 | return getOperand(N: 0); // Address is the 1st, mandatory operand. |
2033 | } |
2034 | |
2035 | /// Return the mask used by this recipe. Note that a full mask is represented |
2036 | /// by a nullptr. |
2037 | VPValue *getMask() const { |
2038 | // Mask is optional and therefore the last operand.
2039 | return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr; |
2040 | } |
2041 | |
2042 | /// Return the VPValues stored by this interleave group. If it is a load |
2043 | /// interleave group, return an empty ArrayRef. |
2044 | ArrayRef<VPValue *> getStoredValues() const { |
2045 | // The first operand is the address, followed by the stored values, followed |
2046 | // by an optional mask. |
2047 | return ArrayRef<VPValue *>(op_begin(), getNumOperands()) |
2048 | .slice(N: 1, M: getNumStoreOperands()); |
2049 | } |
2050 | |
2051 | /// Generate the wide load or store, and shuffles. |
2052 | void execute(VPTransformState &State) override; |
2053 | |
2054 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2055 | /// Print the recipe. |
2056 | void print(raw_ostream &O, const Twine &Indent, |
2057 | VPSlotTracker &SlotTracker) const override; |
2058 | #endif |
2059 | |
2060 | const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } |
2061 | |
2062 | /// Returns the number of stored operands of this interleave group. Returns 0 |
2063 | /// for load interleave groups. |
2064 | unsigned getNumStoreOperands() const { |
2065 | return getNumOperands() - (HasMask ? 2 : 1); |
2066 | } |
2067 | |
2068 | /// The recipe only uses the first lane of the address. |
2069 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2070 | assert(is_contained(operands(), Op) && |
2071 | "Op must be an operand of the recipe" ); |
2072 | return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op); |
2073 | } |
2074 | }; |
2075 | |
2076 | /// A recipe to represent in-loop reduction operations, reducing a vector
2077 | /// operand into a scalar value and adding the result to a chain.
2078 | /// The Operands are {ChainOp, VecOp, [Condition]}. |
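     | ///
     | /// For example, for an in-loop integer add reduction, execute() emits (per
     | /// part) roughly a scalar `add` of the chain value and an
     | /// llvm.vector.reduce.add of the vector operand; if a condition operand is
     | /// present, it masks the vector operand before the reduction.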
2079 | class VPReductionRecipe : public VPSingleDefRecipe { |
2080 | /// The recurrence descriptor for the reduction in question.
2081 | const RecurrenceDescriptor &RdxDesc; |
2082 | bool IsOrdered; |
2083 | |
2084 | public: |
2085 | VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, |
2086 | VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, |
2087 | bool IsOrdered) |
2088 | : VPSingleDefRecipe(VPDef::VPReductionSC, |
2089 | ArrayRef<VPValue *>({ChainOp, VecOp}), I), |
2090 | RdxDesc(R), IsOrdered(IsOrdered) { |
2091 | if (CondOp) |
2092 | addOperand(Operand: CondOp); |
2093 | } |
2094 | |
2095 | ~VPReductionRecipe() override = default; |
2096 | |
2097 | VPReductionRecipe *clone() override { |
2098 | return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(), |
2099 | getVecOp(), getCondOp(), IsOrdered); |
2100 | } |
2101 | |
2102 | VP_CLASSOF_IMPL(VPDef::VPReductionSC) |
2103 | |
2104 | /// Generate the reduction in the loop |
2105 | void execute(VPTransformState &State) override; |
2106 | |
2107 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2108 | /// Print the recipe. |
2109 | void print(raw_ostream &O, const Twine &Indent, |
2110 | VPSlotTracker &SlotTracker) const override; |
2111 | #endif |
2112 | |
2113 | /// The VPValue of the scalar Chain being accumulated. |
2114 | VPValue *getChainOp() const { return getOperand(N: 0); } |
2115 | /// The VPValue of the vector value to be reduced. |
2116 | VPValue *getVecOp() const { return getOperand(N: 1); } |
2117 | /// The VPValue of the condition for the block. |
2118 | VPValue *getCondOp() const { |
2119 | return getNumOperands() > 2 ? getOperand(N: 2) : nullptr; |
2120 | } |
2121 | }; |
2122 | |
2123 | /// VPReplicateRecipe replicates a given instruction producing multiple scalar |
2124 | /// copies of the original scalar type, one per lane, instead of producing a |
2125 | /// single copy of widened type for all lanes. If the instruction is known to be |
2126 | /// uniform, only one copy, for lane zero, will be generated.
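     | ///
     | /// For example, with VF = 4 and UF = 2 a non-uniform ingredient is replicated
     | /// into 8 scalar copies (one per lane and part), while a uniform one is
     | /// typically emitted only for lane zero of each part. A predicated recipe
     | /// additionally carries its mask as the last operand.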
2127 | class VPReplicateRecipe : public VPRecipeWithIRFlags { |
2128 | /// Indicator if only a single replica per lane is needed. |
2129 | bool IsUniform; |
2130 | |
2131 | /// Indicator if the replicas are also predicated. |
2132 | bool IsPredicated; |
2133 | |
2134 | public: |
2135 | template <typename IterT> |
2136 | VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands, |
2137 | bool IsUniform, VPValue *Mask = nullptr) |
2138 | : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), |
2139 | IsUniform(IsUniform), IsPredicated(Mask) { |
2140 | if (Mask) |
2141 | addOperand(Operand: Mask); |
2142 | } |
2143 | |
2144 | ~VPReplicateRecipe() override = default; |
2145 | |
2146 | VPReplicateRecipe *clone() override { |
2147 | auto *Copy = |
2148 | new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform, |
2149 | isPredicated() ? getMask() : nullptr); |
2150 | Copy->transferFlags(Other&: *this); |
2151 | return Copy; |
2152 | } |
2153 | |
2154 | VP_CLASSOF_IMPL(VPDef::VPReplicateSC) |
2155 | |
2156 | /// Generate replicas of the desired Ingredient. Replicas will be generated |
2157 | /// for all parts and lanes unless a specific part and lane are specified in |
2158 | /// the \p State. |
2159 | void execute(VPTransformState &State) override; |
2160 | |
2161 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2162 | /// Print the recipe. |
2163 | void print(raw_ostream &O, const Twine &Indent, |
2164 | VPSlotTracker &SlotTracker) const override; |
2165 | #endif |
2166 | |
2167 | bool isUniform() const { return IsUniform; } |
2168 | |
2169 | bool isPredicated() const { return IsPredicated; } |
2170 | |
2171 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2172 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2173 | assert(is_contained(operands(), Op) && |
2174 | "Op must be an operand of the recipe" ); |
2175 | return isUniform(); |
2176 | } |
2177 | |
2178 | /// Returns true if the recipe uses scalars of operand \p Op. |
2179 | bool usesScalars(const VPValue *Op) const override { |
2180 | assert(is_contained(operands(), Op) && |
2181 | "Op must be an operand of the recipe" ); |
2182 | return true; |
2183 | } |
2184 | |
2185 | /// Returns true if the recipe is used by a widened recipe via an intervening |
2186 | /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed |
2187 | /// in a vector. |
2188 | bool shouldPack() const; |
2189 | |
2190 | /// Return the mask of a predicated VPReplicateRecipe. |
2191 | VPValue *getMask() { |
2192 | assert(isPredicated() && "Trying to get the mask of an unpredicated recipe" );
2193 | return getOperand(N: getNumOperands() - 1); |
2194 | } |
2195 | |
2196 | unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); } |
2197 | }; |
2198 | |
2199 | /// A recipe for generating conditional branches on the bits of a mask. |
2200 | class VPBranchOnMaskRecipe : public VPRecipeBase { |
2201 | public: |
2202 | VPBranchOnMaskRecipe(VPValue *BlockInMask) |
2203 | : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) { |
2204 | if (BlockInMask) // nullptr means all-one mask. |
2205 | addOperand(Operand: BlockInMask); |
2206 | } |
2207 | |
2208 | VPBranchOnMaskRecipe *clone() override { |
2209 | return new VPBranchOnMaskRecipe(getOperand(N: 0)); |
2210 | } |
2211 | |
2212 | VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC) |
2213 | |
2214 | /// Generate the extraction of the appropriate bit from the block mask and the |
2215 | /// conditional branch. |
2216 | void execute(VPTransformState &State) override; |
2217 | |
2218 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2219 | /// Print the recipe. |
2220 | void print(raw_ostream &O, const Twine &Indent, |
2221 | VPSlotTracker &SlotTracker) const override { |
2222 | O << Indent << "BRANCH-ON-MASK " ; |
2223 | if (VPValue *Mask = getMask()) |
2224 | Mask->printAsOperand(OS&: O, Tracker&: SlotTracker); |
2225 | else |
2226 | O << " All-One" ; |
2227 | } |
2228 | #endif |
2229 | |
2230 | /// Return the mask used by this recipe. Note that a full mask is represented |
2231 | /// by a nullptr. |
2232 | VPValue *getMask() const { |
2233 | assert(getNumOperands() <= 1 && "should have either 0 or 1 operands" ); |
2234 | // Mask is optional. |
2235 | return getNumOperands() == 1 ? getOperand(N: 0) : nullptr; |
2236 | } |
2237 | |
2238 | /// Returns true if the recipe uses scalars of operand \p Op. |
2239 | bool usesScalars(const VPValue *Op) const override { |
2240 | assert(is_contained(operands(), Op) && |
2241 | "Op must be an operand of the recipe" ); |
2242 | return true; |
2243 | } |
2244 | }; |
2245 | |
2246 | /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when |
2247 | /// control converges back from a Branch-on-Mask. The phi nodes are needed in |
2248 | /// order to merge values that are set under such a branch and feed their uses. |
2249 | /// The phi nodes can be scalar or vector depending on the users of the value. |
2250 | /// This recipe works in concert with VPBranchOnMaskRecipe. |
2251 | class VPPredInstPHIRecipe : public VPSingleDefRecipe { |
2252 | public: |
2253 | /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi |
2254 | /// nodes after merging back from a Branch-on-Mask. |
2255 | VPPredInstPHIRecipe(VPValue *PredV) |
2256 | : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {} |
2257 | ~VPPredInstPHIRecipe() override = default; |
2258 | |
2259 | VPPredInstPHIRecipe *clone() override { |
2260 | return new VPPredInstPHIRecipe(getOperand(N: 0)); |
2261 | } |
2262 | |
2263 | VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) |
2264 | |
2265 | /// Generates phi nodes for live-outs as needed to retain SSA form. |
2266 | void execute(VPTransformState &State) override; |
2267 | |
2268 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2269 | /// Print the recipe. |
2270 | void print(raw_ostream &O, const Twine &Indent, |
2271 | VPSlotTracker &SlotTracker) const override; |
2272 | #endif |
2273 | |
2274 | /// Returns true if the recipe uses scalars of operand \p Op. |
2275 | bool usesScalars(const VPValue *Op) const override { |
2276 | assert(is_contained(operands(), Op) && |
2277 | "Op must be an operand of the recipe" ); |
2278 | return true; |
2279 | } |
2280 | }; |
2281 | |
2282 | /// A common base class for widening memory operations. An optional mask can be |
2283 | /// provided as the last operand. |
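     | ///
     | /// The operand order of the concrete sub-classes is:
     | ///  * VPWidenLoadRecipe:     {Addr, [Mask]}
     | ///  * VPWidenLoadEVLRecipe:  {Addr, EVL, [Mask]}
     | ///  * VPWidenStoreRecipe:    {Addr, StoredVal, [Mask]}
     | ///  * VPWidenStoreEVLRecipe: {Addr, StoredVal, EVL, [Mask]}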
2284 | class VPWidenMemoryRecipe : public VPRecipeBase { |
2285 | protected: |
2286 | Instruction &Ingredient; |
2287 | |
2288 | /// Whether the accessed addresses are consecutive. |
2289 | bool Consecutive; |
2290 | |
2291 | /// Whether the consecutive accessed addresses are in reverse order. |
2292 | bool Reverse; |
2293 | |
2294 | /// Whether the memory access is masked. |
2295 | bool IsMasked = false; |
2296 | |
2297 | void setMask(VPValue *Mask) { |
2298 | assert(!IsMasked && "cannot re-set mask" ); |
2299 | if (!Mask) |
2300 | return; |
2301 | addOperand(Operand: Mask); |
2302 | IsMasked = true; |
2303 | } |
2304 | |
2305 | VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, |
2306 | std::initializer_list<VPValue *> Operands, |
2307 | bool Consecutive, bool Reverse, DebugLoc DL) |
2308 | : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive), |
2309 | Reverse(Reverse) { |
2310 | assert((Consecutive || !Reverse) && "Reverse implies consecutive" ); |
2311 | } |
2312 | |
2313 | public: |
2314 | VPWidenMemoryRecipe *clone() override { |
2315 | llvm_unreachable("cloning not supported" ); |
2316 | } |
2317 | |
2318 | static inline bool classof(const VPRecipeBase *R) { |
2319 | return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC || |
2320 | R->getVPDefID() == VPRecipeBase::VPWidenStoreSC || |
2321 | R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC || |
2322 | R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC; |
2323 | } |
2324 | |
2325 | static inline bool classof(const VPUser *U) { |
2326 | auto *R = dyn_cast<VPRecipeBase>(Val: U); |
2327 | return R && classof(R); |
2328 | } |
2329 | |
2330 | /// Return whether the loaded-from / stored-to addresses are consecutive. |
2331 | bool isConsecutive() const { return Consecutive; } |
2332 | |
2333 | /// Return whether the consecutive loaded/stored addresses are in reverse |
2334 | /// order. |
2335 | bool isReverse() const { return Reverse; } |
2336 | |
2337 | /// Return the address accessed by this recipe. |
2338 | VPValue *getAddr() const { return getOperand(N: 0); } |
2339 | |
2340 | /// Returns true if the recipe is masked. |
2341 | bool isMasked() const { return IsMasked; } |
2342 | |
2343 | /// Return the mask used by this recipe. Note that a full mask is represented |
2344 | /// by a nullptr. |
2345 | VPValue *getMask() const { |
2346 | // Mask is optional and therefore the last operand. |
2347 | return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr; |
2348 | } |
2349 | |
2350 | /// Generate the wide load/store. |
2351 | void execute(VPTransformState &State) override { |
2352 | llvm_unreachable("VPWidenMemoryRecipe should not be instantiated." ); |
2353 | } |
2354 | |
2355 | Instruction &getIngredient() const { return Ingredient; } |
2356 | }; |
2357 | |
2358 | /// A recipe for widening load operations, using the address to load from and an |
2359 | /// optional mask. |
2360 | struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue { |
2361 | VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, |
2362 | bool Consecutive, bool Reverse, DebugLoc DL) |
2363 | : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, |
2364 | Reverse, DL), |
2365 | VPValue(this, &Load) { |
2366 | setMask(Mask); |
2367 | } |
2368 | |
2369 | VPWidenLoadRecipe *clone() override { |
2370 | return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(), |
2371 | getMask(), Consecutive, Reverse, |
2372 | getDebugLoc()); |
2373 | } |
2374 | |
2375 | VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC); |
2376 | |
2377 | /// Generate a wide load or gather. |
2378 | void execute(VPTransformState &State) override; |
2379 | |
2380 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2381 | /// Print the recipe. |
2382 | void print(raw_ostream &O, const Twine &Indent, |
2383 | VPSlotTracker &SlotTracker) const override; |
2384 | #endif |
2385 | |
2386 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2387 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2388 | assert(is_contained(operands(), Op) && |
2389 | "Op must be an operand of the recipe" ); |
2390 | // Widened, consecutive load operations only demand the first lane of
2391 | // their address. |
2392 | return Op == getAddr() && isConsecutive(); |
2393 | } |
2394 | }; |
2395 | |
2396 | /// A recipe for widening load operations with vector-predication intrinsics, |
2397 | /// using the address to load from, the explicit vector length and an optional |
2398 | /// mask. |
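     | ///
     | /// For example, a consecutive masked load is emitted as a call to the
     | /// llvm.vp.load intrinsic (llvm.vp.gather for non-consecutive accesses),
     | /// passing the mask and the EVL as arguments.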
2399 | struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { |
2400 | VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask) |
2401 | : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(), |
2402 | {L->getAddr(), EVL}, L->isConsecutive(), false, |
2403 | L->getDebugLoc()), |
2404 | VPValue(this, &getIngredient()) { |
2405 | setMask(Mask); |
2406 | } |
2407 | |
2408 | VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC) |
2409 | |
2410 | /// Return the EVL operand. |
2411 | VPValue *getEVL() const { return getOperand(N: 1); } |
2412 | |
2413 | /// Generate the wide load or gather. |
2414 | void execute(VPTransformState &State) override; |
2415 | |
2416 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2417 | /// Print the recipe. |
2418 | void print(raw_ostream &O, const Twine &Indent, |
2419 | VPSlotTracker &SlotTracker) const override; |
2420 | #endif |
2421 | |
2422 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2423 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2424 | assert(is_contained(operands(), Op) && |
2425 | "Op must be an operand of the recipe" ); |
2426 | // Widened loads only demand the first lane of EVL and consecutive loads |
2427 | // only demand the first lane of their address. |
2428 | return Op == getEVL() || (Op == getAddr() && isConsecutive()); |
2429 | } |
2430 | }; |
2431 | |
2432 | /// A recipe for widening store operations, using the stored value, the address |
2433 | /// to store to and an optional mask. |
2434 | struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe { |
2435 | VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, |
2436 | VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL) |
2437 | : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal}, |
2438 | Consecutive, Reverse, DL) { |
2439 | setMask(Mask); |
2440 | } |
2441 | |
2442 | VPWidenStoreRecipe *clone() override { |
2443 | return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(), |
2444 | getStoredValue(), getMask(), Consecutive, |
2445 | Reverse, getDebugLoc()); |
2446 | } |
2447 | |
2448 | VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC); |
2449 | |
2450 | /// Return the value stored by this recipe. |
2451 | VPValue *getStoredValue() const { return getOperand(N: 1); } |
2452 | |
2453 | /// Generate a wide store or scatter. |
2454 | void execute(VPTransformState &State) override; |
2455 | |
2456 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2457 | /// Print the recipe. |
2458 | void print(raw_ostream &O, const Twine &Indent, |
2459 | VPSlotTracker &SlotTracker) const override; |
2460 | #endif |
2461 | |
2462 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2463 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2464 | assert(is_contained(operands(), Op) && |
2465 | "Op must be an operand of the recipe" ); |
2466 | // Widened, consecutive stores only demand the first lane of their address, |
2467 | // unless the same operand is also stored. |
2468 | return Op == getAddr() && isConsecutive() && Op != getStoredValue(); |
2469 | } |
2470 | }; |
2471 | |
2472 | /// A recipe for widening store operations with vector-predication intrinsics, |
2473 | /// using the value to store, the address to store to, the explicit vector |
2474 | /// length and an optional mask. |
2475 | struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { |
2476 | VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask) |
2477 | : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(), |
2478 | {S->getAddr(), S->getStoredValue(), EVL}, |
2479 | S->isConsecutive(), false, S->getDebugLoc()) { |
2480 | setMask(Mask); |
2481 | } |
2482 | |
2483 | VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC) |
2484 | |
2485 | /// Return the value stored by this recipe.
2486 | VPValue *getStoredValue() const { return getOperand(N: 1); } |
2487 | |
2488 | /// Return the EVL operand. |
2489 | VPValue *getEVL() const { return getOperand(N: 2); } |
2490 | |
2491 | /// Generate the wide store or scatter. |
2492 | void execute(VPTransformState &State) override; |
2493 | |
2494 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2495 | /// Print the recipe. |
2496 | void print(raw_ostream &O, const Twine &Indent, |
2497 | VPSlotTracker &SlotTracker) const override; |
2498 | #endif |
2499 | |
2500 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2501 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2502 | assert(is_contained(operands(), Op) && |
2503 | "Op must be an operand of the recipe" ); |
2504 | if (Op == getEVL()) { |
2505 | assert(getStoredValue() != Op && "unexpected store of EVL" ); |
2506 | return true; |
2507 | } |
2508 | // Widened, consecutive memory operations only demand the first lane of |
2509 | // their address, unless the same operand is also stored. The latter can
2510 | // happen with opaque pointers. |
2511 | return Op == getAddr() && isConsecutive() && Op != getStoredValue(); |
2512 | } |
2513 | }; |
2514 | |
2515 | /// Recipe to expand a SCEV expression. |
2516 | class VPExpandSCEVRecipe : public VPSingleDefRecipe { |
2517 | const SCEV *Expr; |
2518 | ScalarEvolution &SE; |
2519 | |
2520 | public: |
2521 | VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) |
2522 | : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} |
2523 | |
2524 | ~VPExpandSCEVRecipe() override = default; |
2525 | |
2526 | VPExpandSCEVRecipe *clone() override { |
2527 | return new VPExpandSCEVRecipe(Expr, SE); |
2528 | } |
2529 | |
2530 | VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC) |
2531 | |
2532 | /// Expand the SCEV expression.
2533 | void execute(VPTransformState &State) override; |
2534 | |
2535 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2536 | /// Print the recipe. |
2537 | void print(raw_ostream &O, const Twine &Indent, |
2538 | VPSlotTracker &SlotTracker) const override; |
2539 | #endif |
2540 | |
2541 | const SCEV *getSCEV() const { return Expr; } |
2542 | }; |
2543 | |
2544 | /// Canonical scalar induction phi of the vector loop, starting at the specified
2545 | /// start value (either 0 or the resume value when vectorizing the epilogue |
2546 | /// loop). VPWidenCanonicalIVRecipe represents the vector version of the |
2547 | /// canonical induction variable. |
2548 | class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { |
2549 | public: |
2550 | VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL) |
2551 | : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {} |
2552 | |
2553 | ~VPCanonicalIVPHIRecipe() override = default; |
2554 | |
2555 | VPCanonicalIVPHIRecipe *clone() override { |
2556 | auto *R = new VPCanonicalIVPHIRecipe(getOperand(N: 0), getDebugLoc()); |
2557 | R->addOperand(Operand: getBackedgeValue()); |
2558 | return R; |
2559 | } |
2560 | |
2561 | VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC) |
2562 | |
2563 | static inline bool classof(const VPHeaderPHIRecipe *D) {
2564 | return D->getVPDefID() == VPDef::VPCanonicalIVPHISC; |
2565 | } |
2566 | |
2567 | /// Generate the canonical scalar induction phi of the vector loop. |
2568 | void execute(VPTransformState &State) override; |
2569 | |
2570 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2571 | /// Print the recipe. |
2572 | void print(raw_ostream &O, const Twine &Indent, |
2573 | VPSlotTracker &SlotTracker) const override; |
2574 | #endif |
2575 | |
2576 | /// Returns the scalar type of the induction. |
2577 | Type *getScalarType() const { |
2578 | return getStartValue()->getLiveInIRValue()->getType(); |
2579 | } |
2580 | |
2581 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2582 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2583 | assert(is_contained(operands(), Op) && |
2584 | "Op must be an operand of the recipe" ); |
2585 | return true; |
2586 | } |
2587 | |
2588 | /// Returns true if the recipe only uses the first part of operand \p Op. |
2589 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
2590 | assert(is_contained(operands(), Op) && |
2591 | "Op must be an operand of the recipe" ); |
2592 | return true; |
2593 | } |
2594 | |
2595 | /// Check if the induction described by \p Kind, \p Start and \p Step is
2596 | /// canonical, i.e. has the same start and step (of 1) as the canonical IV. |
2597 | bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, |
2598 | VPValue *Step) const; |
2599 | }; |
2600 | |
2601 | /// A recipe for generating the active lane mask for the vector loop that is |
2602 | /// used to predicate the vector operations. |
2603 | /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and |
2604 | /// remove VPActiveLaneMaskPHIRecipe. |
2605 | class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { |
2606 | public: |
2607 | VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL) |
2608 | : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask, |
2609 | DL) {} |
2610 | |
2611 | ~VPActiveLaneMaskPHIRecipe() override = default; |
2612 | |
2613 | VPActiveLaneMaskPHIRecipe *clone() override { |
2614 | return new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc()); |
2615 | } |
2616 | |
2617 | VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC) |
2618 | |
2619 | static inline bool classof(const VPHeaderPHIRecipe *D) {
2620 | return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC; |
2621 | } |
2622 | |
2623 | /// Generate the active lane mask phi of the vector loop. |
2624 | void execute(VPTransformState &State) override; |
2625 | |
2626 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2627 | /// Print the recipe. |
2628 | void print(raw_ostream &O, const Twine &Indent, |
2629 | VPSlotTracker &SlotTracker) const override; |
2630 | #endif |
2631 | }; |
2632 | |
2633 | /// A recipe for generating the phi node for the current index of elements, |
2634 | /// adjusted in accordance with EVL value. It starts at the start value of the |
2635 | /// canonical induction and gets incremented by EVL in each iteration of the |
2636 | /// vector loop. |
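     | ///
     | /// For example, with a start value of 0 and EVL values of 4 and 3 in the
     | /// first two vector iterations, the phi takes the values 0, 4 and 7.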
2637 | class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { |
2638 | public: |
2639 | VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL) |
2640 | : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {} |
2641 | |
2642 | ~VPEVLBasedIVPHIRecipe() override = default; |
2643 | |
2644 | VPEVLBasedIVPHIRecipe *clone() override { |
2645 | llvm_unreachable("cloning not implemented yet" ); |
2646 | } |
2647 | |
2648 | VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC) |
2649 | |
2650 | static inline bool classof(const VPHeaderPHIRecipe *D) {
2651 | return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC; |
2652 | } |
2653 | |
2654 | /// Generate the phi node for the IV that is incremented by EVL on each iteration.
2655 | /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. |
2656 | void execute(VPTransformState &State) override; |
2657 | |
2658 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2659 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2660 | assert(is_contained(operands(), Op) && |
2661 | "Op must be an operand of the recipe" ); |
2662 | return true; |
2663 | } |
2664 | |
2665 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2666 | /// Print the recipe. |
2667 | void print(raw_ostream &O, const Twine &Indent, |
2668 | VPSlotTracker &SlotTracker) const override; |
2669 | #endif |
2670 | }; |
2671 | |
2672 | /// A Recipe for widening the canonical induction variable of the vector loop. |
2673 | class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { |
2674 | public: |
2675 | VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) |
2676 | : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} |
2677 | |
2678 | ~VPWidenCanonicalIVRecipe() override = default; |
2679 | |
2680 | VPWidenCanonicalIVRecipe *clone() override { |
2681 | return new VPWidenCanonicalIVRecipe( |
2682 | cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0))); |
2683 | } |
2684 | |
2685 | VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC) |
2686 | |
2687 | /// Generate a canonical vector induction variable of the vector loop, with |
2688 | /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and |
2689 | /// step = <VF*UF, VF*UF, ..., VF*UF>. |
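     | /// For example, with VF = 4 and UF = 2 the initial values are <0, 1, 2, 3> for
     | /// part 0 and <4, 5, 6, 7> for part 1, stepping by <8, 8, 8, 8> per vector
     | /// iteration.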
2690 | void execute(VPTransformState &State) override; |
2691 | |
2692 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2693 | /// Print the recipe. |
2694 | void print(raw_ostream &O, const Twine &Indent, |
2695 | VPSlotTracker &SlotTracker) const override; |
2696 | #endif |
2697 | |
2698 | /// Returns the scalar type of the induction. |
2699 | const Type *getScalarType() const { |
2700 | return cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0)->getDefiningRecipe()) |
2701 | ->getScalarType(); |
2702 | } |
2703 | }; |
2704 | |
2705 | /// A recipe for converting the input value \p IV value to the corresponding |
2706 | /// value of an IV with different start and step values, using Start + IV * |
2707 | /// Step. |
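     | ///
     | /// For example, with Start = 100, Step = 4 and a canonical IV value of 8, the
     | /// recipe produces the scalar value 100 + 8 * 4 = 132.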
2708 | class VPDerivedIVRecipe : public VPSingleDefRecipe { |
2709 | /// Kind of the induction. |
2710 | const InductionDescriptor::InductionKind Kind; |
2711 | /// If not nullptr, the floating point induction binary operator. Must be set |
2712 | /// for floating point inductions. |
2713 | const FPMathOperator *FPBinOp; |
2714 | |
2715 | public: |
2716 | VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, |
2717 | VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) |
2718 | : VPDerivedIVRecipe( |
2719 | IndDesc.getKind(), |
2720 | dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()), |
2721 | Start, CanonicalIV, Step) {} |
2722 | |
2723 | VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, |
2724 | const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, |
2725 | VPValue *Step) |
2726 | : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), |
2727 | FPBinOp(FPBinOp) {} |
2728 | |
2729 | ~VPDerivedIVRecipe() override = default; |
2730 | |
2731 | VPDerivedIVRecipe *clone() override { |
2732 | return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1), |
2733 | getStepValue()); |
2734 | } |
2735 | |
2736 | VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC) |
2737 | |
2738 | /// Generate the transformed value of the induction at offset StartValue (1. |
2739 | /// operand) + IV (2. operand) * StepValue (3. operand).
2740 | void execute(VPTransformState &State) override; |
2741 | |
2742 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2743 | /// Print the recipe. |
2744 | void print(raw_ostream &O, const Twine &Indent, |
2745 | VPSlotTracker &SlotTracker) const override; |
2746 | #endif |
2747 | |
2748 | Type *getScalarType() const { |
2749 | return getStartValue()->getLiveInIRValue()->getType(); |
2750 | } |
2751 | |
  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getStepValue() const { return getOperand(2); }
2754 | |
2755 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2756 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2757 | assert(is_contained(operands(), Op) && |
2758 | "Op must be an operand of the recipe" ); |
2759 | return true; |
2760 | } |
2761 | }; |
2762 | |
2763 | /// A recipe for handling phi nodes of integer and floating-point inductions, |
2764 | /// producing their scalar values. |
2765 | class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { |
2766 | Instruction::BinaryOps InductionOpcode; |
2767 | |
2768 | public: |
2769 | VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, |
2770 | Instruction::BinaryOps Opcode, FastMathFlags FMFs) |
2771 | : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC, |
2772 | ArrayRef<VPValue *>({IV, Step}), FMFs), |
2773 | InductionOpcode(Opcode) {} |
2774 | |
2775 | VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, |
2776 | VPValue *Step) |
2777 | : VPScalarIVStepsRecipe( |
2778 | IV, Step, IndDesc.getInductionOpcode(), |
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2780 | ? IndDesc.getInductionBinOp()->getFastMathFlags() |
2781 | : FastMathFlags()) {} |
2782 | |
2783 | ~VPScalarIVStepsRecipe() override = default; |
2784 | |
2785 | VPScalarIVStepsRecipe *clone() override { |
2786 | return new VPScalarIVStepsRecipe( |
        getOperand(0), getOperand(1), InductionOpcode,
2788 | hasFastMathFlags() ? getFastMathFlags() : FastMathFlags()); |
2789 | } |
2790 | |
2791 | VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC) |
2792 | |
2793 | /// Generate the scalarized versions of the phi node as needed by their users. |
2794 | void execute(VPTransformState &State) override; |
2795 | |
2796 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2797 | /// Print the recipe. |
2798 | void print(raw_ostream &O, const Twine &Indent, |
2799 | VPSlotTracker &SlotTracker) const override; |
2800 | #endif |
2801 | |
  VPValue *getStepValue() const { return getOperand(1); }
2803 | |
2804 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2805 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2806 | assert(is_contained(operands(), Op) && |
2807 | "Op must be an operand of the recipe" ); |
2808 | return true; |
2809 | } |
2810 | }; |
2811 | |
2812 | /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It |
2813 | /// holds a sequence of zero or more VPRecipe's each representing a sequence of |
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
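///
/// A minimal usage sketch, assuming an existing block VPBB and recipe R (both
/// placeholders; illustrative only):
/// \code
///   VPBB->appendRecipe(R);                 // R becomes the last recipe.
///   for (VPRecipeBase &Phi : VPBB->phis()) // Visit header-phi recipes only.
///     visit(Phi);                          // hypothetical visitor
/// \endcode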
2815 | class VPBasicBlock : public VPBlockBase { |
2816 | public: |
2817 | using RecipeListTy = iplist<VPRecipeBase>; |
2818 | |
2819 | private: |
2820 | /// The VPRecipes held in the order of output instructions to generate. |
2821 | RecipeListTy Recipes; |
2822 | |
2823 | public: |
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2825 | : VPBlockBase(VPBasicBlockSC, Name.str()) { |
2826 | if (Recipe) |
2827 | appendRecipe(Recipe); |
2828 | } |
2829 | |
2830 | ~VPBasicBlock() override { |
2831 | while (!Recipes.empty()) |
2832 | Recipes.pop_back(); |
2833 | } |
2834 | |
2835 | /// Instruction iterators... |
2836 | using iterator = RecipeListTy::iterator; |
2837 | using const_iterator = RecipeListTy::const_iterator; |
2838 | using reverse_iterator = RecipeListTy::reverse_iterator; |
2839 | using const_reverse_iterator = RecipeListTy::const_reverse_iterator; |
2840 | |
2841 | //===--------------------------------------------------------------------===// |
2842 | /// Recipe iterator methods |
2843 | /// |
2844 | inline iterator begin() { return Recipes.begin(); } |
2845 | inline const_iterator begin() const { return Recipes.begin(); } |
2846 | inline iterator end() { return Recipes.end(); } |
2847 | inline const_iterator end() const { return Recipes.end(); } |
2848 | |
2849 | inline reverse_iterator rbegin() { return Recipes.rbegin(); } |
2850 | inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } |
2851 | inline reverse_iterator rend() { return Recipes.rend(); } |
2852 | inline const_reverse_iterator rend() const { return Recipes.rend(); } |
2853 | |
2854 | inline size_t size() const { return Recipes.size(); } |
2855 | inline bool empty() const { return Recipes.empty(); } |
2856 | inline const VPRecipeBase &front() const { return Recipes.front(); } |
2857 | inline VPRecipeBase &front() { return Recipes.front(); } |
2858 | inline const VPRecipeBase &back() const { return Recipes.back(); } |
2859 | inline VPRecipeBase &back() { return Recipes.back(); } |
2860 | |
2861 | /// Returns a reference to the list of recipes. |
2862 | RecipeListTy &getRecipeList() { return Recipes; } |
2863 | |
2864 | /// Returns a pointer to a member of the recipe list. |
2865 | static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { |
2866 | return &VPBasicBlock::Recipes; |
2867 | } |
2868 | |
2869 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
2870 | static inline bool classof(const VPBlockBase *V) { |
2871 | return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; |
2872 | } |
2873 | |
2874 | void insert(VPRecipeBase *Recipe, iterator InsertPt) { |
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
2879 | } |
2880 | |
2881 | /// Augment the existing recipes of a VPBasicBlock with an additional |
2882 | /// \p Recipe as the last recipe. |
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2884 | |
2885 | /// The method which generates the output IR instructions that correspond to |
2886 | /// this VPBasicBlock, thereby "executing" the VPlan. |
2887 | void execute(VPTransformState *State) override; |
2888 | |
2889 | /// Return the position of the first non-phi node recipe in the block. |
2890 | iterator getFirstNonPhi(); |
2891 | |
2892 | /// Returns an iterator range over the PHI-like recipes in the block. |
2893 | iterator_range<iterator> phis() { |
    return make_range(begin(), getFirstNonPhi());
2895 | } |
2896 | |
2897 | void dropAllReferences(VPValue *NewValue) override; |
2898 | |
2899 | /// Split current block at \p SplitAt by inserting a new block between the |
2900 | /// current block and its successors and moving all recipes starting at |
2901 | /// SplitAt to the new block. Returns the new block. |
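  /// For example (illustrative only), splitting at the first non-phi recipe
  /// moves all non-phi recipes into the newly created successor block:
  /// \code
  ///   VPBasicBlock *Split = VPBB->splitAt(VPBB->getFirstNonPhi());
  /// \endcode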
2902 | VPBasicBlock *splitAt(iterator SplitAt); |
2903 | |
2904 | VPRegionBlock *getEnclosingLoopRegion(); |
2905 | |
2906 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
2909 | /// |
2910 | /// Note that the numbering is applied to the whole VPlan, so printing |
2911 | /// individual blocks is consistent with the whole VPlan printing. |
2912 | void print(raw_ostream &O, const Twine &Indent, |
2913 | VPSlotTracker &SlotTracker) const override; |
2914 | using VPBlockBase::print; // Get the print(raw_stream &O) version. |
2915 | #endif |
2916 | |
2917 | /// If the block has multiple successors, return the branch recipe terminating |
  /// the block. If there are no or only a single successor, return nullptr.
2919 | VPRecipeBase *getTerminator(); |
2920 | const VPRecipeBase *getTerminator() const; |
2921 | |
  /// Returns true if the block is exiting its parent region.
2923 | bool isExiting() const; |
2924 | |
  /// Clone the current block and its recipes, without updating the operands of
2926 | /// the cloned recipes. |
2927 | VPBasicBlock *clone() override { |
2928 | auto *NewBlock = new VPBasicBlock(getName()); |
2929 | for (VPRecipeBase &R : *this) |
      NewBlock->appendRecipe(R.clone());
2931 | return NewBlock; |
2932 | } |
2933 | |
2934 | private: |
2935 | /// Create an IR BasicBlock to hold the output instructions generated by this |
2936 | /// VPBasicBlock, and return it. Update the CFGState accordingly. |
2937 | BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); |
2938 | }; |
2939 | |
2940 | /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks |
2941 | /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. |
2942 | /// A VPRegionBlock may indicate that its contents are to be replicated several |
2943 | /// times. This is designed to support predicated scalarization, in which a |
2944 | /// scalar if-then code structure needs to be generated VF * UF times. Having |
2945 | /// this replication indicator helps to keep a single model for multiple |
2946 | /// candidate VF's. The actual replication takes place only once the desired VF |
2947 | /// and UF have been determined. |
2948 | class VPRegionBlock : public VPBlockBase { |
2949 | /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. |
2950 | VPBlockBase *Entry; |
2951 | |
2952 | /// Hold the Single Exiting block of the SESE region modelled by the |
2953 | /// VPRegionBlock. |
2954 | VPBlockBase *Exiting; |
2955 | |
2956 | /// An indicator whether this region is to generate multiple replicated |
2957 | /// instances of output IR corresponding to its VPBlockBases. |
2958 | bool IsReplicator; |
2959 | |
2960 | public: |
2961 | VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, |
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
2967 | Entry->setParent(this); |
2968 | Exiting->setParent(this); |
2969 | } |
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
2971 | : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), |
2972 | IsReplicator(IsReplicator) {} |
2973 | |
2974 | ~VPRegionBlock() override { |
2975 | if (Entry) { |
2976 | VPValue DummyValue; |
      Entry->dropAllReferences(&DummyValue);
2978 | deleteCFG(Entry); |
2979 | } |
2980 | } |
2981 | |
2982 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
2983 | static inline bool classof(const VPBlockBase *V) { |
2984 | return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; |
2985 | } |
2986 | |
2987 | const VPBlockBase *getEntry() const { return Entry; } |
2988 | VPBlockBase *getEntry() { return Entry; } |
2989 | |
2990 | /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p |
2991 | /// EntryBlock must have no predecessors. |
2992 | void setEntry(VPBlockBase *EntryBlock) { |
2993 | assert(EntryBlock->getPredecessors().empty() && |
2994 | "Entry block cannot have predecessors." ); |
2995 | Entry = EntryBlock; |
2996 | EntryBlock->setParent(this); |
2997 | } |
2998 | |
2999 | const VPBlockBase *getExiting() const { return Exiting; } |
3000 | VPBlockBase *getExiting() { return Exiting; } |
3001 | |
3002 | /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p |
3003 | /// ExitingBlock must have no successors. |
3004 | void setExiting(VPBlockBase *ExitingBlock) { |
3005 | assert(ExitingBlock->getSuccessors().empty() && |
3006 | "Exit block cannot have successors." ); |
3007 | Exiting = ExitingBlock; |
3008 | ExitingBlock->setParent(this); |
3009 | } |
3010 | |
3011 | /// Returns the pre-header VPBasicBlock of the loop region. |
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
3014 | return getSinglePredecessor()->getExitingBasicBlock(); |
3015 | } |
3016 | |
3017 | /// An indicator whether this region is to generate multiple replicated |
3018 | /// instances of output IR corresponding to its VPBlockBases. |
3019 | bool isReplicator() const { return IsReplicator; } |
3020 | |
3021 | /// The method which generates the output IR instructions that correspond to |
3022 | /// this VPRegionBlock, thereby "executing" the VPlan. |
3023 | void execute(VPTransformState *State) override; |
3024 | |
3025 | void dropAllReferences(VPValue *NewValue) override; |
3026 | |
3027 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3028 | /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with |
3029 | /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using |
  /// consecutive numbers.
3031 | /// |
3032 | /// Note that the numbering is applied to the whole VPlan, so printing |
3033 | /// individual regions is consistent with the whole VPlan printing. |
3034 | void print(raw_ostream &O, const Twine &Indent, |
3035 | VPSlotTracker &SlotTracker) const override; |
3036 | using VPBlockBase::print; // Get the print(raw_stream &O) version. |
3037 | #endif |
3038 | |
3039 | /// Clone all blocks in the single-entry single-exit region of the block and |
3040 | /// their recipes without updating the operands of the cloned recipes. |
3041 | VPRegionBlock *clone() override; |
3042 | }; |
3043 | |
/// VPlan models a candidate for vectorization, encoding various decisions taken
3045 | /// to produce efficient output IR, including which branches, basic-blocks and |
3046 | /// output IR instructions to generate, and their cost. VPlan holds a |
3047 | /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry |
3048 | /// VPBasicBlock. |
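///
/// A rough sketch of interacting with a constructed plan, assuming an existing
/// `VPlan &Plan` (illustrative only):
/// \code
///   Plan.addVF(ElementCount::getFixed(4));
///   Plan.setUF(2);
///   Plan.setName("example");
///   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
///   VPCanonicalIVPHIRecipe *CanIV = Plan.getCanonicalIV();
/// \endcode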
3049 | class VPlan { |
3050 | friend class VPlanPrinter; |
3051 | friend class VPSlotTracker; |
3052 | |
3053 | /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the |
3054 | /// preheader of the vector loop. |
3055 | VPBasicBlock *Entry; |
3056 | |
3057 | /// VPBasicBlock corresponding to the original preheader. Used to place |
3058 | /// VPExpandSCEV recipes for expressions used during skeleton creation and the |
3059 | /// rest of VPlan execution. |
  VPBasicBlock *Preheader;
3061 | |
3062 | /// Holds the VFs applicable to this VPlan. |
3063 | SmallSetVector<ElementCount, 2> VFs; |
3064 | |
3065 | /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for |
3066 | /// any UF. |
3067 | SmallSetVector<unsigned, 2> UFs; |
3068 | |
3069 | /// Holds the name of the VPlan, for printing. |
3070 | std::string Name; |
3071 | |
3072 | /// Represents the trip count of the original loop, for folding |
3073 | /// the tail. |
3074 | VPValue *TripCount = nullptr; |
3075 | |
3076 | /// Represents the backedge taken count of the original loop, for folding |
3077 | /// the tail. It equals TripCount - 1. |
3078 | VPValue *BackedgeTakenCount = nullptr; |
3079 | |
3080 | /// Represents the vector trip count. |
3081 | VPValue VectorTripCount; |
3082 | |
3083 | /// Represents the loop-invariant VF * UF of the vector loop region. |
3084 | VPValue VFxUF; |
3085 | |
3086 | /// Holds a mapping between Values and their corresponding VPValue inside |
3087 | /// VPlan. |
3088 | Value2VPValueTy Value2VPValue; |
3089 | |
3090 | /// Contains all the external definitions created for this VPlan. External |
3091 | /// definitions are VPValues that hold a pointer to their underlying IR. |
3092 | SmallVector<VPValue *, 16> VPLiveInsToFree; |
3093 | |
3094 | /// Values used outside the plan. |
3095 | MapVector<PHINode *, VPLiveOut *> LiveOuts; |
3096 | |
3097 | /// Mapping from SCEVs to the VPValues representing their expansions. |
3098 | /// NOTE: This mapping is temporary and will be removed once all users have |
3099 | /// been modeled in VPlan directly. |
3100 | DenseMap<const SCEV *, VPValue *> SCEVToExpansion; |
3101 | |
3102 | public: |
3103 | /// Construct a VPlan with original preheader \p Preheader, trip count \p TC |
3104 | /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to |
3105 | /// be disconnected, as the bypass blocks between them are not yet modeled in |
3106 | /// VPlan. |
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3108 | : VPlan(Preheader, Entry) { |
3109 | TripCount = TC; |
3110 | } |
3111 | |
3112 | /// Construct a VPlan with original preheader \p Preheader and \p Entry to |
3113 | /// the plan. At the moment, \p Preheader and \p Entry need to be |
3114 | /// disconnected, as the bypass blocks between them are not yet modeled in |
3115 | /// VPlan. |
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3117 | : Entry(Entry), Preheader(Preheader) { |
3118 | Entry->setPlan(this); |
3119 | Preheader->setPlan(this); |
3120 | assert(Preheader->getNumSuccessors() == 0 && |
3121 | Preheader->getNumPredecessors() == 0 && |
3122 | "preheader must be disconnected" ); |
3123 | } |
3124 | |
3125 | ~VPlan(); |
3126 | |
3127 | /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping |
3128 | /// original scalar pre-header) which contains SCEV expansions that need to |
3129 | /// happen before the CFG is modified; a VPBasicBlock for the vector |
3130 | /// pre-header, followed by a region for the vector loop, followed by the |
3131 | /// middle VPBasicBlock. |
3132 | static VPlanPtr createInitialVPlan(const SCEV *TripCount, |
3133 | ScalarEvolution &PSE); |
3134 | |
3135 | /// Prepare the plan for execution, setting up the required live-in values. |
3136 | void prepareToExecute(Value *TripCount, Value *VectorTripCount, |
3137 | Value *CanonicalIVStartValue, VPTransformState &State); |
3138 | |
3139 | /// Generate the IR code for this VPlan. |
3140 | void execute(VPTransformState *State); |
3141 | |
3142 | VPBasicBlock *getEntry() { return Entry; } |
3143 | const VPBasicBlock *getEntry() const { return Entry; } |
3144 | |
3145 | /// The trip count of the original loop. |
3146 | VPValue *getTripCount() const { |
    assert(TripCount && "trip count needs to be set before accessing it");
3148 | return TripCount; |
3149 | } |
3150 | |
3151 | /// Resets the trip count for the VPlan. The caller must make sure all uses of |
3152 | /// the original trip count have been replaced. |
3153 | void resetTripCount(VPValue *NewTripCount) { |
3154 | assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 && |
3155 | "TripCount always must be set" ); |
3156 | TripCount = NewTripCount; |
3157 | } |
3158 | |
3159 | /// The backedge taken count of the original loop. |
3160 | VPValue *getOrCreateBackedgeTakenCount() { |
3161 | if (!BackedgeTakenCount) |
3162 | BackedgeTakenCount = new VPValue(); |
3163 | return BackedgeTakenCount; |
3164 | } |
3165 | |
3166 | /// The vector trip count. |
3167 | VPValue &getVectorTripCount() { return VectorTripCount; } |
3168 | |
3169 | /// Returns VF * UF of the vector loop region. |
3170 | VPValue &getVFxUF() { return VFxUF; } |
3171 | |
  void addVF(ElementCount VF) { VFs.insert(VF); }
3173 | |
3174 | void setVF(ElementCount VF) { |
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
3178 | } |
3179 | |
  bool hasVF(ElementCount VF) { return VFs.count(VF); }
  bool hasScalableVF() {
    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3183 | } |
3184 | |
3185 | bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } |
3186 | |
  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3188 | |
3189 | void setUF(unsigned UF) { |
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
3193 | } |
3194 | |
3195 | /// Return a string with the name of the plan and the applicable VFs and UFs. |
3196 | std::string getName() const; |
3197 | |
3198 | void setName(const Twine &newName) { Name = newName.str(); } |
3199 | |
3200 | /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists |
3201 | /// yet) for \p V. |
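  /// For example (illustrative only), repeated queries for the same IR value
  /// return the same live-in VPValue:
  /// \code
  ///   VPValue *A = Plan.getOrAddLiveIn(V);
  ///   VPValue *B = Plan.getOrAddLiveIn(V); // A == B.
  /// \endcode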
3202 | VPValue *getOrAddLiveIn(Value *V) { |
3203 | assert(V && "Trying to get or add the VPValue of a null Value" ); |
3204 | if (!Value2VPValue.count(Val: V)) { |
3205 | VPValue *VPV = new VPValue(V); |
3206 | VPLiveInsToFree.push_back(Elt: VPV); |
3207 | assert(VPV->isLiveIn() && "VPV must be a live-in." ); |
3208 | assert(!Value2VPValue.count(V) && "Value already exists in VPlan" ); |
3209 | Value2VPValue[V] = VPV; |
3210 | } |
3211 | |
3212 | assert(Value2VPValue.count(V) && "Value does not exist in VPlan" ); |
3213 | assert(Value2VPValue[V]->isLiveIn() && |
3214 | "Only live-ins should be in mapping" ); |
3215 | return Value2VPValue[V]; |
3216 | } |
3217 | |
3218 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3219 | /// Print the live-ins of this VPlan to \p O. |
3220 | void printLiveIns(raw_ostream &O) const; |
3221 | |
3222 | /// Print this VPlan to \p O. |
3223 | void print(raw_ostream &O) const; |
3224 | |
3225 | /// Print this VPlan in DOT format to \p O. |
3226 | void printDOT(raw_ostream &O) const; |
3227 | |
3228 | /// Dump the plan to stderr (for debugging). |
3229 | LLVM_DUMP_METHOD void dump() const; |
3230 | #endif |
3231 | |
3232 | /// Returns the VPRegionBlock of the vector loop. |
3233 | VPRegionBlock *getVectorLoopRegion() { |
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3238 | } |
3239 | |
3240 | /// Returns the canonical induction recipe of the vector loop. |
3241 | VPCanonicalIVPHIRecipe *getCanonicalIV() { |
3242 | VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); |
3243 | if (EntryVPBB->empty()) { |
3244 | // VPlan native path. |
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3248 | } |
3249 | |
3250 | void addLiveOut(PHINode *PN, VPValue *V); |
3251 | |
3252 | void removeLiveOut(PHINode *PN) { |
3253 | delete LiveOuts[PN]; |
    LiveOuts.erase(PN);
3255 | } |
3256 | |
3257 | const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const { |
3258 | return LiveOuts; |
3259 | } |
3260 | |
3261 | VPValue *getSCEVExpansion(const SCEV *S) const { |
    return SCEVToExpansion.lookup(S);
3263 | } |
3264 | |
3265 | void addSCEVExpansion(const SCEV *S, VPValue *V) { |
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
3267 | SCEVToExpansion[S] = V; |
3268 | } |
3269 | |
3270 | /// \return The block corresponding to the original preheader. |
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }
3273 | |
3274 | /// Clone the current VPlan, update all VPValues of the new VPlan and cloned |
3275 | /// recipes to refer to the clones, and return it. |
3276 | VPlan *duplicate(); |
3277 | |
3278 | private: |
3279 | /// Add to the given dominator tree the header block and every new basic block |
3280 | /// that was created between it and the latch block, inclusive. |
3281 | static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB, |
                                  BasicBlock *LoopPreHeaderBB,
3283 | BasicBlock *LoopExitBB); |
3284 | }; |
3285 | |
3286 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3287 | /// VPlanPrinter prints a given VPlan to a given output stream. The printing is |
3288 | /// indented and follows the dot format. |
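///
/// A minimal usage sketch, assuming an existing `VPlan &Plan` (illustrative
/// only):
/// \code
///   VPlanPrinter Printer(dbgs(), Plan);
///   Printer.dump(); // Emits the plan as an indented dot graph.
/// \endcode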
3289 | class VPlanPrinter { |
3290 | raw_ostream &OS; |
3291 | const VPlan &Plan; |
3292 | unsigned Depth = 0; |
3293 | unsigned TabWidth = 2; |
3294 | std::string Indent; |
3295 | unsigned BID = 0; |
3296 | SmallDenseMap<const VPBlockBase *, unsigned> BlockID; |
3297 | |
3298 | VPSlotTracker SlotTracker; |
3299 | |
3300 | /// Handle indentation. |
3301 | void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } |
3302 | |
3303 | /// Print a given \p Block of the Plan. |
3304 | void dumpBlock(const VPBlockBase *Block); |
3305 | |
3306 | /// Print the information related to the CFG edges going out of a given |
3307 | /// \p Block, followed by printing the successor blocks themselves. |
3308 | void dumpEdges(const VPBlockBase *Block); |
3309 | |
3310 | /// Print a given \p BasicBlock, including its VPRecipes, followed by printing |
3311 | /// its successor blocks. |
3312 | void dumpBasicBlock(const VPBasicBlock *BasicBlock); |
3313 | |
3314 | /// Print a given \p Region of the Plan. |
3315 | void dumpRegion(const VPRegionBlock *Region); |
3316 | |
3317 | unsigned getOrCreateBID(const VPBlockBase *Block) { |
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
3319 | } |
3320 | |
3321 | Twine getOrCreateName(const VPBlockBase *Block); |
3322 | |
3323 | Twine getUID(const VPBlockBase *Block); |
3324 | |
3325 | /// Print the information related to a CFG edge between two VPBlockBases. |
3326 | void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, |
3327 | const Twine &Label); |
3328 | |
3329 | public: |
3330 | VPlanPrinter(raw_ostream &O, const VPlan &P) |
3331 | : OS(O), Plan(P), SlotTracker(&P) {} |
3332 | |
3333 | LLVM_DUMP_METHOD void dump(); |
3334 | }; |
3335 | |
3336 | struct VPlanIngredient { |
3337 | const Value *V; |
3338 | |
3339 | VPlanIngredient(const Value *V) : V(V) {} |
3340 | |
3341 | void print(raw_ostream &O) const; |
3342 | }; |
3343 | |
3344 | inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { |
  I.print(OS);
3346 | return OS; |
3347 | } |
3348 | |
3349 | inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { |
  Plan.print(OS);
3351 | return OS; |
3352 | } |
3353 | #endif |
3354 | |
3355 | //===----------------------------------------------------------------------===// |
3356 | // VPlan Utilities |
3357 | //===----------------------------------------------------------------------===// |
3358 | |
3359 | /// Class that provides utilities for VPBlockBases in VPlan. |
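///
/// For illustration, a sketch of wiring up a small diamond from freshly
/// created blocks, assuming all blocks live outside any region so their
/// (null) parents match (illustrative only):
/// \code
///   VPBasicBlock *Cond = new VPBasicBlock("cond");
///   VPBasicBlock *Then = new VPBasicBlock("then");
///   VPBasicBlock *Else = new VPBasicBlock("else");
///   VPBasicBlock *Merge = new VPBasicBlock("merge");
///   VPBlockUtils::insertTwoBlocksAfter(Then, Else, Cond);
///   VPBlockUtils::connectBlocks(Then, Merge);
///   VPBlockUtils::connectBlocks(Else, Merge);
/// \endcode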
3360 | class VPBlockUtils { |
3361 | public: |
3362 | VPBlockUtils() = delete; |
3363 | |
3364 | /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p |
3365 | /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p |
3366 | /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's |
3367 | /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must |
3368 | /// have neither successors nor predecessors. |
3369 | static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { |
3370 | assert(NewBlock->getSuccessors().empty() && |
3371 | NewBlock->getPredecessors().empty() && |
3372 | "Can't insert new block with predecessors or successors." ); |
3373 | NewBlock->setParent(BlockPtr->getParent()); |
3374 | SmallVector<VPBlockBase *> Succs(BlockPtr->successors()); |
3375 | for (VPBlockBase *Succ : Succs) { |
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
3380 | } |
3381 | |
3382 | /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p |
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and
  /// \p BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p
  /// BlockPtr
3384 | /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr |
3385 | /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors |
3386 | /// and \p IfTrue and \p IfFalse must have neither successors nor |
3387 | /// predecessors. |
3388 | static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, |
3389 | VPBlockBase *BlockPtr) { |
3390 | assert(IfTrue->getSuccessors().empty() && |
3391 | "Can't insert IfTrue with successors." ); |
3392 | assert(IfFalse->getSuccessors().empty() && |
3393 | "Can't insert IfFalse with successors." ); |
3394 | BlockPtr->setTwoSuccessors(IfTrue, IfFalse); |
3395 | IfTrue->setPredecessors({BlockPtr}); |
3396 | IfFalse->setPredecessors({BlockPtr}); |
3397 | IfTrue->setParent(BlockPtr->getParent()); |
3398 | IfFalse->setParent(BlockPtr->getParent()); |
3399 | } |
3400 | |
3401 | /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to |
3402 | /// the successors of \p From and \p From to the predecessors of \p To. Both |
3403 | /// VPBlockBases must have the same parent, which can be null. Both |
3404 | /// VPBlockBases can be already connected to other VPBlockBases. |
3405 | static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { |
3406 | assert((From->getParent() == To->getParent()) && |
3407 | "Can't connect two block with different parents" ); |
3408 | assert(From->getNumSuccessors() < 2 && |
3409 | "Blocks can't have more than two successors." ); |
3410 | From->appendSuccessor(Successor: To); |
3411 | To->appendPredecessor(Predecessor: From); |
3412 | } |
3413 | |
3414 | /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To |
3415 | /// from the successors of \p From and \p From from the predecessors of \p To. |
3416 | static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { |
3417 | assert(To && "Successor to disconnect is null." ); |
3418 | From->removeSuccessor(Successor: To); |
3419 | To->removePredecessor(Predecessor: From); |
3420 | } |
3421 | |
3422 | /// Return an iterator range over \p Range which only includes \p BlockTy |
  /// blocks. The accesses are cast to \p BlockTy.
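  /// For example (illustrative only), visiting only the VPBasicBlock
  /// successors of a block:
  /// \code
  ///   for (VPBasicBlock *VPBB :
  ///        VPBlockUtils::blocksOnly<VPBasicBlock>(Block->successors()))
  ///     process(VPBB); // hypothetical helper
  /// \endcode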
3424 | template <typename BlockTy, typename T> |
3425 | static auto blocksOnly(const T &Range) { |
3426 | // Create BaseTy with correct const-ness based on BlockTy. |
3427 | using BaseTy = std::conditional_t<std::is_const<BlockTy>::value, |
3428 | const VPBlockBase, VPBlockBase>; |
3429 | |
    // We need to first create an iterator range over (const) BlockTy & instead
3431 | // of (const) BlockTy * for filter_range to work properly. |
3432 | auto Mapped = |
3433 | map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; }); |
3434 | auto Filter = make_filter_range( |
3435 | Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); }); |
3436 | return map_range(Filter, [](BaseTy &Block) -> BlockTy * { |
3437 | return cast<BlockTy>(&Block); |
3438 | }); |
3439 | } |
3440 | }; |
3441 | |
3442 | class VPInterleavedAccessInfo { |
3443 | DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *> |
3444 | InterleaveGroupMap; |
3445 | |
3446 | /// Type for mapping of instruction based interleave groups to VPInstruction |
3447 | /// interleave groups |
3448 | using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *, |
3449 | InterleaveGroup<VPInstruction> *>; |
3450 | |
  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
3453 | void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, |
3454 | InterleavedAccessInfo &IAI); |
3455 | /// Recursively traverse \p Block and populate VPlan based interleave groups |
3456 | /// based on \p IAI. |
3457 | void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, |
3458 | InterleavedAccessInfo &IAI); |
3459 | |
3460 | public: |
3461 | VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); |
3462 | |
3463 | ~VPInterleavedAccessInfo() { |
3464 | SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet; |
3465 | // Avoid releasing a pointer twice. |
3466 | for (auto &I : InterleaveGroupMap) |
      DelSet.insert(I.second);
3468 | for (auto *Ptr : DelSet) |
3469 | delete Ptr; |
3470 | } |
3471 | |
3472 | /// Get the interleave group that \p Instr belongs to. |
3473 | /// |
  /// \returns nullptr if \p Instr does not belong to any group.
3475 | InterleaveGroup<VPInstruction> * |
3476 | getInterleaveGroup(VPInstruction *Instr) const { |
    return InterleaveGroupMap.lookup(Instr);
3478 | } |
3479 | }; |
3480 | |
3481 | /// Class that maps (parts of) an existing VPlan to trees of combined |
3482 | /// VPInstructions. |
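///
/// A rough usage sketch, assuming an existing plan, interleave info and a
/// basic block containing two adjacent stores (all names are placeholders;
/// illustrative only):
/// \code
///   VPInterleavedAccessInfo VPIAI(Plan, IAI);
///   VPlanSlp Slp(VPIAI, *VPBB);
///   SmallVector<VPValue *, 4> StoreRoots = {StoreA, StoreB};
///   VPInstruction *Combined = Slp.buildGraph(StoreRoots);
///   if (Combined && Slp.isCompletelySLP()) {
///     unsigned WidestBits = Slp.getWidestBundleBits();
///     // ... use WidestBits to pick a vectorization factor.
///   }
/// \endcode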
3483 | class VPlanSlp { |
3484 | enum class OpMode { Failed, Load, Opcode }; |
3485 | |
3486 | /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as |
3487 | /// DenseMap keys. |
3488 | struct BundleDenseMapInfo { |
3489 | static SmallVector<VPValue *, 4> getEmptyKey() { |
3490 | return {reinterpret_cast<VPValue *>(-1)}; |
3491 | } |
3492 | |
3493 | static SmallVector<VPValue *, 4> getTombstoneKey() { |
3494 | return {reinterpret_cast<VPValue *>(-2)}; |
3495 | } |
3496 | |
3497 | static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) { |
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3499 | } |
3500 | |
3501 | static bool isEqual(const SmallVector<VPValue *, 4> &LHS, |
3502 | const SmallVector<VPValue *, 4> &RHS) { |
3503 | return LHS == RHS; |
3504 | } |
3505 | }; |
3506 | |
3507 | /// Mapping of values in the original VPlan to a combined VPInstruction. |
3508 | DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo> |
3509 | BundleToCombined; |
3510 | |
3511 | VPInterleavedAccessInfo &IAI; |
3512 | |
3513 | /// Basic block to operate on. For now, only instructions in a single BB are |
3514 | /// considered. |
3515 | const VPBasicBlock &BB; |
3516 | |
3517 | /// Indicates whether we managed to combine all visited instructions or not. |
3518 | bool CompletelySLP = true; |
3519 | |
3520 | /// Width of the widest combined bundle in bits. |
3521 | unsigned WidestBundleBits = 0; |
3522 | |
3523 | using MultiNodeOpTy = |
3524 | typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; |
3525 | |
3526 | // Input operand bundles for the current multi node. Each multi node operand |
3527 | // bundle contains values not matching the multi node's opcode. They will |
  // be reordered in reorderMultiNodeOps, once we have finished building a
  // multi node.
3530 | SmallVector<MultiNodeOpTy, 4> MultiNodeOps; |
3531 | |
3532 | /// Indicates whether we are building a multi node currently. |
3533 | bool MultiNodeActive = false; |
3534 | |
3535 | /// Check if we can vectorize Operands together. |
3536 | bool areVectorizable(ArrayRef<VPValue *> Operands) const; |
3537 | |
3538 | /// Add combined instruction \p New for the bundle \p Operands. |
3539 | void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New); |
3540 | |
3541 | /// Indicate we hit a bundle we failed to combine. Returns nullptr for now. |
3542 | VPInstruction *markFailed(); |
3543 | |
3544 | /// Reorder operands in the multi node to maximize sequential memory access |
3545 | /// and commutative operations. |
3546 | SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps(); |
3547 | |
3548 | /// Choose the best candidate to use for the lane after \p Last. The set of |
3549 | /// candidates to choose from are values with an opcode matching \p Last's |
3550 | /// or loads consecutive to \p Last. |
3551 | std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last, |
3552 | SmallPtrSetImpl<VPValue *> &Candidates, |
3553 | VPInterleavedAccessInfo &IAI); |
3554 | |
3555 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3556 | /// Print bundle \p Values to dbgs(). |
3557 | void dumpBundle(ArrayRef<VPValue *> Values); |
3558 | #endif |
3559 | |
3560 | public: |
3561 | VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} |
3562 | |
3563 | ~VPlanSlp() = default; |
3564 | |
3565 | /// Tries to build an SLP tree rooted at \p Operands and returns a |
3566 | /// VPInstruction combining \p Operands, if they can be combined. |
3567 | VPInstruction *buildGraph(ArrayRef<VPValue *> Operands); |
3568 | |
3569 | /// Return the width of the widest combined bundle in bits. |
3570 | unsigned getWidestBundleBits() const { return WidestBundleBits; } |
3571 | |
  /// Return true if all visited instructions can be combined.
3573 | bool isCompletelySLP() const { return CompletelySLP; } |
3574 | }; |
3575 | |
3576 | namespace vputils { |
3577 | |
3578 | /// Returns true if only the first lane of \p Def is used. |
3579 | bool onlyFirstLaneUsed(const VPValue *Def); |
3580 | |
3581 | /// Returns true if only the first part of \p Def is used. |
3582 | bool onlyFirstPartUsed(const VPValue *Def); |
3583 | |
3584 | /// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p |
3585 | /// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in |
3586 | /// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's |
3587 | /// pre-header already contains a recipe expanding \p Expr, return it. If not, |
3588 | /// create a new one. |
3589 | VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, |
3590 | ScalarEvolution &SE); |
3591 | |
3592 | /// Returns true if \p VPV is uniform after vectorization. |
3593 | inline bool isUniformAfterVectorization(VPValue *VPV) { |
3594 | // A value defined outside the vector region must be uniform after |
3595 | // vectorization inside a vector region. |
3596 | if (VPV->isDefinedOutsideVectorRegions()) |
3597 | return true; |
3598 | VPRecipeBase *Def = VPV->getDefiningRecipe(); |
3599 | assert(Def && "Must have definition for value defined inside vector region" ); |
  if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3601 | return Rep->isUniform(); |
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  if (auto *VPI = dyn_cast<VPInstruction>(Def))
3605 | return VPI->getOpcode() == VPInstruction::ComputeReductionResult; |
3606 | return false; |
3607 | } |
3608 | } // end namespace vputils |
3609 | |
3610 | } // end namespace llvm |
3611 | |
3612 | #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
3613 | |