//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
/// VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
/// within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
/// also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVersioning;

namespace Intrinsic {
typedef unsigned ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

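/// Return a SCEV expression for the trip count, of type \p IdxTy, derived from
/// \p PSE's backedge-taken count (doc comment added here; the declaration was
/// previously undocumented).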
const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes Start and excludes End, e.g.:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};
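
// A minimal usage sketch (hypothetical factors): VFRange is iterable via the
// begin()/end() pair above, doubling the factor on each step.
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     ; // visits fixed VFs 1, 2, 4 and 8; End (16) is excluded.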
133
134using VPlanPtr = std::unique_ptr<VPlan>;
135
136/// In what follows, the term "input IR" refers to code that is fed into the
137/// vectorizer whereas the term "output IR" refers to code that is generated by
138/// the vectorizer.
139
140/// VPLane provides a way to access lanes in both fixed width and scalable
141/// vectors, where for the latter the lane index sometimes needs calculating
142/// as a runtime expression.
143class VPLane {
144public:
145 /// Kind describes how to interpret Lane.
146 enum class Kind : uint8_t {
147 /// For First, Lane is the index into the first N elements of a
148 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
149 First,
150 /// For ScalableLast, Lane is the offset from the start of the last
151 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
152 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
153 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
154 ScalableLast
155 };
156
157private:
158 /// in [0..VF)
159 unsigned Lane;
160
161 /// Indicates how the Lane should be interpreted, as described above.
162 Kind LaneKind;
163
164public:
165 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
166
167 static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
168
169 static VPLane getLastLaneForVF(const ElementCount &VF) {
170 unsigned LaneOffset = VF.getKnownMinValue() - 1;
171 Kind LaneKind;
172 if (VF.isScalable())
173 // In this case 'LaneOffset' refers to the offset from the start of the
174 // last subvector with VF.getKnownMinValue() elements.
175 LaneKind = VPLane::Kind::ScalableLast;
176 else
177 LaneKind = VPLane::Kind::First;
178 return VPLane(LaneOffset, LaneKind);
179 }
180
181 /// Returns a compile-time known value for the lane index and asserts if the
182 /// lane can only be calculated at runtime.
183 unsigned getKnownLane() const {
184 assert(LaneKind == Kind::First);
185 return Lane;
186 }
187
188 /// Returns an expression describing the lane index that can be used at
189 /// runtime.
190 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
191
192 /// Returns the Kind of lane offset.
193 Kind getKind() const { return LaneKind; }
194
195 /// Returns true if this is the first lane of the whole vector.
196 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
197
198 /// Maps the lane to a cache index based on \p VF.
199 unsigned mapToCacheIndex(const ElementCount &VF) const {
200 switch (LaneKind) {
201 case VPLane::Kind::ScalableLast:
202 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
203 return VF.getKnownMinValue() + Lane;
204 default:
205 assert(Lane < VF.getKnownMinValue());
206 return Lane;
207 }
208 }
209
210 /// Returns the maxmimum number of lanes that we are able to consider
211 /// caching for \p VF.
212 static unsigned getNumCachedLanes(const ElementCount &VF) {
213 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
214 }
215};
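
// A worked example of the cache-index mapping above (hypothetical VF): for a
// scalable VF of <vscale x 4>, Kind::First lanes 0..3 map to cache indices
// 0..3 and Kind::ScalableLast lanes 0..3 map to indices 4..7, so
// getNumCachedLanes returns 8. For a fixed VF of 4, only indices 0..3 exist
// and getNumCachedLanes returns 4.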
216
217/// VPIteration represents a single point in the iteration space of the output
218/// (vectorized and/or unrolled) IR loop.
219struct VPIteration {
220 /// in [0..UF)
221 unsigned Part;
222
223 VPLane Lane;
224
225 VPIteration(unsigned Part, unsigned Lane,
226 VPLane::Kind Kind = VPLane::Kind::First)
227 : Part(Part), Lane(Lane, Kind) {}
228
229 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
230
231 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
232};
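
// For example (hypothetical values), VPIteration(1, 2) names lane 2 of
// unrolled part 1; only part 0 with a Kind::First lane 0 satisfies
// isFirstIteration().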
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
236struct VPTransformState {
237 VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
238 DominatorTree *DT, IRBuilderBase &Builder,
239 InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx);
240
241 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
242 ElementCount VF;
243 unsigned UF;
244
245 /// Hold the indices to generate specific scalar instructions. Null indicates
246 /// that all instances are to be generated, using either scalar or vector
247 /// instructions.
248 std::optional<VPIteration> Instance;
249
250 struct DataState {
251 /// A type for vectorized values in the new loop. Each value from the
252 /// original loop, when vectorized, is represented by UF vector values in
253 /// the new unrolled loop, where UF is the unroll factor.
254 typedef SmallVector<Value *, 2> PerPartValuesTy;
255
256 DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
257
258 using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
259 DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
260 } Data;
261
262 /// Get the generated vector Value for a given VPValue \p Def and a given \p
263 /// Part if \p IsScalar is false, otherwise return the generated scalar
264 /// for \p Part. \See set.
265 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
266
267 /// Get the generated Value for a given VPValue and given Part and Lane.
268 Value *get(VPValue *Def, const VPIteration &Instance);
269
270 bool hasVectorValue(VPValue *Def, unsigned Part) {
271 auto I = Data.PerPartOutput.find(Val: Def);
272 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
273 I->second[Part];
274 }
275
276 bool hasScalarValue(VPValue *Def, VPIteration Instance) {
277 auto I = Data.PerPartScalars.find(Val: Def);
278 if (I == Data.PerPartScalars.end())
279 return false;
280 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
281 return Instance.Part < I->second.size() &&
282 CacheIdx < I->second[Instance.Part].size() &&
283 I->second[Instance.Part][CacheIdx];
284 }
285
286 /// Set the generated vector Value for a given VPValue and a given Part, if \p
287 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
288 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
289 if (IsScalar) {
290 set(Def, V, Instance: VPIteration(Part, 0));
291 return;
292 }
293 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
294 "scalar values must be stored as (Part, 0)");
295 if (!Data.PerPartOutput.count(Val: Def)) {
296 DataState::PerPartValuesTy Entry(UF);
297 Data.PerPartOutput[Def] = Entry;
298 }
299 Data.PerPartOutput[Def][Part] = V;
300 }
301
302 /// Reset an existing vector value for \p Def and a given \p Part.
303 void reset(VPValue *Def, Value *V, unsigned Part) {
304 auto Iter = Data.PerPartOutput.find(Val: Def);
305 assert(Iter != Data.PerPartOutput.end() &&
306 "need to overwrite existing value");
307 Iter->second[Part] = V;
308 }
309
310 /// Set the generated scalar \p V for \p Def and the given \p Instance.
311 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
312 auto Iter = Data.PerPartScalars.insert(KV: {Def, {}});
313 auto &PerPartVec = Iter.first->second;
314 if (PerPartVec.size() <= Instance.Part)
315 PerPartVec.resize(N: Instance.Part + 1);
316 auto &Scalars = PerPartVec[Instance.Part];
317 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
318 if (Scalars.size() <= CacheIdx)
319 Scalars.resize(N: CacheIdx + 1);
320 assert(!Scalars[CacheIdx] && "should overwrite existing value");
321 Scalars[CacheIdx] = V;
322 }
323
324 /// Reset an existing scalar value for \p Def and a given \p Instance.
325 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
326 auto Iter = Data.PerPartScalars.find(Val: Def);
327 assert(Iter != Data.PerPartScalars.end() &&
328 "need to overwrite existing value");
329 assert(Instance.Part < Iter->second.size() &&
330 "need to overwrite existing value");
331 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
332 assert(CacheIdx < Iter->second[Instance.Part].size() &&
333 "need to overwrite existing value");
334 Iter->second[Instance.Part][CacheIdx] = V;
335 }
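
  // Usage sketch (with hypothetical Def/WideV values) of the per-part storage
  // above: a recipe records the vector it generated for part 0 and reads it
  // back later.
  //
  //   State.set(Def, WideV, /*Part=*/0);
  //   Value *V = State.get(Def, /*Part=*/0);
  //
  // Scalars are keyed by (Part, Lane) instead, via the set/get overloads
  // taking a VPIteration.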

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Value *To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};
414
415/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
416/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
417class VPBlockBase {
418 friend class VPBlockUtils;
419
420 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
421
422 /// An optional name for the block.
423 std::string Name;
424
425 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
426 /// it is a topmost VPBlockBase.
427 VPRegionBlock *Parent = nullptr;
428
429 /// List of predecessor blocks.
430 SmallVector<VPBlockBase *, 1> Predecessors;
431
432 /// List of successor blocks.
433 SmallVector<VPBlockBase *, 1> Successors;
434
435 /// VPlan containing the block. Can only be set on the entry block of the
436 /// plan.
437 VPlan *Plan = nullptr;
438
439 /// Add \p Successor as the last successor to this block.
440 void appendSuccessor(VPBlockBase *Successor) {
441 assert(Successor && "Cannot add nullptr successor!");
442 Successors.push_back(Elt: Successor);
443 }
444
445 /// Add \p Predecessor as the last predecessor to this block.
446 void appendPredecessor(VPBlockBase *Predecessor) {
447 assert(Predecessor && "Cannot add nullptr predecessor!");
448 Predecessors.push_back(Elt: Predecessor);
449 }
450
451 /// Remove \p Predecessor from the predecessors of this block.
452 void removePredecessor(VPBlockBase *Predecessor) {
453 auto Pos = find(Range&: Predecessors, Val: Predecessor);
454 assert(Pos && "Predecessor does not exist");
455 Predecessors.erase(CI: Pos);
456 }
457
458 /// Remove \p Successor from the successors of this block.
459 void removeSuccessor(VPBlockBase *Successor) {
460 auto Pos = find(Range&: Successors, Val: Successor);
461 assert(Pos && "Successor does not exist");
462 Successors.erase(CI: Pos);
463 }
464
465protected:
466 VPBlockBase(const unsigned char SC, const std::string &N)
467 : SubclassID(SC), Name(N) {}
468
469public:
470 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
471 /// that are actually instantiated. Values of this enumeration are kept in the
472 /// SubclassID field of the VPBlockBase objects. They are used for concrete
473 /// type identification.
474 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
475
476 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
477
478 virtual ~VPBlockBase() = default;
479
480 const std::string &getName() const { return Name; }
481
482 void setName(const Twine &newName) { Name = newName.str(); }
483
484 /// \return an ID for the concrete type of this object.
485 /// This is used to implement the classof checks. This should not be used
486 /// for any other purpose, as the values may change as LLVM evolves.
487 unsigned getVPBlockID() const { return SubclassID; }
488
489 VPRegionBlock *getParent() { return Parent; }
490 const VPRegionBlock *getParent() const { return Parent; }
491
492 /// \return A pointer to the plan containing the current block.
493 VPlan *getPlan();
494 const VPlan *getPlan() const;
495
496 /// Sets the pointer of the plan containing the block. The block must be the
497 /// entry block into the VPlan.
498 void setPlan(VPlan *ParentPlan);
499
500 void setParent(VPRegionBlock *P) { Parent = P; }
501
502 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
503 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
504 /// VPBlockBase is a VPBasicBlock, it is returned.
505 const VPBasicBlock *getEntryBasicBlock() const;
506 VPBasicBlock *getEntryBasicBlock();
507
508 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
509 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
510 /// VPBlockBase is a VPBasicBlock, it is returned.
511 const VPBasicBlock *getExitingBasicBlock() const;
512 VPBasicBlock *getExitingBasicBlock();
513
514 const VPBlocksTy &getSuccessors() const { return Successors; }
515 VPBlocksTy &getSuccessors() { return Successors; }
516
517 iterator_range<VPBlockBase **> successors() { return Successors; }
518
519 const VPBlocksTy &getPredecessors() const { return Predecessors; }
520 VPBlocksTy &getPredecessors() { return Predecessors; }
521
522 /// \return the successor of this VPBlockBase if it has a single successor.
523 /// Otherwise return a null pointer.
524 VPBlockBase *getSingleSuccessor() const {
525 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
526 }
527
528 /// \return the predecessor of this VPBlockBase if it has a single
529 /// predecessor. Otherwise return a null pointer.
530 VPBlockBase *getSinglePredecessor() const {
531 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
532 }
533
534 size_t getNumSuccessors() const { return Successors.size(); }
535 size_t getNumPredecessors() const { return Predecessors.size(); }
536
537 /// An Enclosing Block of a block B is any block containing B, including B
538 /// itself. \return the closest enclosing block starting from "this", which
539 /// has successors. \return the root enclosing block if all enclosing blocks
540 /// have no successors.
541 VPBlockBase *getEnclosingBlockWithSuccessors();
542
543 /// \return the closest enclosing block starting from "this", which has
544 /// predecessors. \return the root enclosing block if all enclosing blocks
545 /// have no predecessors.
546 VPBlockBase *getEnclosingBlockWithPredecessors();
547
548 /// \return the successors either attached directly to this VPBlockBase or, if
549 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
550 /// successors of its own, search recursively for the first enclosing
551 /// VPRegionBlock that has successors and return them. If no such
552 /// VPRegionBlock exists, return the (empty) successors of the topmost
553 /// VPBlockBase reached.
554 const VPBlocksTy &getHierarchicalSuccessors() {
555 return getEnclosingBlockWithSuccessors()->getSuccessors();
556 }
557
558 /// \return the hierarchical successor of this VPBlockBase if it has a single
559 /// hierarchical successor. Otherwise return a null pointer.
560 VPBlockBase *getSingleHierarchicalSuccessor() {
561 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
562 }
563
564 /// \return the predecessors either attached directly to this VPBlockBase or,
565 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
566 /// predecessors of its own, search recursively for the first enclosing
567 /// VPRegionBlock that has predecessors and return them. If no such
568 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
569 /// VPBlockBase reached.
570 const VPBlocksTy &getHierarchicalPredecessors() {
571 return getEnclosingBlockWithPredecessors()->getPredecessors();
572 }
573
574 /// \return the hierarchical predecessor of this VPBlockBase if it has a
575 /// single hierarchical predecessor. Otherwise return a null pointer.
576 VPBlockBase *getSingleHierarchicalPredecessor() {
577 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
578 }
579
580 /// Set a given VPBlockBase \p Successor as the single successor of this
581 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
582 /// This VPBlockBase must have no successors.
583 void setOneSuccessor(VPBlockBase *Successor) {
584 assert(Successors.empty() && "Setting one successor when others exist.");
585 assert(Successor->getParent() == getParent() &&
586 "connected blocks must have the same parent");
587 appendSuccessor(Successor);
588 }
589
590 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
591 /// successors of this VPBlockBase. This VPBlockBase is not added as
592 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
593 /// successors.
594 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
595 assert(Successors.empty() && "Setting two successors when others exist.");
596 appendSuccessor(Successor: IfTrue);
597 appendSuccessor(Successor: IfFalse);
598 }
599
600 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
601 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
602 /// as successor of any VPBasicBlock in \p NewPreds.
603 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
604 assert(Predecessors.empty() && "Block predecessors already set.");
605 for (auto *Pred : NewPreds)
606 appendPredecessor(Predecessor: Pred);
607 }
608
609 /// Remove all the predecessor of this block.
610 void clearPredecessors() { Predecessors.clear(); }
611
612 /// Remove all the successors of this block.
613 void clearSuccessors() { Successors.clear(); }
614
615 /// The method which generates the output IR that correspond to this
616 /// VPBlockBase, thereby "executing" the VPlan.
617 virtual void execute(VPTransformState *State) = 0;
618
619 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
620 static void deleteCFG(VPBlockBase *Entry);
621
622 /// Return true if it is legal to hoist instructions into this block.
623 bool isLegalToHoistInto() {
624 // There are currently no constraints that prevent an instruction to be
625 // hoisted into a VPBlockBase.
626 return true;
627 }
628
629 /// Replace all operands of VPUsers in the block with \p NewValue and also
630 /// replaces all uses of VPValues defined in the block with NewValue.
631 virtual void dropAllReferences(VPValue *NewValue) = 0;
632
633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
634 void printAsOperand(raw_ostream &OS, bool PrintType) const {
635 OS << getName();
636 }
637
638 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
639 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
640 /// consequtive numbers.
641 ///
642 /// Note that the numbering is applied to the whole VPlan, so printing
643 /// individual blocks is consistent with the whole VPlan printing.
644 virtual void print(raw_ostream &O, const Twine &Indent,
645 VPSlotTracker &SlotTracker) const = 0;
646
647 /// Print plain-text dump of this VPlan to \p O.
648 void print(raw_ostream &O) const {
649 VPSlotTracker SlotTracker(getPlan());
650 print(O, Indent: "", SlotTracker);
651 }
652
653 /// Print the successors of this block to \p O, prefixing all lines with \p
654 /// Indent.
655 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
656
657 /// Dump this VPBlockBase to dbgs().
658 LLVM_DUMP_METHOD void dump() const { print(O&: dbgs()); }
659#endif
660
661 /// Clone the current block and it's recipes without updating the operands of
662 /// the cloned recipes, including all blocks in the single-entry single-exit
663 /// region for VPRegionBlocks.
664 virtual VPBlockBase *clone() = 0;
665};
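
// A small wiring sketch (hypothetical blocks): as documented above, the
// set*Successor(s)/setPredecessors methods each update only one direction of
// an edge, so both endpoints must be wired explicitly.
//
//   Entry->setTwoSuccessors(Then, Else);
//   Then->setPredecessors({Entry});
//   Else->setPredecessors({Entry});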

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fix up the wrapped LCSSA phi node in the unique exit block. This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
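
// For example, a recipe class enables isa<>/cast<>/dyn_cast<> support by
// instantiating the macro with its VPDef ID in its public section, as
// VPWidenRecipe below does:
//
//   VP_CLASSOF_IMPL(VPDef::VPWidenSC)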

/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPScalarCastSC:
      return true;
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }
};

/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

protected:
  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

protected:
  void transferFlags(VPRecipeWithIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
        DisjointFlags(DisjointFlags) {}

protected:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}

public:
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe doesn't have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
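
// Illustration (hypothetical IR): for `%a = add nuw nsw i32 %x, %y`, a recipe
// constructed from the instruction records OperationType::OverflowingBinOp
// with HasNUW/HasNSW set; after dropPoisonGeneratingFlags(), setFlags() would
// emit a plain `add` without nuw/nsw.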
1154
1155/// This is a concrete Recipe that models a single VPlan-level instruction.
1156/// While as any Recipe it may generate a sequence of IR instructions when
1157/// executed, these instructions would always form a single-def expression as
1158/// the VPInstruction is also a single def-use vertex.
1159class VPInstruction : public VPRecipeWithIRFlags {
1160 friend class VPlanSlp;
1161
1162public:
1163 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
1164 enum {
1165 FirstOrderRecurrenceSplice =
1166 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1167 // values of a first-order recurrence.
1168 Not,
1169 SLPLoad,
1170 SLPStore,
1171 ActiveLaneMask,
1172 ExplicitVectorLength,
1173 CalculateTripCountMinusVF,
1174 // Increment the canonical IV separately for each unrolled part.
1175 CanonicalIVIncrementForPart,
1176 BranchOnCount,
1177 BranchOnCond,
1178 ComputeReductionResult,
1179 // Add an offset in bytes (second operand) to a base pointer (first
1180 // operand). Only generates scalar values (either for the first lane only or
1181 // for all lanes, depending on its uses).
1182 PtrAdd,
1183 };
1184
1185private:
1186 typedef unsigned char OpcodeTy;
1187 OpcodeTy Opcode;
1188
1189 /// An optional name that can be used for the generated IR instruction.
1190 const std::string Name;
1191
1192 /// Returns true if this VPInstruction generates scalar values for all lanes.
1193 /// Most VPInstructions generate a single value per part, either vector or
1194 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1195 /// values per all lanes, stemming from an original ingredient. This method
1196 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1197 /// underlying ingredient.
1198 bool doesGeneratePerAllLanes() const;
1199
1200 /// Returns true if we can generate a scalar for the first lane only if
1201 /// needed.
1202 bool canGenerateScalarForFirstLane() const;
1203
1204 /// Utility methods serving execute(): generates a single instance of the
1205 /// modeled instruction for a given part. \returns the generated value for \p
1206 /// Part. In some cases an existing value is returned rather than a generated
1207 /// one.
1208 Value *generatePerPart(VPTransformState &State, unsigned Part);
1209
1210 /// Utility methods serving execute(): generates a scalar single instance of
1211 /// the modeled instruction for a given lane. \returns the scalar generated
1212 /// value for lane \p Lane.
1213 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1214
1215#if !defined(NDEBUG)
1216 /// Return true if the VPInstruction is a floating point math operation, i.e.
1217 /// has fast-math flags.
1218 bool isFPMathOp() const;
1219#endif
1220
1221public:
1222 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
1223 const Twine &Name = "")
1224 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1225 Opcode(Opcode), Name(Name.str()) {}
1226
1227 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1228 DebugLoc DL = {}, const Twine &Name = "")
1229 : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
1230
1231 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1232 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1233
1234 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1235 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1236 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1237 Opcode(Opcode), Name(Name.str()) {}
1238
1239 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1240 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1241 const Twine &Name = "")
1242 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1243 Opcode(Opcode), Name(Name.str()) {
1244 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1245 }
1246
1247 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1248 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1249
1250 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1251
1252 VPInstruction *clone() override {
1253 SmallVector<VPValue *, 2> Operands(operands());
1254 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1255 New->transferFlags(Other&: *this);
1256 return New;
1257 }
1258
1259 unsigned getOpcode() const { return Opcode; }
1260
1261 /// Generate the instruction.
1262 /// TODO: We currently execute only per-part unless a specific instance is
1263 /// provided.
1264 void execute(VPTransformState &State) override;
1265
1266#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1267 /// Print the VPInstruction to \p O.
1268 void print(raw_ostream &O, const Twine &Indent,
1269 VPSlotTracker &SlotTracker) const override;
1270
1271 /// Print the VPInstruction to dbgs() (for debugging).
1272 LLVM_DUMP_METHOD void dump() const;
1273#endif
1274
1275 /// Return true if this instruction may modify memory.
1276 bool mayWriteToMemory() const {
1277 // TODO: we can use attributes of the called function to rule out memory
1278 // modifications.
1279 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1280 Opcode == Instruction::Invoke || Opcode == SLPStore;
1281 }
1282
1283 bool hasResult() const {
1284 // CallInst may or may not have a result, depending on the called function.
1285 // Conservatively return calls have results for now.
1286 switch (getOpcode()) {
1287 case Instruction::Ret:
1288 case Instruction::Br:
1289 case Instruction::Store:
1290 case Instruction::Switch:
1291 case Instruction::IndirectBr:
1292 case Instruction::Resume:
1293 case Instruction::CatchRet:
1294 case Instruction::Unreachable:
1295 case Instruction::Fence:
1296 case Instruction::AtomicRMW:
1297 case VPInstruction::BranchOnCond:
1298 case VPInstruction::BranchOnCount:
1299 return false;
1300 default:
1301 return true;
1302 }
1303 }
1304
1305 /// Returns true if the recipe only uses the first lane of operand \p Op.
1306 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1307
1308 /// Returns true if the recipe only uses the first part of operand \p Op.
1309 bool onlyFirstPartUsed(const VPValue *Op) const override {
1310 assert(is_contained(operands(), Op) &&
1311 "Op must be an operand of the recipe");
1312 if (getOperand(N: 0) != Op)
1313 return false;
1314 switch (getOpcode()) {
1315 default:
1316 return false;
1317 case VPInstruction::BranchOnCount:
1318 case VPInstruction::CanonicalIVIncrementForPart:
1319 return true;
1320 };
1321 llvm_unreachable("switch should return");
1322 }
1323};
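
// A construction sketch (hypothetical VPValue operands A and B): a
// VPInstruction is built from an opcode plus operands, using the constructors
// above, e.g. a negation and a compare:
//
//   auto *NotA = new VPInstruction(VPInstruction::Not, {A}, DL);
//   auto *Cmp = new VPInstruction(Instruction::ICmp, CmpInst::ICMP_ULT,
//                                 A, B, DL);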
1324
1325/// VPWidenRecipe is a recipe for producing a copy of vector type its
1326/// ingredient. This recipe covers most of the traditional vectorization cases
1327/// where each ingredient transforms into a vectorized version of itself.
1328class VPWidenRecipe : public VPRecipeWithIRFlags {
1329 unsigned Opcode;
1330
1331public:
1332 template <typename IterT>
1333 VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1334 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1335 Opcode(I.getOpcode()) {}
1336
1337 ~VPWidenRecipe() override = default;
1338
1339 VPWidenRecipe *clone() override {
1340 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1341 R->transferFlags(Other&: *this);
1342 return R;
1343 }
1344
1345 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1346
1347 /// Produce widened copies of all Ingredients.
1348 void execute(VPTransformState &State) override;
1349
1350 unsigned getOpcode() const { return Opcode; }
1351
1352#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1353 /// Print the recipe.
1354 void print(raw_ostream &O, const Twine &Indent,
1355 VPSlotTracker &SlotTracker) const override;
1356#endif
1357};
1358
1359/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1360class VPWidenCastRecipe : public VPRecipeWithIRFlags {
1361 /// Cast instruction opcode.
1362 Instruction::CastOps Opcode;
1363
1364 /// Result type for the cast.
1365 Type *ResultTy;
1366
1367public:
1368 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1369 CastInst &UI)
1370 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1371 ResultTy(ResultTy) {
1372 assert(UI.getOpcode() == Opcode &&
1373 "opcode of underlying cast doesn't match");
1374 assert(UI.getType() == ResultTy &&
1375 "result type of underlying cast doesn't match");
1376 }
1377
1378 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1379 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1380 ResultTy(ResultTy) {}
1381
1382 ~VPWidenCastRecipe() override = default;
1383
1384 VPWidenCastRecipe *clone() override {
1385 if (auto *UV = getUnderlyingValue())
1386 return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy,
1387 *cast<CastInst>(Val: UV));
1388
1389 return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy);
1390 }
1391
1392 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1393
1394 /// Produce widened copies of the cast.
1395 void execute(VPTransformState &State) override;
1396
1397#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1398 /// Print the recipe.
1399 void print(raw_ostream &O, const Twine &Indent,
1400 VPSlotTracker &SlotTracker) const override;
1401#endif
1402
1403 Instruction::CastOps getOpcode() const { return Opcode; }
1404
1405 /// Returns the result type of the cast.
1406 Type *getResultType() const { return ResultTy; }
1407};
1408
1409/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1410class VPScalarCastRecipe : public VPSingleDefRecipe {
1411 Instruction::CastOps Opcode;
1412
1413 Type *ResultTy;
1414
1415 Value *generate(VPTransformState &State, unsigned Part);
1416
1417public:
1418 VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1419 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1420 ResultTy(ResultTy) {}
1421
1422 ~VPScalarCastRecipe() override = default;
1423
1424 VPScalarCastRecipe *clone() override {
1425 return new VPScalarCastRecipe(Opcode, getOperand(N: 0), ResultTy);
1426 }
1427
1428 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1429
1430 void execute(VPTransformState &State) override;
1431
1432#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1433 void print(raw_ostream &O, const Twine &Indent,
1434 VPSlotTracker &SlotTracker) const override;
1435#endif
1436
1437 /// Returns the result type of the cast.
1438 Type *getResultType() const { return ResultTy; }
1439
1440 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1441 // At the moment, only uniform codegen is implemented.
1442 assert(is_contained(operands(), Op) &&
1443 "Op must be an operand of the recipe");
1444 return true;
1445 }
1446};
1447
1448/// A recipe for widening Call instructions.
1449class VPWidenCallRecipe : public VPSingleDefRecipe {
1450 /// ID of the vector intrinsic to call when widening the call. If set the
1451 /// Intrinsic::not_intrinsic, a library call will be used instead.
1452 Intrinsic::ID VectorIntrinsicID;
1453 /// If this recipe represents a library call, Variant stores a pointer to
1454 /// the chosen function. There is a 1:1 mapping between a given VF and the
1455 /// chosen vectorized variant, so there will be a different vplan for each
1456 /// VF with a valid variant.
1457 Function *Variant;
1458
1459public:
1460 template <typename IterT>
1461 VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
1462 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1463 Function *Variant = nullptr)
1464 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL),
1465 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}
1466
1467 ~VPWidenCallRecipe() override = default;
1468
1469 VPWidenCallRecipe *clone() override {
1470 return new VPWidenCallRecipe(*cast<CallInst>(Val: getUnderlyingInstr()),
1471 operands(), VectorIntrinsicID, getDebugLoc(),
1472 Variant);
1473 }
1474
1475 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1476
1477 /// Produce a widened version of the call instruction.
1478 void execute(VPTransformState &State) override;
1479
1480#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1481 /// Print the recipe.
1482 void print(raw_ostream &O, const Twine &Indent,
1483 VPSlotTracker &SlotTracker) const override;
1484#endif
1485};
1486
1487/// A recipe for widening select instructions.
1488struct VPWidenSelectRecipe : public VPSingleDefRecipe {
1489 template <typename IterT>
1490 VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
1491 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1492 I.getDebugLoc()) {}
1493
1494 ~VPWidenSelectRecipe() override = default;
1495
1496 VPWidenSelectRecipe *clone() override {
1497 return new VPWidenSelectRecipe(*cast<SelectInst>(Val: getUnderlyingInstr()),
1498 operands());
1499 }
1500
1501 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1502
1503 /// Produce a widened version of the select instruction.
1504 void execute(VPTransformState &State) override;
1505
1506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1507 /// Print the recipe.
1508 void print(raw_ostream &O, const Twine &Indent,
1509 VPSlotTracker &SlotTracker) const override;
1510#endif
1511
1512 VPValue *getCond() const {
1513 return getOperand(N: 0);
1514 }
1515
1516 bool isInvariantCond() const {
1517 return getCond()->isDefinedOutsideVectorRegions();
1518 }
1519};
1520
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideVectorRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideVectorRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideVectorRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VPWidenGEPRecipe *clone() override {
    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
                                operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexedTy
/// for all parts. If IsReverse is true, compute pointers for accessing the
/// input in reverse order per part.
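/// For example (a rough sketch), with VF=4 the pointer for part Part is
/// advanced by Part * 4 elements of IndexedTy; when IsReverse is true it is
/// additionally adjusted so that the part's elements can be loaded or stored
/// in reverse order.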
class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
  Type *IndexedTy;
  bool IsReverse;

public:
  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
                        bool IsInBounds, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                            GEPFlagsTy(IsInBounds), DL),
        IndexedTy(IndexedTy), IsReverse(IsReverse) {}

  VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

  void execute(VPTransformState &State) override;

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  VPVectorPointerRecipe *clone() override {
    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
                                     isInBounds(), getDebugLoc());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
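/// For example (illustrative), a reduction phi in the scalar loop such as
///   %red = phi i32 [ 0, %preheader ], [ %red.next, %latch ]
/// is modeled with the start value (0) as operand 0 and the VPValue for
/// %red.next, incoming via the backedge, as operand 1.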
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration.
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar
///    values per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
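/// For example (a sketch for VF=4 and a single part), an i32 induction with
/// start %start and step 2 produces the vector phi
///   <%start, %start+2, %start+4, %start+6>
/// which is advanced by the splat <8, 8, 8, 8> on each vector iteration.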
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VPWidenIntOrFpInductionRecipe *clone() override {
    return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
                                             getStepValue(), IndDesc, Trunc);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }
};

class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VPWidenPointerInductionRecipe *clone() override {
    return new VPWidenPointerInductionRecipe(
        cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
        IndDesc, IsScalarAfterVectorization);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(bool IsScalable);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPSingleDefRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
    if (Start)
      addOperand(Start);
  }

  VPWidenPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
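/// For example (illustrative), in the scalar loop
///   t = init; for (i = 0; i < n; i++) { use(t); t = a[i]; }
/// the recurrence phi for t has init as its start operand and the VPValue
/// computing a[i] as its backedge operand.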
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  VPFirstOrderRecurrencePHIRecipe *clone() override {
    return new VPFirstOrderRecurrencePHIRecipe(
        cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by
  /// \p RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VPReductionPHIRecipe *clone() override {
    auto *R =
        new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
                                 *getOperand(0), IsInLoop, IsOrdered);
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
  /// incoming value does not have a mask associated.
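  /// For example (illustrative), a phi blending three incoming values is
  /// modeled with operands [I0, I1, M1, I2, M2]; getIncomingValue(2) returns
  /// I2 and getMask(2) returns M2, while the first incoming value I0 carries
  /// no mask.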
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert((Operands.size() + 1) % 2 == 0 &&
           "Expected an odd number of operands");
  }

  VPBlendRecipe *clone() override {
    SmallVector<VPValue *> Ops(operands());
    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that the first
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
  }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const {
    assert(Idx > 0 && "First index has no mask associated.");
    return getOperand(Idx * 2);
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // The recursion through other Blend recipes must terminate at header phis
    // at the latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
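/// For example (a sketch with VF=4), an interleave group of factor 2 loading
/// a[2*i] and a[2*i+1] is lowered to a single wide load of 8 consecutive
/// elements followed by shuffles that extract the even and odd lanes for the
/// two members.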
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires
  /// a mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or
  /// if unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VPInterleaveRecipe *clone() override {
    return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
                                  NeedsMaskForGaps);
  }

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values,
    // followed by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }
};

/// A recipe to represent in-loop reduction operations, performing a reduction
/// on a vector operand into a scalar value, and adding the result to a chain.
/// The operands are {ChainOp, VecOp, [Condition]}.
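/// For example (an illustrative sketch for an in-loop integer add reduction
/// with VF=4), each vector iteration generates roughly
///   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
///   %chain.next = add i32 %chain, %r
/// where %vec is selected against the optional condition operand when one is
/// present.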
class VPReductionRecipe : public VPSingleDefRecipe {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor &RdxDesc;
  bool IsOrdered;

public:
  VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    bool IsOrdered)
      : VPSingleDefRecipe(VPDef::VPReductionSC,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), I),
        RdxDesc(R), IsOrdered(IsOrdered) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  VPReductionRecipe *clone() override {
    return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
                                 getVecOp(), getCondOp(), IsOrdered);
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// be uniform, only one copy, per lane zero, will be generated.
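/// For example (illustrative), with VF=4 and UF=2 a replicated instruction is
/// emitted as 8 scalar copies, one per lane and part; if IsUniform is set,
/// only the lane-zero copy per part is emitted instead.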
class VPReplicateRecipe : public VPRecipeWithIRFlags {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VPReplicateRecipe *clone() override {
    auto *Copy =
        new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
                              isPredicated() ? getMask() : nullptr);
    Copy->transferFlags(*this);
    return Copy;
  }

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }

  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VPBranchOnMaskRecipe *clone() override {
    return new VPBranchOnMaskRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
  Instruction &Ingredient;

  /// Whether the accessed addresses are consecutive.
  bool Consecutive;

  /// Whether the consecutive accessed addresses are in reverse order.
  bool Reverse;

  /// Whether the memory access is masked.
  bool IsMasked = false;

  void setMask(VPValue *Mask) {
    assert(!IsMasked && "cannot re-set mask");
    if (!Mask)
      return;
    addOperand(Mask);
    IsMasked = true;
  }

  VPWidenMemoryRecipe(const unsigned char SC, Instruction &I,
                      std::initializer_list<VPValue *> Operands,
                      bool Consecutive, bool Reverse, DebugLoc DL)
      : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
        Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  }

public:
  VPWidenMemoryRecipe *clone() override {
    llvm_unreachable("cloning not supported");
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  /// Return whether the consecutive loaded/stored addresses are in reverse
  /// order.
  bool isReverse() const { return Reverse; }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const { return getOperand(0); }

  /// Returns true if the recipe is masked.
  bool isMasked() const { return IsMasked; }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override {
    llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
  }

  Instruction &getIngredient() const { return Ingredient; }
};

/// A recipe for widening load operations, using the address to load from and an
/// optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
                            Reverse, DL),
        VPValue(this, &Load) {
    setMask(Mask);
  }

  VPWidenLoadRecipe *clone() override {
    return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
                                 getMask(), Consecutive, Reverse,
                                 getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)

  /// Generate a wide load or gather.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive load operations only demand the first lane of
    // their address.
    return Op == getAddr() && isConsecutive();
  }
};

/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
                            {L->getAddr(), EVL}, L->isConsecutive(), false,
                            L->getDebugLoc()),
        VPValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(1); }

  /// Generate the wide load or gather.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }
};

/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
                            Consecutive, Reverse, DL) {
    setMask(Mask);
  }

  VPWidenStoreRecipe *clone() override {
    return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                  getStoredValue(), getMask(), Consecutive,
                                  Reverse, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Generate a wide store or scatter.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive stores only demand the first lane of their address,
    // unless the same operand is also stored.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};

/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
                            {S->getAddr(), S->getStoredValue(), EVL},
                            S->isConsecutive(), false, S->getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(2); }

  /// Generate the wide store or scatter.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. The latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VPExpandSCEVRecipe *clone() override {
    return new VPExpandSCEVRecipe(Expr, SE);
  }

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Expand the SCEV expression and emit the resulting code.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};

/// Canonical scalar induction phi of the vector loop, starting at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
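/// For example (illustrative), with VF=4 and UF=2 the canonical IV phi takes
/// the scalar values 0, 8, 16, ... on successive vector iterations, and the
/// vector loop exits once the IV reaches the vector trip count.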
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
                          DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VPActiveLaneMaskPHIRecipe *clone() override {
    return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating the phi node for the current index of elements,
/// adjusted in accordance with the EVL value. It starts at the start value of
/// the canonical induction and gets incremented by EVL in each iteration of
/// the vector loop.
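/// For example (illustrative), if the first two iterations process EVLs of 4
/// and 3 elements, the phi takes the values 0, 4, 7, ..., i.e. it advances by
/// the number of elements actually processed rather than by a fixed VF * UF.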
class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}

  ~VPEVLBasedIVPHIRecipe() override = default;

  VPEVLBasedIVPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
  }

  /// Generate phi for handling IV based on EVL over iterations correctly.
  /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
        ->getScalarType();
  }
};

/// A recipe for converting the input \p IV value to the corresponding value of
/// an IV with different start and step values, using Start + IV * Step.
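/// For example (a worked sketch), for an IV with start 10 and step 3 the
/// canonical IV values 0, 1, 2, ... are mapped to 10 + 0 * 3 = 10,
/// 10 + 1 * 3 = 13, 10 + 2 * 3 = 16, and so on.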
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
      : VPDerivedIVRecipe(
            IndDesc.getKind(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
            Start, CanonicalIV, Step) {}

  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
                    const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
                    VPValue *Step)
      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
        FPBinOp(FPBinOp) {}

  ~VPDerivedIVRecipe() override = default;

  VPDerivedIVRecipe *clone() override {
    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
                                 getStepValue());
  }

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1st
  /// operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs)
      : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
                            ArrayRef<VPValue *>({IV, Step}), FMFs),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPScalarIVStepsRecipe(
            IV, Step, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags()) {}

  ~VPScalarIVStepsRecipe() override = default;

  VPScalarIVStepsRecipe *clone() override {
    return new VPScalarIVStepsRecipe(
        getOperand(0), getOperand(1), InductionOpcode,
        hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their
  /// users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to insert.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// \p SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe
  /// terminating the block. If there are no successors or only a single
  /// successor, return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

  /// Clone the current block and its recipes, without updating the operands of
  /// the cloned recipes.
  VPBasicBlock *clone() override {
    auto *NewBlock = new VPBasicBlock(getName());
    for (VPRecipeBase &R : *this)
      NewBlock->appendRecipe(R.clone());
    return NewBlock;
  }

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }
3010
3011 /// Returns the pre-header VPBasicBlock of the loop region.
3012 VPBasicBlock *getPreheaderVPBB() {
3013 assert(!isReplicator() && "should only get pre-header of loop regions");
3014 return getSinglePredecessor()->getExitingBasicBlock();
3015 }
3016
3017 /// An indicator whether this region is to generate multiple replicated
3018 /// instances of output IR corresponding to its VPBlockBases.
3019 bool isReplicator() const { return IsReplicator; }
3020
3021 /// The method which generates the output IR instructions that correspond to
3022 /// this VPRegionBlock, thereby "executing" the VPlan.
3023 void execute(VPTransformState *State) override;
3024
3025 void dropAllReferences(VPValue *NewValue) override;
3026
3027#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3028 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3029 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3030 /// consequtive numbers.
3031 ///
3032 /// Note that the numbering is applied to the whole VPlan, so printing
3033 /// individual regions is consistent with the whole VPlan printing.
3034 void print(raw_ostream &O, const Twine &Indent,
3035 VPSlotTracker &SlotTracker) const override;
3036 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3037#endif
3038
3039 /// Clone all blocks in the single-entry single-exit region of the block and
3040 /// their recipes without updating the operands of the cloned recipes.
3041 VPRegionBlock *clone() override;
3042};
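
// Illustrative sketch (not part of the header): building a replicate region
// around a predicated block. The block name and surrounding setup are
// hypothetical, and VPBasicBlock construction is shown only schematically.
//
//   VPBasicBlock *PredBB = new VPBasicBlock("pred.store.if");
//   auto *Region =
//       new VPRegionBlock(PredBB, PredBB, "pred.store", /*IsReplicator=*/true);
//   // When the region executes, its contents are generated once per
//   // (VF, UF) instance, after VF and UF have been chosen.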

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
  /// preheader of the vector loop.
  VPBasicBlock *Entry;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and
  /// the rest of VPlan execution.
  VPBasicBlock *Preheader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPValue VFxUF;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Values used outside the plan.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
  /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need
  /// to be disconnected, as the bypass blocks between them are not yet modeled
  /// in VPlan.
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
      : VPlan(Preheader, Entry) {
    TripCount = TC;
  }

  /// Construct a VPlan with original preheader \p Preheader and \p Entry to
  /// the plan. At the moment, \p Preheader and \p Entry need to be
  /// disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
      : Entry(Entry), Preheader(Preheader) {
    Entry->setPlan(this);
    Preheader->setPlan(this);
    assert(Preheader->getNumSuccessors() == 0 &&
           Preheader->getNumPredecessors() == 0 &&
           "preheader must be disconnected");
  }

  ~VPlan();

  /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
  /// the original scalar pre-header) which contains SCEV expansions that need
  /// to happen before the CFG is modified; a VPBasicBlock for the vector
  /// pre-header, followed by a region for the vector loop, followed by the
  /// middle VPBasicBlock.
  static VPlanPtr createInitialVPlan(const SCEV *TripCount,
                                     ScalarEvolution &PSE);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        Value *CanonicalIVStartValue, VPTransformState &State);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// Resets the trip count for the VPlan. The caller must make sure all uses
  /// of the original trip count have been replaced.
  void resetTripCount(VPValue *NewTripCount) {
    assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
           "TripCount must always be set");
    TripCount = NewTripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }
  bool hasScalableVF() {
    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
  }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set a UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }
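
  // Illustrative sketch (hypothetical values): a plan is typically built for
  // a range of VFs and later narrowed to the single VF/UF the cost model
  // selects, before the plan is executed:
  //
  //   Plan->addVF(ElementCount::getFixed(4));
  //   Plan->addVF(ElementCount::getFixed(8));
  //   ...
  //   Plan->setVF(ElementCount::getFixed(8)); // must already be in the plan
  //   Plan->setUF(2);                         // must satisfy hasUF(2)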

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
  /// yet) for \p V.
  VPValue *getOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      assert(VPV->isLiveIn() && "VPV must be a live-in.");
      assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
      Value2VPValue[V] = VPV;
    }

    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert(Value2VPValue[V]->isLiveIn() &&
           "Only live-ins should be in mapping");
    return Value2VPValue[V];
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion() {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  void addLiveOut(PHINode *PN, VPValue *V);

  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }

  /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
  /// recipes to refer to the clones, and return it.
  VPlan *duplicate();

private:
  /// Add to the given dominator tree the header block and every new basic
  /// block that was created between it and the latch block, inclusive.
  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
                                  BasicBlock *LoopPreHeaderBB,
                                  BasicBlock *LoopExitBB);
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by
  /// printing its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
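
// With this operator, a plan can be streamed directly for debugging, e.g.
// (assuming `Plan` is a `VPlan &` in scope):
//
//   llvm::errs() << Plan << "\n";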
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }
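
  // Sketch of the resulting rewiring (block names hypothetical): given an
  // edge `A -> C` and a fresh block `B`, `insertBlockAfter(B, A)` yields
  // `A -> B -> C`, with B inheriting A's parent region:
  //
  //   VPBasicBlock *B = new VPBasicBlock("new.block");
  //   VPBlockUtils::insertBlockAfter(B, A);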

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and
  /// \p BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p
  /// BlockPtr parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no
  /// successors and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }
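
  // Sketch of building an if-then-else diamond (block names hypothetical);
  // the join block is connected separately via connectBlocks:
  //
  //   VPBlockUtils::insertTwoBlocksAfter(ThenBB, ElseBB, CondBB);
  //   VPBlockUtils::connectBlocks(ThenBB, MergeBB);
  //   VPBlockUtils::connectBlocks(ElseBB, MergeBB);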

  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two blocks with different parents");
    assert(From->getNumSuccessors() < 2 &&
           "Blocks can't have more than two successors.");
    From->appendSuccessor(To);
    To->appendPredecessor(From);
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p
  /// To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are cast to \p BlockTy.
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy & instead
    // of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }
};
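
// Sketch of using blocksOnly to iterate over only the VPBasicBlocks in a
// range of blocks (here a block's successors; `Region` is hypothetical):
//
//   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
//            Region->getEntry()->getSuccessors()))
//     ...;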

class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave
  /// groups based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we have completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are currently building a multi node.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions could be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
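
// Illustrative sketch (setup hypothetical): attempting to combine the stored
// values of adjacent stores in a single VPBasicBlock into one wide
// VPInstruction:
//
//   VPInterleavedAccessInfo VPIAI(*Plan, IAI);
//   VPlanSlp Slp(VPIAI, *VPBB);
//   SmallVector<VPValue *, 4> StoredValues = {Store0Val, Store1Val};
//   VPInstruction *Combined = Slp.buildGraph(StoredValues);
//   if (Combined && Slp.isCompletelySLP())
//     ...; // use Combined and Slp.getWidestBundleBits()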

namespace vputils {

/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(const VPValue *Def);

/// Returns true if only the first part of \p Def is used.
bool onlyFirstPartUsed(const VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p
/// Plan's pre-header already contains a recipe expanding \p Expr, return it.
/// If not, create a new one.
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
                                       ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(VPValue *VPV) {
  // A value defined outside the vector region must be uniform after
  // vectorization inside a vector region.
  if (VPV->isDefinedOutsideVectorRegions())
    return true;
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  assert(Def && "Must have definition for value defined inside vector region");
  if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
    return Rep->isUniform();
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  if (auto *VPI = dyn_cast<VPInstruction>(Def))
    return VPI->getOpcode() == VPInstruction::ComputeReductionResult;
  return false;
}
} // end namespace vputils

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H