IndVarSimplify.cpp source code [llvm/lib/Transforms/Scalar/IndVarSimplify.cpp]

1	//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into simpler forms suitable for subsequent
11	// analysis and transformation.
12	//
13	// If the trip count of a loop is computable, this pass also makes the following
14	// changes:
15	// 1. The exit condition for the loop is canonicalized to compare the
16	// induction value against the exit value. This turns loops like:
17	// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18	// 2. Any use outside of the loop of an expression derived from the indvar
19	// is changed to compute the derived value outside of the loop, eliminating
20	// the dependence on the exit value of the induction variable. If the only
21	// purpose of the loop is to compute the exit value of some derived
22	// expression, this transformation will make the loop dead.
23	//
24	//===----------------------------------------------------------------------===//
25
26	#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27	#include "llvm/ADT/APFloat.h"
28	#include "llvm/ADT/ArrayRef.h"
29	#include "llvm/ADT/STLExtras.h"
30	#include "llvm/ADT/SmallPtrSet.h"
31	#include "llvm/ADT/SmallSet.h"
32	#include "llvm/ADT/SmallVector.h"
33	#include "llvm/ADT/Statistic.h"
34	#include "llvm/ADT/iterator_range.h"
35	#include "llvm/Analysis/LoopInfo.h"
36	#include "llvm/Analysis/LoopPass.h"
37	#include "llvm/Analysis/MemorySSA.h"
38	#include "llvm/Analysis/MemorySSAUpdater.h"
39	#include "llvm/Analysis/ScalarEvolution.h"
40	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41	#include "llvm/Analysis/TargetLibraryInfo.h"
42	#include "llvm/Analysis/TargetTransformInfo.h"
43	#include "llvm/Analysis/ValueTracking.h"
44	#include "llvm/IR/BasicBlock.h"
45	#include "llvm/IR/Constant.h"
46	#include "llvm/IR/ConstantRange.h"
47	#include "llvm/IR/Constants.h"
48	#include "llvm/IR/DataLayout.h"
49	#include "llvm/IR/DerivedTypes.h"
50	#include "llvm/IR/Dominators.h"
51	#include "llvm/IR/Function.h"
52	#include "llvm/IR/IRBuilder.h"
53	#include "llvm/IR/InstrTypes.h"
54	#include "llvm/IR/Instruction.h"
55	#include "llvm/IR/Instructions.h"
56	#include "llvm/IR/IntrinsicInst.h"
57	#include "llvm/IR/Intrinsics.h"
58	#include "llvm/IR/Module.h"
59	#include "llvm/IR/Operator.h"
60	#include "llvm/IR/PassManager.h"
61	#include "llvm/IR/PatternMatch.h"
62	#include "llvm/IR/Type.h"
63	#include "llvm/IR/Use.h"
64	#include "llvm/IR/User.h"
65	#include "llvm/IR/Value.h"
66	#include "llvm/IR/ValueHandle.h"
67	#include "llvm/Support/Casting.h"
68	#include "llvm/Support/CommandLine.h"
69	#include "llvm/Support/Compiler.h"
70	#include "llvm/Support/Debug.h"
71	#include "llvm/Support/MathExtras.h"
72	#include "llvm/Support/raw_ostream.h"
73	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74	#include "llvm/Transforms/Utils/Local.h"
75	#include "llvm/Transforms/Utils/LoopUtils.h"
76	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
77	#include "llvm/Transforms/Utils/SimplifyIndVar.h"
78	#include <cassert>
79	#include <cstdint>
80	#include <utility>
81
82	using namespace llvm;
83	using namespace PatternMatch;
84
85	#define DEBUG_TYPE "indvars"
86
87	STATISTIC(NumWidened , "Number of indvars widened");
88	STATISTIC(NumReplaced , "Number of exit values replaced");
89	STATISTIC(NumLFTR , "Number of loop exit tests replaced");
90	STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
91	STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
92
93	static cl::opt<ReplaceExitVal> ReplaceExitValue(
94	"replexitval", cl::Hidden, cl::init(Val: OnlyCheapRepl),
95	cl::desc ("Choose the strategy to replace exit value in IndVarSimplify"),
96	cl::values(
97	clEnumValN(NeverRepl, "never", "never replace exit value"),
98	clEnumValN(OnlyCheapRepl, "cheap",
99	"only replace exit value when the cost is cheap"),
100	clEnumValN(
101	UnusedIndVarInLoop, "unusedindvarinloop",
102	"only replace exit value when it is an unused "
103	"induction variable in the loop and has cheap replacement cost"),
104	clEnumValN(NoHardUse, "noharduse",
105	"only replace exit values when loop def likely dead"),
106	clEnumValN(AlwaysRepl, "always",
107	"always replace exit value whenever possible")));
108
109	static cl::opt<bool> UsePostIncrementRanges(
110	"indvars-post-increment-ranges", cl::Hidden,
111	cl::desc ("Use post increment control-dependent ranges in IndVarSimplify"),
112	cl::init(Val: true));
113
114	static cl::opt<bool>
115	DisableLFTR("disable-lftr", cl::Hidden, cl::init(Val: false),
116	cl::desc ("Disable Linear Function Test Replace optimization"));
117
118	static cl::opt<bool>
119	LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(Val: true),
120	cl::desc ("Predicate conditions in read only loops"));
121
122	static cl::opt<bool>
123	AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(Val: true),
124	cl::desc ("Allow widening of indvars to eliminate s/zext"));
125
126	namespace {
127
128	class IndVarSimplify {
129	LoopInfo *LI;
130	ScalarEvolution *SE;
131	DominatorTree *DT;
132	const DataLayout &DL;
133	TargetLibraryInfo *TLI;
134	const TargetTransformInfo *TTI;
135	std::unique_ptr<MemorySSAUpdater> MSSAU;
136
137	SmallVector<WeakTrackingVH, `16`> DeadInsts;
138	bool WidenIndVars;
139
140	bool handleFloatingPointIV(Loop L, PHINode PH);
141	bool rewriteNonIntegerIVs(Loop *L);
142
143	bool simplifyAndExtend(Loop L, SCEVExpander &Rewriter, LoopInfo LI);
144	/// Try to improve our exit conditions by converting condition from signed
145	/// to unsigned or rotating computation out of the loop.
146	/// (See inline comment about why this is duplicated from simplifyAndExtend)
147	bool canonicalizeExitCondition(Loop *L);
148	/// Try to eliminate loop exits based on analyzeable exit counts
149	bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150	/// Try to form loop invariant tests for loop exits by changing how many
151	/// iterations of the loop run when that is unobservable.
152	bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153
154	bool rewriteFirstIterationLoopExitValues(Loop *L);
155
156	bool linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
157	const SCEV *ExitCount,
158	PHINode *IndVar, SCEVExpander &Rewriter);
159
160	bool sinkUnusedInvariants(Loop *L);
161
162	public:
163	IndVarSimplify(LoopInfo LI, ScalarEvolution SE, DominatorTree *DT,
164	const DataLayout &DL, TargetLibraryInfo *TLI,
165	TargetTransformInfo TTI, MemorySSA MSSA, bool WidenIndVars)
166	: LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167	WidenIndVars(WidenIndVars) {
168	if (MSSA)
169	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
170	}
171
172	bool run(Loop *L);
173	};
174
175	} // end anonymous namespace
176
177	//===----------------------------------------------------------------------===//
178	// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179	//===----------------------------------------------------------------------===//
180
181	/// Convert APF to an integer, if possible.
182	static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
183	bool isExact = false;
184	// See if we can convert this to an int64_t
185	uint64_t UIntVal;
186	if (APF.convertToInteger(Input: MutableArrayRef(UIntVal), Width: `64`, IsSigned: true,
187	RM: APFloat::rmTowardZero, IsExact: &isExact) != APFloat::opOK \|\|
188	!isExact)
189	return false;
190	IntVal = UIntVal;
191	return true;
192	}
193
194	/// If the loop has floating induction variable then insert corresponding
195	/// integer induction variable if possible.
196	/// For example,
197	/// for(double i = 0; i < 10000; ++i)
198	/// bar(i)
199	/// is converted into
200	/// for(int i = 0; i < 10000; ++i)
201	/// bar((double)i);
202	bool IndVarSimplify::handleFloatingPointIV(Loop L, PHINode PN) {
203	unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: `0`));
204	unsigned BackEdge = IncomingEdge^`1`;
205
206	// Check incoming value.
207	auto *InitValueVal = dyn_cast<ConstantFP>(Val: PN->getIncomingValue(i: IncomingEdge));
208
209	int64_t InitValue;
210	if (!InitValueVal \|\| !ConvertToSInt(APF: InitValueVal->getValueAPF(), IntVal&: InitValue))
211	return false;
212
213	// Check IV increment. Reject this PN if increment operation is not
214	// an add or increment value can not be represented by an integer.
215	auto *Incr = dyn_cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
216	if (Incr == nullptr \|\| Incr->getOpcode() != Instruction::FAdd) return false;
217
218	// If this is not an add of the PHI with a constantfp, or if the constant fp
219	// is not an integer, bail out.
220	ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Val: Incr->getOperand(i_nocapture: `1`));
221	int64_t IncValue;
222	if (IncValueVal == nullptr \|\| Incr->getOperand(i_nocapture: `0`) != PN \|\|
223	!ConvertToSInt(APF: IncValueVal->getValueAPF(), IntVal&: IncValue))
224	return false;
225
226	// Check Incr uses. One user is PN and the other user is an exit condition
227	// used by the conditional terminator.
228	Value::user_iterator IncrUse = Incr->user_begin();
229	Instruction U1 = cast<Instruction>(Val: IncrUse ++);
230	if (IncrUse == Incr->user_end()) return false;
231	Instruction U2 = cast<Instruction>(Val: IncrUse ++);
232	if (IncrUse != Incr->user_end()) return false;
233
234	// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
235	// only used by a branch, we can't transform it.
236	FCmpInst *Compare = dyn_cast<FCmpInst>(Val: U1);
237	if (!Compare)
238	Compare = dyn_cast<FCmpInst>(Val: U2);
239	if (!Compare \|\| !Compare->hasOneUse() \|\|
240	!isa<BranchInst>(Val: Compare->user_back()))
241	return false;
242
243	BranchInst *TheBr = cast<BranchInst>(Val: Compare->user_back());
244
245	// We need to verify that the branch actually controls the iteration count
246	// of the loop. If not, the new IV can overflow and no one will notice.
247	// The branch block must be in the loop and one of the successors must be out
248	// of the loop.
249	assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
250	if (!L->contains(BB: TheBr->getParent()) \|\|
251	(L->contains(BB: TheBr->getSuccessor(i: `0`)) &&
252	L->contains(BB: TheBr->getSuccessor(i: `1`))))
253	return false;
254
255	// If it isn't a comparison with an integer-as-fp (the exit value), we can't
256	// transform it.
257	ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Val: Compare->getOperand(i_nocapture: `1`));
258	int64_t ExitValue;
259	if (ExitValueVal == nullptr \|\|
260	!ConvertToSInt(APF: ExitValueVal->getValueAPF(), IntVal&: ExitValue))
261	return false;
262
263	// Find new predicate for integer comparison.
264	CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
265	switch (Compare->getPredicate()) {
266	default: return false; // Unknown comparison.
267	case CmpInst::FCMP_OEQ:
268	case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
269	case CmpInst::FCMP_ONE:
270	case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
271	case CmpInst::FCMP_OGT:
272	case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
273	case CmpInst::FCMP_OGE:
274	case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
275	case CmpInst::FCMP_OLT:
276	case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
277	case CmpInst::FCMP_OLE:
278	case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
279	}
280
281	// We convert the floating point induction variable to a signed i32 value if
282	// we can. This is only safe if the comparison will not overflow in a way
283	// that won't be trapped by the integer equivalent operations. Check for this
284	// now.
285	// TODO: We could use i64 if it is native and the range requires it.
286
287	// The start/stride/exit values must all fit in signed i32.
288	if (!isInt<`32`>(x: InitValue) \|\| !isInt<`32`>(x: IncValue) \|\| !isInt<`32`>(x: ExitValue))
289	return false;
290
291	// If not actually striding (add x, 0.0), avoid touching the code.
292	if (IncValue == `0`)
293	return false;
294
295	// Positive and negative strides have different safety conditions.
296	if (IncValue > `0`) {
297	// If we have a positive stride, we require the init to be less than the
298	// exit value.
299	if (InitValue >= ExitValue)
300	return false;
301
302	uint32_t Range = uint32_t(ExitValue-InitValue);
303	// Check for infinite loop, either:
304	// while (i <= Exit) or until (i > Exit)
305	if (NewPred == CmpInst::ICMP_SLE \|\| NewPred == CmpInst::ICMP_SGT) {
306	if (++Range == `0`) return false; // Range overflows.
307	}
308
309	unsigned Leftover = Range % uint32_t(IncValue);
310
311	// If this is an equality comparison, we require that the strided value
312	// exactly land on the exit value, otherwise the IV condition will wrap
313	// around and do things the fp IV wouldn't.
314	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
315	Leftover != `0`)
316	return false;
317
318	// If the stride would wrap around the i32 before exiting, we can't
319	// transform the IV.
320	if (Leftover != `0` && int32_t(ExitValue+IncValue) < ExitValue)
321	return false;
322	} else {
323	// If we have a negative stride, we require the init to be greater than the
324	// exit value.
325	if (InitValue <= ExitValue)
326	return false;
327
328	uint32_t Range = uint32_t(InitValue-ExitValue);
329	// Check for infinite loop, either:
330	// while (i >= Exit) or until (i < Exit)
331	if (NewPred == CmpInst::ICMP_SGE \|\| NewPred == CmpInst::ICMP_SLT) {
332	if (++Range == `0`) return false; // Range overflows.
333	}
334
335	unsigned Leftover = Range % uint32_t(-IncValue);
336
337	// If this is an equality comparison, we require that the strided value
338	// exactly land on the exit value, otherwise the IV condition will wrap
339	// around and do things the fp IV wouldn't.
340	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
341	Leftover != `0`)
342	return false;
343
344	// If the stride would wrap around the i32 before exiting, we can't
345	// transform the IV.
346	if (Leftover != `0` && int32_t(ExitValue+IncValue) > ExitValue)
347	return false;
348	}
349
350	IntegerType *Int32Ty = Type::getInt32Ty(C&: PN->getContext());
351
352	// Insert new integer induction variable.
353	PHINode *NewPHI = PHINode::Create(Ty: Int32Ty, NumReservedValues: `2`, NameStr: PN->getName()+".int", InsertBefore: PN);
354	NewPHI->addIncoming(V: ConstantInt::get(Ty: Int32Ty, V: InitValue),
355	BB: PN->getIncomingBlock(i: IncomingEdge));
356
357	Value *NewAdd =
358	BinaryOperator::CreateAdd(V1: NewPHI, V2: ConstantInt::get(Ty: Int32Ty, V: IncValue),
359	Name: Incr->getName()+".int", I: Incr);
360	NewPHI->addIncoming(V: NewAdd, BB: PN->getIncomingBlock(i: BackEdge));
361
362	ICmpInst NewCompare = new* ICmpInst (TheBr, NewPred, NewAdd,
363	ConstantInt::get(Ty: Int32Ty, V: ExitValue),
364	Compare->getName());
365
366	// In the following deletions, PN may become dead and may be deleted.
367	// Use a WeakTrackingVH to observe whether this happens.
368	WeakTrackingVH WeakPH = PN;
369
370	// Delete the old floating point exit comparison. The branch starts using the
371	// new comparison.
372	NewCompare->takeName(V: Compare);
373	Compare->replaceAllUsesWith(V: NewCompare);
374	RecursivelyDeleteTriviallyDeadInstructions(V: Compare, TLI, MSSAU: MSSAU.get());
375
376	// Delete the old floating point increment.
377	Incr->replaceAllUsesWith(V: PoisonValue::get(T: Incr->getType()));
378	RecursivelyDeleteTriviallyDeadInstructions(V: Incr, TLI, MSSAU: MSSAU.get());
379
380	// If the FP induction variable still has uses, this is because something else
381	// in the loop uses its value. In order to canonicalize the induction
382	// variable, we chose to eliminate the IV and rewrite it in terms of an
383	// int->fp cast.
384	//
385	// We give preference to sitofp over uitofp because it is faster on most
386	// platforms.
387	if (WeakPH) {
388	Value Conv = new* SIToFPInst (NewPHI, PN->getType(), "indvar.conv",
389	&*PN->getParent()->getFirstInsertionPt());
390	PN->replaceAllUsesWith(V: Conv);
391	RecursivelyDeleteTriviallyDeadInstructions(V: PN, TLI, MSSAU: MSSAU.get());
392	}
393	return true;
394	}
395
396	bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
397	// First step. Check to see if there are any floating-point recurrences.
398	// If there are, change them into integer recurrences, permitting analysis by
399	// the SCEV routines.
400	BasicBlock *Header = L->getHeader();
401
402	SmallVector<WeakTrackingVH, `8`> PHIs;
403	for (PHINode &PN : Header->phis())
404	PHIs.push_back(Elt: &PN);
405
406	bool Changed = false;
407	for (WeakTrackingVH &PHI : PHIs)
408	if (PHINode PN = dyn_cast_or_null<PHINode>(Val: &PHI))
409	Changed \|= handleFloatingPointIV(L, PN);
410
411	// If the loop previously had floating-point IV, ScalarEvolution
412	// may not have been able to compute a trip count. Now that we've done some
413	// re-writing, the trip count may be computable.
414	if (Changed)
415	SE->forgetLoop(L);
416	return Changed;
417	}
418
419	//===---------------------------------------------------------------------===//
420	// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
421	// they will exit at the first iteration.
422	//===---------------------------------------------------------------------===//
423
424	/// Check to see if this loop has loop invariant conditions which lead to loop
425	/// exits. If so, we know that if the exit path is taken, it is at the first
426	/// loop iteration. This lets us predict exit values of PHI nodes that live in
427	/// loop header.
428	bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
429	// Verify the input to the pass is already in LCSSA form.
430	assert(L->isLCSSAForm(*DT));
431
432	SmallVector<BasicBlock *, `8`> ExitBlocks;
433	L->getUniqueExitBlocks(ExitBlocks);
434
435	bool MadeAnyChanges = false;
436	for (auto *ExitBB : ExitBlocks) {
437	// If there are no more PHI nodes in this exit block, then no more
438	// values defined inside the loop are used on this path.
439	for (PHINode &PN : ExitBB->phis()) {
440	for (unsigned IncomingValIdx = `0`, E = PN.getNumIncomingValues();
441	IncomingValIdx != E; ++IncomingValIdx) {
442	auto *IncomingBB = PN.getIncomingBlock(i: IncomingValIdx);
443
444	// Can we prove that the exit must run on the first iteration if it
445	// runs at all? (i.e. early exits are fine for our purposes, but
446	// traces which lead to this exit being taken on the 2nd iteration
447	// aren't.) Note that this is about whether the exit branch is
448	// executed, not about whether it is taken.
449	if (!L->getLoopLatch() \|\|
450	!DT->dominates(A: IncomingBB, B: L->getLoopLatch()))
451	continue;
452
453	// Get condition that leads to the exit path.
454	auto *TermInst = IncomingBB->getTerminator();
455
456	Value Cond = nullptr*;
457	if (auto *BI = dyn_cast<BranchInst>(Val: TermInst)) {
458	// Must be a conditional branch, otherwise the block
459	// should not be in the loop.
460	Cond = BI->getCondition();
461	} else if (auto *SI = dyn_cast<SwitchInst>(Val: TermInst))
462	Cond = SI->getCondition();
463	else
464	continue;
465
466	if (!L->isLoopInvariant(V: Cond))
467	continue;
468
469	auto *ExitVal = dyn_cast<PHINode>(Val: PN.getIncomingValue(i: IncomingValIdx));
470
471	// Only deal with PHIs in the loop header.
472	if (!ExitVal \|\| ExitVal->getParent() != L->getHeader())
473	continue;
474
475	// If ExitVal is a PHI on the loop header, then we know its
476	// value along this exit because the exit can only be taken
477	// on the first iteration.
478	auto *LoopPreheader = L->getLoopPreheader();
479	assert(LoopPreheader && "Invalid loop");
480	int PreheaderIdx = ExitVal->getBasicBlockIndex(BB: LoopPreheader);
481	if (PreheaderIdx != -`1`) {
482	assert(ExitVal->getParent() == L->getHeader() &&
483	"ExitVal must be in loop header");
484	MadeAnyChanges = true;
485	PN.setIncomingValue(i: IncomingValIdx,
486	V: ExitVal->getIncomingValue(i: PreheaderIdx));
487	SE->forgetValue(V: &PN);
488	}
489	}
490	}
491	}
492	return MadeAnyChanges;
493	}
494
495	//===----------------------------------------------------------------------===//
496	// IV Widening - Extend the width of an IV to cover its widest uses.
497	//===----------------------------------------------------------------------===//
498
499	/// Update information about the induction variable that is extended by this
500	/// sign or zero extend operation. This is used to determine the final width of
501	/// the IV before actually widening it.
502	static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
503	ScalarEvolution *SE,
504	const TargetTransformInfo *TTI) {
505	bool IsSigned = Cast->getOpcode() == Instruction::SExt;
506	if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
507	return;
508
509	Type *Ty = Cast->getType();
510	uint64_t Width = SE->getTypeSizeInBits(Ty);
511	if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
512	return;
513
514	// Check that `Cast` actually extends the induction variable (we rely on this
515	// later). This takes care of cases where `Cast` is extending a truncation of
516	// the narrow induction variable, and thus can end up being narrower than the
517	// "narrow" induction variable.
518	uint64_t NarrowIVWidth = SE->getTypeSizeInBits(Ty: WI.NarrowIV->getType());
519	if (NarrowIVWidth >= Width)
520	return;
521
522	// Cast is either an sext or zext up to this point.
523	// We should not widen an indvar if arithmetics on the wider indvar are more
524	// expensive than those on the narrower indvar. We check only the cost of ADD
525	// because at least an ADD is required to increment the induction variable. We
526	// could compute more comprehensively the cost of all instructions on the
527	// induction variable when necessary.
528	if (TTI &&
529	TTI->getArithmeticInstrCost(Opcode: Instruction::Add, Ty) >
530	TTI->getArithmeticInstrCost(Opcode: Instruction::Add,
531	Ty: Cast->getOperand(i_nocapture: `0`)->getType())) {
532	return;
533	}
534
535	if (!WI.WidestNativeType \|\|
536	Width > SE->getTypeSizeInBits(Ty: WI.WidestNativeType)) {
537	WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
538	WI.IsSigned = IsSigned;
539	return;
540	}
541
542	// We extend the IV to satisfy the sign of its user(s), or 'signed'
543	// if there are multiple users with both sign- and zero extensions,
544	// in order not to introduce nondeterministic behaviour based on the
545	// unspecified order of a PHI nodes' users-iterator.
546	WI.IsSigned \|= IsSigned;
547	}
548
549	//===----------------------------------------------------------------------===//
550	// Live IV Reduction - Minimize IVs live across the loop.
551	//===----------------------------------------------------------------------===//
552
553	//===----------------------------------------------------------------------===//
554	// Simplification of IV users based on SCEV evaluation.
555	//===----------------------------------------------------------------------===//
556
557	namespace {
558
559	class IndVarSimplifyVisitor : public IVVisitor {
560	ScalarEvolution *SE;
561	const TargetTransformInfo *TTI;
562	PHINode *IVPhi;
563
564	public:
565	WideIVInfo WI;
566
567	IndVarSimplifyVisitor(PHINode IV, ScalarEvolution SCEV,
568	const TargetTransformInfo *TTI,
569	const DominatorTree *DTree)
570	: SE(SCEV), TTI(TTI), IVPhi(IV) {
571	DT = DTree;
572	WI.NarrowIV = IVPhi;
573	}
574
575	// Implement the interface used by simplifyUsersOfIV.
576	void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
577	};
578
579	} // end anonymous namespace
580
581	/// Iteratively perform simplification on a worklist of IV users. Each
582	/// successive simplification may push more users which may themselves be
583	/// candidates for simplification.
584	///
585	/// Sign/Zero extend elimination is interleaved with IV simplification.
586	bool IndVarSimplify::simplifyAndExtend(Loop *L,
587	SCEVExpander &Rewriter,
588	LoopInfo *LI) {
589	SmallVector<WideIVInfo, `8`> WideIVs;
590
591	auto *GuardDecl = L->getBlocks()[`0`]->getModule()->getFunction(
592	Intrinsic::getName(Intrinsic::experimental_guard));
593	bool HasGuards = GuardDecl && !GuardDecl->use_empty();
594
595	SmallVector<PHINode *, `8`> LoopPhis;
596	for (PHINode &PN : L->getHeader()->phis())
597	LoopPhis.push_back(Elt: &PN);
598
599	// Each round of simplification iterates through the SimplifyIVUsers worklist
600	// for all current phis, then determines whether any IVs can be
601	// widened. Widening adds new phis to LoopPhis, inducing another round of
602	// simplification on the wide IVs.
603	bool Changed = false;
604	while (!LoopPhis.empty()) {
605	// Evaluate as many IV expressions as possible before widening any IVs. This
606	// forces SCEV to set no-wrap flags before evaluating sign/zero
607	// extension. The first time SCEV attempts to normalize sign/zero extension,
608	// the result becomes final. So for the most predictable results, we delay
609	// evaluation of sign/zero extend evaluation until needed, and avoid running
610	// other SCEV based analysis prior to simplifyAndExtend.
611	do {
612	PHINode *CurrIV = LoopPhis.pop_back_val();
613
614	// Information about sign/zero extensions of CurrIV.
615	IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
616
617	Changed \|= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, Dead&: DeadInsts, Rewriter,
618	V: &Visitor);
619
620	if (Visitor.WI.WidestNativeType) {
621	WideIVs.push_back(Elt: Visitor.WI);
622	}
623	} while(!LoopPhis.empty());
624
625	// Continue if we disallowed widening.
626	if (!WidenIndVars)
627	continue;
628
629	for (; !WideIVs.empty(); WideIVs.pop_back()) {
630	unsigned ElimExt;
631	unsigned Widened;
632	if (PHINode *WidePhi = createWideIV(WI: WideIVs.back(), LI, SE, Rewriter,
633	DT, DeadInsts, NumElimExt&: ElimExt, NumWidened&: Widened,
634	HasGuards, UsePostIncrementRanges)) {
635	NumElimExt += ElimExt;
636	NumWidened += Widened;
637	Changed = true;
638	LoopPhis.push_back(Elt: WidePhi);
639	}
640	}
641	}
642	return Changed;
643	}
644
645	//===----------------------------------------------------------------------===//
646	// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
647	//===----------------------------------------------------------------------===//
648
649	/// Given an Value which is hoped to be part of an add recurance in the given
650	/// loop, return the associated Phi node if so. Otherwise, return null. Note
651	/// that this is less general than SCEVs AddRec checking.
652	static PHINode getLoopPhiForCounter(Value IncV, Loop *L) {
653	Instruction *IncI = dyn_cast<Instruction>(Val: IncV);
654	if (!IncI)
655	return nullptr;
656
657	switch (IncI->getOpcode()) {
658	case Instruction::Add:
659	case Instruction::Sub:
660	break;
661	case Instruction::GetElementPtr:
662	// An IV counter must preserve its type.
663	if (IncI->getNumOperands() == `2`)
664	break;
665	[[fallthrough]];
666	default:
667	return nullptr;
668	}
669
670	PHINode *Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `0`));
671	if (Phi && Phi->getParent() == L->getHeader()) {
672	if (L->isLoopInvariant(V: IncI->getOperand(i: `1`)))
673	return Phi;
674	return nullptr;
675	}
676	if (IncI->getOpcode() == Instruction::GetElementPtr)
677	return nullptr;
678
679	// Allow add/sub to be commuted.
680	Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `1`));
681	if (Phi && Phi->getParent() == L->getHeader()) {
682	if (L->isLoopInvariant(V: IncI->getOperand(i: `0`)))
683	return Phi;
684	}
685	return nullptr;
686	}
687
688	/// Whether the current loop exit test is based on this value. Currently this
689	/// is limited to a direct use in the loop condition.
690	static bool isLoopExitTestBasedOn(Value V, BasicBlock ExitingBB) {
691	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
692	ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
693	// TODO: Allow non-icmp loop test.
694	if (!ICmp)
695	return false;
696
697	// TODO: Allow indirect use.
698	return ICmp->getOperand(i_nocapture: `0`) == V \|\| ICmp->getOperand(i_nocapture: `1`) == V;
699	}
700
701	/// linearFunctionTestReplace policy. Return true unless we can show that the
702	/// current exit test is already sufficiently canonical.
703	static bool needsLFTR(Loop L, BasicBlock ExitingBB) {
704	assert(L->getLoopLatch() && "Must be in simplified form");
705
706	// Avoid converting a constant or loop invariant test back to a runtime
707	// test. This is critical for when SCEV's cached ExitCount is less precise
708	// than the current IR (such as after we've proven a particular exit is
709	// actually dead and thus the BE count never reaches our ExitCount.)
710	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
711	if (L->isLoopInvariant(V: BI->getCondition()))
712	return false;
713
714	// Do LFTR to simplify the exit condition to an ICMP.
715	ICmpInst *Cond = dyn_cast<ICmpInst>(Val: BI->getCondition());
716	if (!Cond)
717	return true;
718
719	// Do LFTR to simplify the exit ICMP to EQ/NE
720	ICmpInst::Predicate Pred = Cond->getPredicate();
721	if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
722	return true;
723
724	// Look for a loop invariant RHS
725	Value *LHS = Cond->getOperand(i_nocapture: `0`);
726	Value *RHS = Cond->getOperand(i_nocapture: `1`);
727	if (!L->isLoopInvariant(V: RHS)) {
728	if (!L->isLoopInvariant(V: LHS))
729	return true;
730	std::swap(a&: LHS, b&: RHS);
731	}
732	// Look for a simple IV counter LHS
733	PHINode *Phi = dyn_cast<PHINode>(Val: LHS);
734	if (!Phi)
735	Phi = getLoopPhiForCounter(IncV: LHS, L);
736
737	if (!Phi)
738	return true;
739
740	// Do LFTR if PHI node is defined in the loop, but is not* a counter.*
741	int Idx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
742	if (Idx < `0`)
743	return true;
744
745	// Do LFTR if the exit condition's IV is not* a simple counter.*
746	Value *IncV = Phi->getIncomingValue(i: Idx);
747	return Phi != getLoopPhiForCounter(IncV, L);
748	}
749
750	/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
751	/// down to checking that all operands are constant and listing instructions
752	/// that may hide undef.
753	static bool hasConcreteDefImpl(Value V, SmallPtrSetImpl<Value> &Visited,
754	unsigned Depth) {
755	if (isa<Constant>(Val: V))
756	return !isa<UndefValue>(Val: V);
757
758	if (Depth >= `6`)
759	return false;
760
761	// Conservatively handle non-constant non-instructions. For example, Arguments
762	// may be undef.
763	Instruction *I = dyn_cast<Instruction>(Val: V);
764	if (!I)
765	return false;
766
767	// Load and return values may be undef.
768	if(I->mayReadFromMemory() \|\| isa<CallInst>(Val: I) \|\| isa<InvokeInst>(Val: I))
769	return false;
770
771	// Optimistically handle other instructions.
772	for (Value *Op : I->operands()) {
773	if (!Visited.insert(Ptr: Op).second)
774	continue;
775	if (!hasConcreteDefImpl(V: Op, Visited, Depth: Depth+`1`))
776	return false;
777	}
778	return true;
779	}
780
781	/// Return true if the given value is concrete. We must prove that undef can
782	/// never reach it.
783	///
784	/// TODO: If we decide that this is a good approach to checking for undef, we
785	/// may factor it into a common location.
786	static bool hasConcreteDef(Value *V) {
787	SmallPtrSet<Value*, `8`> Visited;
788	Visited.insert(Ptr: V);
789	return hasConcreteDefImpl(V, Visited, Depth: `0`);
790	}
791
792	/// Return true if the given phi is a "counter" in L. A counter is an
793	/// add recurance (of integer or pointer type) with an arbitrary start, and a
794	/// step of 1. Note that L must have exactly one latch.
795	static bool isLoopCounter(PHINode* Phi, Loop *L,
796	ScalarEvolution *SE) {
797	assert(Phi->getParent() == L->getHeader());
798	assert(L->getLoopLatch());
799
800	if (!SE->isSCEVable(Ty: Phi->getType()))
801	return false;
802
803	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));
804	if (!AR \|\| AR->getLoop() != L \|\| !AR->isAffine())
805	return false;
806
807	const SCEV Step = dyn_cast<SCEVConstant>(Val: AR->getStepRecurrence(SE&: SE));
808	if (!Step \|\| !Step->isOne())
809	return false;
810
811	int LatchIdx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
812	Value *IncV = Phi->getIncomingValue(i: LatchIdx);
813	return (getLoopPhiForCounter(IncV, L) == Phi &&
814	isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncV)));
815	}
816
817	/// Search the loop header for a loop counter (anadd rec w/step of one)
818	/// suitable for use by LFTR. If multiple counters are available, select the
819	/// "best" one based profitable heuristics.
820	///
821	/// BECount may be an i8 pointer type. The pointer difference is already*
822	/// valid count without scaling the address stride, so it remains a pointer
823	/// expression as far as SCEV is concerned.
824	static PHINode FindLoopCounter(Loop L, BasicBlock *ExitingBB,
825	const SCEV *BECount,
826	ScalarEvolution SE, DominatorTree DT) {
827	uint64_t BCWidth = SE->getTypeSizeInBits(Ty: BECount->getType());
828
829	Value *Cond = cast<BranchInst>(Val: ExitingBB->getTerminator())->getCondition();
830
831	// Loop over all of the PHI nodes, looking for a simple counter.
832	PHINode BestPhi = nullptr*;
833	const SCEV BestInit = nullptr*;
834	BasicBlock *LatchBlock = L->getLoopLatch();
835	assert(LatchBlock && "Must be in simplified form");
836	const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
837
838	for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ++I) {
839	PHINode *Phi = cast<PHINode>(Val&: I);
840	if (!isLoopCounter(Phi, L, SE))
841	continue;
842
843	const auto *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));
844
845	// AR may be a pointer type, while BECount is an integer type.
846	// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
847	// AR may not be a narrower type, or we may never exit.
848	uint64_t PhiWidth = SE->getTypeSizeInBits(Ty: AR->getType());
849	if (PhiWidth < BCWidth \|\| !DL.isLegalInteger(Width: PhiWidth))
850	continue;
851
852	// Avoid reusing a potentially undef value to compute other values that may
853	// have originally had a concrete definition.
854	if (!hasConcreteDef(V: Phi)) {
855	// We explicitly allow unknown phis as long as they are already used by
856	// the loop exit test. This is legal since performing LFTR could not
857	// increase the number of undef users.
858	Value *IncPhi = Phi->getIncomingValueForBlock(BB: LatchBlock);
859	if (!isLoopExitTestBasedOn(V: Phi, ExitingBB) &&
860	!isLoopExitTestBasedOn(V: IncPhi, ExitingBB))
861	continue;
862	}
863
864	// Avoid introducing undefined behavior due to poison which didn't exist in
865	// the original program. (Annoyingly, the rules for poison and undef
866	// propagation are distinct, so this does NOT cover the undef case above.)
867	// We have to ensure that we don't introduce UB by introducing a use on an
868	// iteration where said IV produces poison. Our strategy here differs for
869	// pointers and integer IVs. For integers, we strip and reinfer as needed,
870	// see code in linearFunctionTestReplace. For pointers, we restrict
871	// transforms as there is no good way to reinfer inbounds once lost.
872	if (!Phi->getType()->isIntegerTy() &&
873	!mustExecuteUBIfPoisonOnPathTo(Root: Phi, OnPathTo: ExitingBB->getTerminator(), DT))
874	continue;
875
876	const SCEV *Init = AR->getStart();
877
878	if (BestPhi && !isAlmostDeadIV(IV: BestPhi, LatchBlock, Cond)) {
879	// Don't force a live loop counter if another IV can be used.
880	if (isAlmostDeadIV(IV: Phi, LatchBlock, Cond))
881	continue;
882
883	// Prefer to count-from-zero. This is a more "canonical" counter form. It
884	// also prefers integer to pointer IVs.
885	if (BestInit->isZero() != Init->isZero()) {
886	if (BestInit->isZero())
887	continue;
888	}
889	// If two IVs both count from zero or both count from nonzero then the
890	// narrower is likely a dead phi that has been widened. Use the wider phi
891	// to allow the other to be eliminated.
892	else if (PhiWidth <= SE->getTypeSizeInBits(Ty: BestPhi->getType()))
893	continue;
894	}
895	BestPhi = Phi;
896	BestInit = Init;
897	}
898	return BestPhi;
899	}
900
901	/// Insert an IR expression which computes the value held by the IV IndVar
902	/// (which must be an loop counter w/unit stride) after the backedge of loop L
903	/// is taken ExitCount times.
904	static Value genLoopLimit(PHINode IndVar, BasicBlock *ExitingBB,
905	const SCEV ExitCount, bool* UsePostInc, Loop *L,
906	SCEVExpander &Rewriter, ScalarEvolution *SE) {
907	assert(isLoopCounter(IndVar, L, SE));
908	assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
909	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IndVar));
910	assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
911
912	// For integer IVs, truncate the IV before computing the limit unless we
913	// know apriori that the limit must be a constant when evaluated in the
914	// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
915	// IV in the loop over a (potentially) expensive expansion of the widened
916	// exit count add(zext(add)) expression.
917	if (IndVar->getType()->isIntegerTy() &&
918	SE->getTypeSizeInBits(Ty: AR->getType()) >
919	SE->getTypeSizeInBits(Ty: ExitCount->getType())) {
920	const SCEV *IVInit = AR->getStart();
921	if (!isa<SCEVConstant>(Val: IVInit) \|\| !isa<SCEVConstant>(Val: ExitCount))
922	AR = cast<SCEVAddRecExpr>(Val: SE->getTruncateExpr(Op: AR, Ty: ExitCount->getType()));
923	}
924
925	const SCEVAddRecExpr ARBase = UsePostInc ? AR->getPostIncExpr(SE&: SE) : AR;
926	const SCEV IVLimit = ARBase->evaluateAtIteration(It: ExitCount, SE&: SE);
927	assert(SE->isLoopInvariant(IVLimit, L) &&
928	"Computed iteration count is not loop invariant!");
929	return Rewriter.expandCodeFor(SH: IVLimit, Ty: ARBase->getType(),
930	I: ExitingBB->getTerminator());
931	}
932
933	/// This method rewrites the exit condition of the loop to be a canonical !=
934	/// comparison against the incremented loop induction variable. This pass is
935	/// able to rewrite the exit tests of any loop where the SCEV analysis can
936	/// determine a loop-invariant trip count of the loop, which is actually a much
937	/// broader range than just linear tests.
938	bool IndVarSimplify::
939	linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
940	const SCEV *ExitCount,
941	PHINode *IndVar, SCEVExpander &Rewriter) {
942	assert(L->getLoopLatch() && "Loop no longer in simplified form?");
943	assert(isLoopCounter(IndVar, L, SE));
944	Instruction * const IncVar =
945	cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: L->getLoopLatch()));
946
947	// Initialize CmpIndVar to the preincremented IV.
948	Value *CmpIndVar = IndVar;
949	bool UsePostInc = false;
950
951	// If the exiting block is the same as the backedge block, we prefer to
952	// compare against the post-incremented value, otherwise we must compare
953	// against the preincremented value.
954	if (ExitingBB == L->getLoopLatch()) {
955	// For pointer IVs, we chose to not strip inbounds which requires us not
956	// to add a potentially UB introducing use. We need to either a) show
957	// the loop test we're modifying is already in post-inc form, or b) show
958	// that adding a use must not introduce UB.
959	bool SafeToPostInc =
960	IndVar->getType()->isIntegerTy() \|\|
961	isLoopExitTestBasedOn(V: IncVar, ExitingBB) \|\|
962	mustExecuteUBIfPoisonOnPathTo(Root: IncVar, OnPathTo: ExitingBB->getTerminator(), DT);
963	if (SafeToPostInc) {
964	UsePostInc = true;
965	CmpIndVar = IncVar;
966	}
967	}
968
969	// It may be necessary to drop nowrap flags on the incrementing instruction
970	// if either LFTR moves from a pre-inc check to a post-inc check (in which
971	// case the increment might have previously been poison on the last iteration
972	// only) or if LFTR switches to a different IV that was previously dynamically
973	// dead (and as such may be arbitrarily poison). We remove any nowrap flags
974	// that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
975	// check), because the pre-inc addrec flags may be adopted from the original
976	// instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
977	// TODO: This handling is inaccurate for one case: If we switch to a
978	// dynamically dead IV that wraps on the first loop iteration only, which is
979	// not covered by the post-inc addrec. (If the new IV was not dynamically
980	// dead, it could not be poison on the first iteration in the first place.)
981	if (auto *BO = dyn_cast<BinaryOperator>(Val: IncVar)) {
982	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncVar));
983	if (BO->hasNoUnsignedWrap())
984	BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
985	if (BO->hasNoSignedWrap())
986	BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
987	}
988
989	Value *ExitCnt = genLoopLimit(
990	IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
991	assert(ExitCnt->getType()->isPointerTy() ==
992	IndVar->getType()->isPointerTy() &&
993	"genLoopLimit missed a cast");
994
995	// Insert a new icmp_ne or icmp_eq instruction before the branch.
996	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
997	ICmpInst::Predicate P;
998	if (L->contains(BB: BI->getSuccessor(i: `0`)))
999	P = ICmpInst::ICMP_NE;
1000	else
1001	P = ICmpInst::ICMP_EQ;
1002
1003	IRBuilder<> Builder(BI);
1004
1005	// The new loop exit condition should reuse the debug location of the
1006	// original loop exit condition.
1007	if (auto *Cond = dyn_cast<Instruction>(Val: BI->getCondition()))
1008	Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1009
1010	// For integer IVs, if we evaluated the limit in the narrower bitwidth to
1011	// avoid the expensive expansion of the limit expression in the wider type,
1012	// emit a truncate to narrow the IV to the ExitCount type. This is safe
1013	// since we know (from the exit count bitwidth), that we can't self-wrap in
1014	// the narrower type.
1015	unsigned CmpIndVarSize = SE->getTypeSizeInBits(Ty: CmpIndVar->getType());
1016	unsigned ExitCntSize = SE->getTypeSizeInBits(Ty: ExitCnt->getType());
1017	if (CmpIndVarSize > ExitCntSize) {
1018	assert(!CmpIndVar->getType()->isPointerTy() &&
1019	!ExitCnt->getType()->isPointerTy());
1020
1021	// Before resorting to actually inserting the truncate, use the same
1022	// reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1023	// the other side of the comparison instead. We still evaluate the limit
1024	// in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1025	// a truncate within in.
1026	bool Extended = false;
1027	const SCEV *IV = SE->getSCEV(V: CmpIndVar);
1028	const SCEV *TruncatedIV = SE->getTruncateExpr(Op: IV, Ty: ExitCnt->getType());
1029	const SCEV *ZExtTrunc =
1030	SE->getZeroExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1031
1032	if (ZExtTrunc == IV) {
1033	Extended = true;
1034	ExitCnt = Builder.CreateZExt(V: ExitCnt, DestTy: IndVar->getType(),
1035	Name: "wide.trip.count");
1036	} else {
1037	const SCEV *SExtTrunc =
1038	SE->getSignExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1039	if (SExtTrunc == IV) {
1040	Extended = true;
1041	ExitCnt = Builder.CreateSExt(V: ExitCnt, DestTy: IndVar->getType(),
1042	Name: "wide.trip.count");
1043	}
1044	}
1045
1046	if (Extended) {
1047	bool Discard;
1048	L->makeLoopInvariant(V: ExitCnt, Changed&: Discard);
1049	} else
1050	CmpIndVar = Builder.CreateTrunc(V: CmpIndVar, DestTy: ExitCnt->getType(),
1051	Name: "lftr.wideiv");
1052	}
1053	LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1054	<< " LHS:" << *CmpIndVar << `'\n'`
1055	<< " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1056	<< "\n"
1057	<< " RHS:\t" << *ExitCnt << "\n"
1058	<< "ExitCount:\t" << *ExitCount << "\n"
1059	<< " was: " << *BI->getCondition() << "\n");
1060
1061	Value *Cond = Builder.CreateICmp(P, LHS: CmpIndVar, RHS: ExitCnt, Name: "exitcond");
1062	Value *OrigCond = BI->getCondition();
1063	// It's tempting to use replaceAllUsesWith here to fully replace the old
1064	// comparison, but that's not immediately safe, since users of the old
1065	// comparison may not be dominated by the new comparison. Instead, just
1066	// update the branch to use the new comparison; in the common case this
1067	// will make old comparison dead.
1068	BI->setCondition(Cond);
1069	DeadInsts.emplace_back(Args&: OrigCond);
1070
1071	++NumLFTR;
1072	return true;
1073	}
1074
1075	//===----------------------------------------------------------------------===//
1076	// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1077	//===----------------------------------------------------------------------===//
1078
1079	/// If there's a single exit block, sink any loop-invariant values that
1080	/// were defined in the preheader but not used inside the loop into the
1081	/// exit block to reduce register pressure in the loop.
1082	bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1083	BasicBlock *ExitBlock = L->getExitBlock();
1084	if (!ExitBlock) return false;
1085
1086	BasicBlock *Preheader = L->getLoopPreheader();
1087	if (!Preheader) return false;
1088
1089	bool MadeAnyChanges = false;
1090	BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1091	BasicBlock::iterator I(Preheader->getTerminator());
1092	while (I != Preheader->begin()) {
1093	--I;
1094	// New instructions were inserted at the end of the preheader.
1095	if (isa<PHINode>(Val: I))
1096	break;
1097
1098	// Don't move instructions which might have side effects, since the side
1099	// effects need to complete before instructions inside the loop. Also don't
1100	// move instructions which might read memory, since the loop may modify
1101	// memory. Note that it's okay if the instruction might have undefined
1102	// behavior: LoopSimplify guarantees that the preheader dominates the exit
1103	// block.
1104	if (I ->mayHaveSideEffects() \|\| I ->mayReadFromMemory())
1105	continue;
1106
1107	// Skip debug info intrinsics.
1108	if (isa<DbgInfoIntrinsic>(Val: I))
1109	continue;
1110
1111	// Skip eh pad instructions.
1112	if (I ->isEHPad())
1113	continue;
1114
1115	// Don't sink alloca: we never want to sink static alloca's out of the
1116	// entry block, and correctly sinking dynamic alloca's requires
1117	// checks for stacksave/stackrestore intrinsics.
1118	// FIXME: Refactor this check somehow?
1119	if (isa<AllocaInst>(Val: I))
1120	continue;
1121
1122	// Determine if there is a use in or before the loop (direct or
1123	// otherwise).
1124	bool UsedInLoop = false;
1125	for (Use &U : I ->uses()) {
1126	Instruction *User = cast<Instruction>(Val: U.getUser());
1127	BasicBlock *UseBB = User->getParent();
1128	if (PHINode *P = dyn_cast<PHINode>(Val: User)) {
1129	unsigned i =
1130	PHINode::getIncomingValueNumForOperand(i: U.getOperandNo());
1131	UseBB = P->getIncomingBlock(i);
1132	}
1133	if (UseBB == Preheader \|\| L->contains(BB: UseBB)) {
1134	UsedInLoop = true;
1135	break;
1136	}
1137	}
1138
1139	// If there is, the def must remain in the preheader.
1140	if (UsedInLoop)
1141	continue;
1142
1143	// Otherwise, sink it to the exit block.
1144	Instruction ToMove = &I;
1145	bool Done = false;
1146
1147	if (I != Preheader->begin()) {
1148	// Skip debug info intrinsics.
1149	do {
1150	--I;
1151	} while (I ->isDebugOrPseudoInst() && I != Preheader->begin());
1152
1153	if (I ->isDebugOrPseudoInst() && I == Preheader->begin())
1154	Done = true;
1155	} else {
1156	Done = true;
1157	}
1158
1159	MadeAnyChanges = true;
1160	ToMove->moveBefore(BB&: *ExitBlock, I: InsertPt);
1161	SE->forgetValue(V: ToMove);
1162	if (Done) break;
1163	InsertPt = ToMove->getIterator();
1164	}
1165
1166	return MadeAnyChanges;
1167	}
1168
1169	static void replaceExitCond(BranchInst BI, Value NewCond,
1170	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1171	auto *OldCond = BI->getCondition();
1172	LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1173	<< " with " << *NewCond << "\n");
1174	BI->setCondition(NewCond);
1175	if (OldCond->use_empty())
1176	DeadInsts.emplace_back(Args&: OldCond);
1177	}
1178
1179	static Constant createFoldedExitCond(const* Loop L, BasicBlock ExitingBB,
1180	bool IsTaken) {
1181	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1182	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1183	auto *OldCond = BI->getCondition();
1184	return ConstantInt::get(Ty: OldCond->getType(),
1185	V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1186	}
1187
1188	static void foldExit(const Loop L, BasicBlock ExitingBB, bool IsTaken,
1189	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1190	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1191	auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1192	replaceExitCond(BI, NewCond, DeadInsts);
1193	}
1194
1195	static void replaceLoopPHINodesWithPreheaderValues(
1196	LoopInfo LI, Loop L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1197	ScalarEvolution &SE) {
1198	assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1199	auto *LoopPreheader = L->getLoopPreheader();
1200	auto *LoopHeader = L->getHeader();
1201	SmallVector<Instruction *> Worklist;
1202	for (auto &PN : LoopHeader->phis()) {
1203	auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1204	for (User *U : PN.users())
1205	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1206	SE.forgetValue(V: &PN);
1207	PN.replaceAllUsesWith(V: PreheaderIncoming);
1208	DeadInsts.emplace_back(Args: &PN);
1209	}
1210
1211	// Replacing with the preheader value will often allow IV users to simplify
1212	// (especially if the preheader value is a constant).
1213	SmallPtrSet<Instruction *, `16`> Visited;
1214	while (!Worklist.empty()) {
1215	auto *I = cast<Instruction>(Val: Worklist.pop_back_val());
1216	if (!Visited.insert(Ptr: I).second)
1217	continue;
1218
1219	// Don't simplify instructions outside the loop.
1220	if (!L->contains(Inst: I))
1221	continue;
1222
1223	Value *Res = simplifyInstruction(I, Q: I->getModule()->getDataLayout());
1224	if (Res && LI->replacementPreservesLCSSAForm(From: I, To: Res)) {
1225	for (User *U : I->users())
1226	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1227	I->replaceAllUsesWith(V: Res);
1228	DeadInsts.emplace_back(Args&: I);
1229	}
1230	}
1231	}
1232
1233	static Value *
1234	createInvariantCond(const Loop L, BasicBlock ExitingBB,
1235	const ScalarEvolution::LoopInvariantPredicate &LIP,
1236	SCEVExpander &Rewriter) {
1237	ICmpInst::Predicate InvariantPred = LIP.Pred;
1238	BasicBlock *Preheader = L->getLoopPreheader();
1239	assert(Preheader && "Preheader doesn't exist");
1240	Rewriter.setInsertPoint(Preheader->getTerminator());
1241	auto *LHSV = Rewriter.expandCodeFor(SH: LIP.LHS);
1242	auto *RHSV = Rewriter.expandCodeFor(SH: LIP.RHS);
1243	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1244	if (ExitIfTrue)
1245	InvariantPred = ICmpInst::getInversePredicate(pred: InvariantPred);
1246	IRBuilder<> Builder(Preheader->getTerminator());
1247	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1248	return Builder.CreateICmp(P: InvariantPred, LHS: LHSV, RHS: RHSV,
1249	Name: BI->getCondition()->getName());
1250	}
1251
1252	static std::optional<Value *>
1253	createReplacement(ICmpInst ICmp, const* Loop L, BasicBlock ExitingBB,
1254	const SCEV MaxIter, bool* Inverted, bool SkipLastIter,
1255	ScalarEvolution *SE, SCEVExpander &Rewriter) {
1256	ICmpInst::Predicate Pred = ICmp->getPredicate();
1257	Value *LHS = ICmp->getOperand(i_nocapture: `0`);
1258	Value *RHS = ICmp->getOperand(i_nocapture: `1`);
1259
1260	// 'LHS pred RHS' should now mean that we stay in loop.
1261	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1262	if (Inverted)
1263	Pred = CmpInst::getInversePredicate(pred: Pred);
1264
1265	const SCEV *LHSS = SE->getSCEVAtScope(V: LHS, L);
1266	const SCEV *RHSS = SE->getSCEVAtScope(V: RHS, L);
1267	// Can we prove it to be trivially true or false?
1268	if (auto EV = SE->evaluatePredicateAt(Pred, LHS: LHSS, RHS: RHSS, CtxI: BI))
1269	return createFoldedExitCond(L, ExitingBB, /IsTaken/ !*EV);
1270
1271	auto *ARTy = LHSS->getType();
1272	auto *MaxIterTy = MaxIter->getType();
1273	// If possible, adjust types.
1274	if (SE->getTypeSizeInBits(Ty: ARTy) > SE->getTypeSizeInBits(Ty: MaxIterTy))
1275	MaxIter = SE->getZeroExtendExpr(Op: MaxIter, Ty: ARTy);
1276	else if (SE->getTypeSizeInBits(Ty: ARTy) < SE->getTypeSizeInBits(Ty: MaxIterTy)) {
1277	const SCEV *MinusOne = SE->getMinusOne(Ty: ARTy);
1278	auto *MaxAllowedIter = SE->getZeroExtendExpr(Op: MinusOne, Ty: MaxIterTy);
1279	if (SE->isKnownPredicateAt(Pred: ICmpInst::ICMP_ULE, LHS: MaxIter, RHS: MaxAllowedIter, CtxI: BI))
1280	MaxIter = SE->getTruncateExpr(Op: MaxIter, Ty: ARTy);
1281	}
1282
1283	if (SkipLastIter) {
1284	// Semantically skip last iter is "subtract 1, do not bother about unsigned
1285	// wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1286	// with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1287	// So we manually construct umin(a - 1, b - 1).
1288	SmallVector<const SCEV *, `4`> Elements;
1289	if (auto *UMin = dyn_cast<SCEVUMinExpr>(Val: MaxIter)) {
1290	for (auto *Op : UMin->operands())
1291	Elements.push_back(Elt: SE->getMinusSCEV(LHS: Op, RHS: SE->getOne(Ty: Op->getType())));
1292	MaxIter = SE->getUMinFromMismatchedTypes(Ops&: Elements);
1293	} else
1294	MaxIter = SE->getMinusSCEV(LHS: MaxIter, RHS: SE->getOne(Ty: MaxIter->getType()));
1295	}
1296
1297	// Check if there is a loop-invariant predicate equivalent to our check.
1298	auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHS: LHSS, RHS: RHSS,
1299	L, CtxI: BI, MaxIter);
1300	if (!LIP)
1301	return std::nullopt;
1302
1303	// Can we prove it to be trivially true?
1304	if (SE->isKnownPredicateAt(Pred: LIP ->Pred, LHS: LIP ->LHS, RHS: LIP ->RHS, CtxI: BI))
1305	return createFoldedExitCond(L, ExitingBB, /IsTaken/ false);
1306	else
1307	return createInvariantCond(L, ExitingBB, LIP: *LIP, Rewriter);
1308	}
1309
1310	static bool optimizeLoopExitWithUnknownExitCount(
1311	const Loop L, BranchInst BI, BasicBlock ExitingBB, const* SCEV *MaxIter,
1312	bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1313	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1314	assert(
1315	(L->contains(BI->getSuccessor(`0`)) != L->contains(BI->getSuccessor(`1`))) &&
1316	"Not a loop exit!");
1317
1318	// For branch that stays in loop by TRUE condition, go through AND. For branch
1319	// that stays in loop by FALSE condition, go through OR. Both gives the
1320	// similar logic: "stay in loop iff all conditions are true(false)".
1321	bool Inverted = L->contains(BB: BI->getSuccessor(i: `1`));
1322	SmallVector<ICmpInst *, `4`> LeafConditions;
1323	SmallVector<Value *, `4`> Worklist;
1324	SmallPtrSet<Value *, `4`> Visited;
1325	Value *OldCond = BI->getCondition();
1326	Visited.insert(Ptr: OldCond);
1327	Worklist.push_back(Elt: OldCond);
1328
1329	auto GoThrough = [&](Value *V) {
1330	Value LHS = nullptr, RHS = nullptr;
1331	if (Inverted) {
1332	if (!match(V, P: m_LogicalOr(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1333	return false;
1334	} else {
1335	if (!match(V, P: m_LogicalAnd(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1336	return false;
1337	}
1338	if (Visited.insert(Ptr: LHS).second)
1339	Worklist.push_back(Elt: LHS);
1340	if (Visited.insert(Ptr: RHS).second)
1341	Worklist.push_back(Elt: RHS);
1342	return true;
1343	};
1344
1345	do {
1346	Value *Curr = Worklist.pop_back_val();
1347	// Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1348	// those with one use, to avoid instruction duplication.
1349	if (Curr->hasOneUse())
1350	if (!GoThrough (Curr))
1351	if (auto *ICmp = dyn_cast<ICmpInst>(Val: Curr))
1352	LeafConditions.push_back(Elt: ICmp);
1353	} while (!Worklist.empty());
1354
1355	// If the current basic block has the same exit count as the whole loop, and
1356	// it consists of multiple icmp's, try to collect all icmp's that give exact
1357	// same exit count. For all other icmp's, we could use one less iteration,
1358	// because their value on the last iteration doesn't really matter.
1359	SmallPtrSet<ICmpInst *, `4`> ICmpsFailingOnLastIter;
1360	if (!SkipLastIter && LeafConditions.size() > `1` &&
1361	SE->getExitCount(L, ExitingBlock: ExitingBB,
1362	Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1363	MaxIter)
1364	for (auto *ICmp : LeafConditions) {
1365	auto EL = SE->computeExitLimitFromCond(L, ExitCond: ICmp, ExitIfTrue: Inverted,
1366	/ControlsExit/ ControlsOnlyExit: false);
1367	auto *ExitMax = EL.SymbolicMaxNotTaken;
1368	if (isa<SCEVCouldNotCompute>(Val: ExitMax))
1369	continue;
1370	// They could be of different types (specifically this happens after
1371	// IV widening).
1372	auto *WiderType =
1373	SE->getWiderType(Ty1: ExitMax->getType(), Ty2: MaxIter->getType());
1374	auto *WideExitMax = SE->getNoopOrZeroExtend(V: ExitMax, Ty: WiderType);
1375	auto *WideMaxIter = SE->getNoopOrZeroExtend(V: MaxIter, Ty: WiderType);
1376	if (WideExitMax == WideMaxIter)
1377	ICmpsFailingOnLastIter.insert(Ptr: ICmp);
1378	}
1379
1380	bool Changed = false;
1381	for (auto *OldCond : LeafConditions) {
1382	// Skip last iteration for this icmp under one of two conditions:
1383	// - We do it for all conditions;
1384	// - There is another ICmp that would fail on last iter, so this one doesn't
1385	// really matter.
1386	bool OptimisticSkipLastIter = SkipLastIter;
1387	if (!OptimisticSkipLastIter) {
1388	if (ICmpsFailingOnLastIter.size() > `1`)
1389	OptimisticSkipLastIter = true;
1390	else if (ICmpsFailingOnLastIter.size() == `1`)
1391	OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(Ptr: OldCond);
1392	}
1393	if (auto Replaced =
1394	createReplacement(ICmp: OldCond, L, ExitingBB, MaxIter, Inverted,
1395	SkipLastIter: OptimisticSkipLastIter, SE, Rewriter)) {
1396	Changed = true;
1397	auto NewCond = Replaced;
1398	if (auto *NCI = dyn_cast<Instruction>(Val: NewCond)) {
1399	NCI->setName(OldCond->getName() + ".first_iter");
1400	}
1401	LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1402	<< " with " << *NewCond << "\n");
1403	assert(OldCond->hasOneUse() && "Must be!");
1404	OldCond->replaceAllUsesWith(V: NewCond);
1405	DeadInsts.push_back(Elt: OldCond);
1406	// Make sure we no longer consider this condition as failing on last
1407	// iteration.
1408	ICmpsFailingOnLastIter.erase(Ptr: OldCond);
1409	}
1410	}
1411	return Changed;
1412	}
1413
1414	bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1415	// Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1416	// We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1417	// never reaches the icmp since the zext doesn't fold to an AddRec unless
1418	// it already has flags. The alternative to this would be to extending the
1419	// set of "interesting" IV users to include the icmp, but doing that
1420	// regresses results in practice by querying SCEVs before trip counts which
1421	// rely on them which results in SCEV caching sub-optimal answers. The
1422	// concern about caching sub-optimal results is why we only query SCEVs of
1423	// the loop invariant RHS here.
1424	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1425	L->getExitingBlocks(ExitingBlocks);
1426	bool Changed = false;
1427	for (auto *ExitingBB : ExitingBlocks) {
1428	auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1429	if (!BI)
1430	continue;
1431	assert(BI->isConditional() && "exit branch must be conditional");
1432
1433	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1434	if (!ICmp \|\| !ICmp->hasOneUse())
1435	continue;
1436
1437	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1438	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1439	// For the range reasoning, avoid computing SCEVs in the loop to avoid
1440	// poisoning cache with sub-optimal results. For the must-execute case,
1441	// this is a neccessary precondition for correctness.
1442	if (!L->isLoopInvariant(V: RHS)) {
1443	if (!L->isLoopInvariant(V: LHS))
1444	continue;
1445	// Same logic applies for the inverse case
1446	std::swap(a&: LHS, b&: RHS);
1447	}
1448
1449	// Match (icmp signed-cond zext, RHS)
1450	Value LHSOp = nullptr*;
1451	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))) \|\| !ICmp->isSigned())
1452	continue;
1453
1454	const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1455	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1456	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1457	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1458	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1459	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1460	if (FullCR.contains(CR: RHSCR)) {
1461	// We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1462	// replace the signed condition with the unsigned version.
1463	ICmp->setPredicate(ICmp->getUnsignedPredicate());
1464	Changed = true;
1465	// Note: No SCEV invalidation needed. We've changed the predicate, but
1466	// have not changed exit counts, or the values produced by the compare.
1467	continue;
1468	}
1469	}
1470
1471	// Now that we've canonicalized the condition to match the extend,
1472	// see if we can rotate the extend out of the loop.
1473	for (auto *ExitingBB : ExitingBlocks) {
1474	auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1475	if (!BI)
1476	continue;
1477	assert(BI->isConditional() && "exit branch must be conditional");
1478
1479	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1480	if (!ICmp \|\| !ICmp->hasOneUse() \|\| !ICmp->isUnsigned())
1481	continue;
1482
1483	bool Swapped = false;
1484	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1485	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1486	if (L->isLoopInvariant(V: LHS) == L->isLoopInvariant(V: RHS))
1487	// Nothing to rotate
1488	continue;
1489	if (L->isLoopInvariant(V: LHS)) {
1490	// Same logic applies for the inverse case until we actually pick
1491	// which operand of the compare to update.
1492	Swapped = true;
1493	std::swap(a&: LHS, b&: RHS);
1494	}
1495	assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1496
1497	// Match (icmp unsigned-cond zext, RHS)
1498	// TODO: Extend to handle corresponding sext/signed-cmp case
1499	// TODO: Extend to other invertible functions
1500	Value LHSOp = nullptr*;
1501	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))))
1502	continue;
1503
1504	// In general, we only rotate if we can do so without increasing the number
1505	// of instructions. The exception is when we have an zext(add-rec). The
1506	// reason for allowing this exception is that we know we need to get rid
1507	// of the zext for SCEV to be able to compute a trip count for said loops;
1508	// we consider the new trip count valuable enough to increase instruction
1509	// count by one.
1510	if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: LHSOp)))
1511	continue;
1512
1513	// Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1514	// replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1515	// when zext is loop varying and RHS is loop invariant. This converts
1516	// loop varying work to loop-invariant work.
1517	auto doRotateTransform = [&]() {
1518	assert(ICmp->isUnsigned() && "must have proven unsigned already");
1519	auto *NewRHS =
1520	CastInst::Create(Instruction::Trunc, S: RHS, Ty: LHSOp->getType(), Name: "",
1521	InsertBefore: L->getLoopPreheader()->getTerminator());
1522	ICmp->setOperand(i_nocapture: Swapped ? `1` : `0`, Val_nocapture: LHSOp);
1523	ICmp->setOperand(i_nocapture: Swapped ? `0` : `1`, Val_nocapture: NewRHS);
1524	if (LHS->use_empty())
1525	DeadInsts.push_back(Elt: LHS);
1526	};
1527
1528
1529	const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1530	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1531	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1532	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1533	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1534	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1535	if (FullCR.contains(CR: RHSCR)) {
1536	doRotateTransform ();
1537	Changed = true;
1538	// Note, we are leaving SCEV in an unfortunately imprecise case here
1539	// as rotation tends to reveal information about trip counts not
1540	// previously visible.
1541	continue;
1542	}
1543	}
1544
1545	return Changed;
1546	}
1547
1548	bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1549	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1550	L->getExitingBlocks(ExitingBlocks);
1551
1552	// Remove all exits which aren't both rewriteable and execute on every
1553	// iteration.
1554	llvm::erase_if(C&: ExitingBlocks, P: [&](BasicBlock *ExitingBB) {
1555	// If our exitting block exits multiple loops, we can only rewrite the
1556	// innermost one. Otherwise, we're changing how many times the innermost
1557	// loop runs before it exits.
1558	if (LI->getLoopFor(BB: ExitingBB) != L)
1559	return true;
1560
1561	// Can't rewrite non-branch yet.
1562	BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1563	if (!BI)
1564	return true;
1565
1566	// Likewise, the loop latch must be dominated by the exiting BB.
1567	if (!DT->dominates(A: ExitingBB, B: L->getLoopLatch()))
1568	return true;
1569
1570	if (auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1571	// If already constant, nothing to do. However, if this is an
1572	// unconditional exit, we can still replace header phis with their
1573	// preheader value.
1574	if (!L->contains(BB: BI->getSuccessor(i: CI->isNullValue())))
1575	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1576	return true;
1577	}
1578
1579	return false;
1580	});
1581
1582	if (ExitingBlocks.empty())
1583	return false;
1584
1585	// Get a symbolic upper bound on the loop backedge taken count.
1586	const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1587	if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
1588	return false;
1589
1590	// Visit our exit blocks in order of dominance. We know from the fact that
1591	// all exits must dominate the latch, so there is a total dominance order
1592	// between them.
1593	llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock A, BasicBlock B) {
1594	// std::sort sorts in ascending order, so we want the inverse of
1595	// the normal dominance relation.
1596	if (A == B) return false;
1597	if (DT->properlyDominates(A, B))
1598	return true;
1599	else {
1600	assert(DT->properlyDominates(B, A) &&
1601	"expected total dominance order!");
1602	return false;
1603	}
1604	});
1605	#ifdef ASSERT
1606	for (unsigned i = `1`; i < ExitingBlocks.size(); i++) {
1607	assert(DT->dominates(ExitingBlocks[i-`1`], ExitingBlocks[i]));
1608	}
1609	#endif
1610
1611	bool Changed = false;
1612	bool SkipLastIter = false;
1613	const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1614	auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1615	if (SkipLastIter \|\| isa<SCEVCouldNotCompute>(Val: MaxExitCount))
1616	return;
1617	if (isa<SCEVCouldNotCompute>(Val: CurrMaxExit))
1618	CurrMaxExit = MaxExitCount;
1619	else
1620	CurrMaxExit = SE->getUMinFromMismatchedTypes(LHS: CurrMaxExit, RHS: MaxExitCount);
1621	// If the loop has more than 1 iteration, all further checks will be
1622	// executed 1 iteration less.
1623	if (CurrMaxExit == MaxBECount)
1624	SkipLastIter = true;
1625	};
1626	SmallSet<const SCEV *, `8`> DominatingExactExitCounts;
1627	for (BasicBlock *ExitingBB : ExitingBlocks) {
1628	const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1629	const SCEV *MaxExitCount = SE->getExitCount(
1630	L, ExitingBlock: ExitingBB, Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum);
1631	if (isa<SCEVCouldNotCompute>(Val: ExactExitCount)) {
1632	// Okay, we do not know the exit count here. Can we at least prove that it
1633	// will remain the same within iteration space?
1634	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1635	auto OptimizeCond = [&](bool SkipLastIter) {
1636	return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1637	MaxIter: MaxBECount, SkipLastIter,
1638	SE, Rewriter, DeadInsts);
1639	};
1640
1641	// TODO: We might have proved that we can skip the last iteration for
1642	// this check. In this case, we only want to check the condition on the
1643	// pre-last iteration (MaxBECount - 1). However, there is a nasty
1644	// corner case:
1645	//
1646	// for (i = len; i != 0; i--) { ... check (i ult X) ... }
1647	//
1648	// If we could not prove that len != 0, then we also could not prove that
1649	// (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1650	// OptimizeCond will likely not prove anything for it, even if it could
1651	// prove the same fact for len.
1652	//
1653	// As a temporary solution, we query both last and pre-last iterations in
1654	// hope that we will be able to prove triviality for at least one of
1655	// them. We can stop querying MaxBECount for this case once SCEV
1656	// understands that (MaxBECount - 1) will not overflow here.
1657	if (OptimizeCond (false))
1658	Changed = true;
1659	else if (SkipLastIter && OptimizeCond (true))
1660	Changed = true;
1661	UpdateSkipLastIter (MaxExitCount);
1662	continue;
1663	}
1664
1665	UpdateSkipLastIter (ExactExitCount);
1666
1667	// If we know we'd exit on the first iteration, rewrite the exit to
1668	// reflect this. This does not imply the loop must exit through this
1669	// exit; there may be an earlier one taken on the first iteration.
1670	// We know that the backedge can't be taken, so we replace all
1671	// the header PHIs with values coming from the preheader.
1672	if (ExactExitCount->isZero()) {
1673	foldExit(L, ExitingBB, IsTaken: true, DeadInsts);
1674	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1675	Changed = true;
1676	continue;
1677	}
1678
1679	assert(ExactExitCount->getType()->isIntegerTy() &&
1680	MaxBECount->getType()->isIntegerTy() &&
1681	"Exit counts must be integers");
1682
1683	Type *WiderType =
1684	SE->getWiderType(Ty1: MaxBECount->getType(), Ty2: ExactExitCount->getType());
1685	ExactExitCount = SE->getNoopOrZeroExtend(V: ExactExitCount, Ty: WiderType);
1686	MaxBECount = SE->getNoopOrZeroExtend(V: MaxBECount, Ty: WiderType);
1687	assert(MaxBECount->getType() == ExactExitCount->getType());
1688
1689	// Can we prove that some other exit must be taken strictly before this
1690	// one?
1691	if (SE->isLoopEntryGuardedByCond(L, Pred: CmpInst::ICMP_ULT, LHS: MaxBECount,
1692	RHS: ExactExitCount)) {
1693	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1694	Changed = true;
1695	continue;
1696	}
1697
1698	// As we run, keep track of which exit counts we've encountered. If we
1699	// find a duplicate, we've found an exit which would have exited on the
1700	// exiting iteration, but (from the visit order) strictly follows another
1701	// which does the same and is thus dead.
1702	if (!DominatingExactExitCounts.insert(Ptr: ExactExitCount).second) {
1703	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1704	Changed = true;
1705	continue;
1706	}
1707
1708	// TODO: There might be another oppurtunity to leverage SCEV's reasoning
1709	// here. If we kept track of the min of dominanting exits so far, we could
1710	// discharge exits with EC >= MDEC. This is less powerful than the existing
1711	// transform (since later exits aren't considered), but potentially more
1712	// powerful for any case where SCEV can prove a >=u b, but neither a == b
1713	// or a >u b. Such a case is not currently known.
1714	}
1715	return Changed;
1716	}
1717
1718	bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1719	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1720	L->getExitingBlocks(ExitingBlocks);
1721
1722	// Finally, see if we can rewrite our exit conditions into a loop invariant
1723	// form. If we have a read-only loop, and we can tell that we must exit down
1724	// a path which does not need any of the values computed within the loop, we
1725	// can rewrite the loop to exit on the first iteration. Note that this
1726	// doesn't either a) tell us the loop exits on the first iteration (unless
1727	// all* exits are predicateable) or b) tell us which exit might be taken.*
1728	// This transformation looks a lot like a restricted form of dead loop
1729	// elimination, but restricted to read-only loops and without neccesssarily
1730	// needing to kill the loop entirely.
1731	if (!LoopPredication)
1732	return false;
1733
1734	// Note: ExactBTC is the exact backedge taken count iff* the loop exits*
1735	// through explicit* control flow. We have to eliminate the possibility of*
1736	// implicit exits (see below) before we know it's truly exact.
1737	const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1738	if (isa<SCEVCouldNotCompute>(Val: ExactBTC) \|\| !Rewriter.isSafeToExpand(S: ExactBTC))
1739	return false;
1740
1741	assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1742	assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1743
1744	auto BadExit = [&](BasicBlock *ExitingBB) {
1745	// If our exiting block exits multiple loops, we can only rewrite the
1746	// innermost one. Otherwise, we're changing how many times the innermost
1747	// loop runs before it exits.
1748	if (LI->getLoopFor(BB: ExitingBB) != L)
1749	return true;
1750
1751	// Can't rewrite non-branch yet.
1752	BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1753	if (!BI)
1754	return true;
1755
1756	// If already constant, nothing to do.
1757	if (isa<Constant>(Val: BI->getCondition()))
1758	return true;
1759
1760	// If the exit block has phis, we need to be able to compute the values
1761	// within the loop which contains them. This assumes trivially lcssa phis
1762	// have already been removed; TODO: generalize
1763	BasicBlock *ExitBlock =
1764	BI->getSuccessor(i: L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`);
1765	if (!ExitBlock->phis().empty())
1766	return true;
1767
1768	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1769	if (isa<SCEVCouldNotCompute>(Val: ExitCount) \|\|
1770	!Rewriter.isSafeToExpand(S: ExitCount))
1771	return true;
1772
1773	assert(SE->isLoopInvariant(ExitCount, L) &&
1774	"Exit count must be loop invariant");
1775	assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1776	return false;
1777	};
1778
1779	// If we have any exits which can't be predicated themselves, than we can't
1780	// predicate any exit which isn't guaranteed to execute before it. Consider
1781	// two exits (a) and (b) which would both exit on the same iteration. If we
1782	// can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1783	// we could convert a loop from exiting through (a) to one exiting through
1784	// (b). Note that this problem exists only for exits with the same exit
1785	// count, and we could be more aggressive when exit counts are known inequal.
1786	llvm::sort(C&: ExitingBlocks,
1787	Comp: [&](BasicBlock A, BasicBlock B) {
1788	// std::sort sorts in ascending order, so we want the inverse of
1789	// the normal dominance relation, plus a tie breaker for blocks
1790	// unordered by dominance.
1791	if (DT->properlyDominates(A, B)) return true;
1792	if (DT->properlyDominates(A: B, B: A)) return false;
1793	return A->getName() < B->getName();
1794	});
1795	// Check to see if our exit blocks are a total order (i.e. a linear chain of
1796	// exits before the backedge). If they aren't, reasoning about reachability
1797	// is complicated and we choose not to for now.
1798	for (unsigned i = `1`; i < ExitingBlocks.size(); i++)
1799	if (!DT->dominates(A: ExitingBlocks [i-`1`], B: ExitingBlocks [i]))
1800	return false;
1801
1802	// Given our sorted total order, we know that exit[j] must be evaluated
1803	// after all exit[i] such j > i.
1804	for (unsigned i = `0`, e = ExitingBlocks.size(); i < e; i++)
1805	if (BadExit (ExitingBlocks [i])) {
1806	ExitingBlocks.resize(N: i);
1807	break;
1808	}
1809
1810	if (ExitingBlocks.empty())
1811	return false;
1812
1813	// We rely on not being able to reach an exiting block on a later iteration
1814	// then it's statically compute exit count. The implementaton of
1815	// getExitCount currently has this invariant, but assert it here so that
1816	// breakage is obvious if this ever changes..
1817	assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1818	return DT->dominates(ExitingBB, L->getLoopLatch());
1819	}));
1820
1821	// At this point, ExitingBlocks consists of only those blocks which are
1822	// predicatable. Given that, we know we have at least one exit we can
1823	// predicate if the loop is doesn't have side effects and doesn't have any
1824	// implicit exits (because then our exact BTC isn't actually exact).
1825	// @Reviewers - As structured, this is O(I^2) for loop nests. Any
1826	// suggestions on how to improve this? I can obviously bail out for outer
1827	// loops, but that seems less than ideal. MemorySSA can find memory writes,
1828	// is that enough for all* side effects?*
1829	for (BasicBlock *BB : L->blocks())
1830	for (auto &I : *BB)
1831	// TODO:isGuaranteedToTransfer
1832	if (I.mayHaveSideEffects())
1833	return false;
1834
1835	bool Changed = false;
1836	// Finally, do the actual predication for all predicatable blocks. A couple
1837	// of notes here:
1838	// 1) We don't bother to constant fold dominated exits with identical exit
1839	// counts; that's simply a form of CSE/equality propagation and we leave
1840	// it for dedicated passes.
1841	// 2) We insert the comparison at the branch. Hoisting introduces additional
1842	// legality constraints and we leave that to dedicated logic. We want to
1843	// predicate even if we can't insert a loop invariant expression as
1844	// peeling or unrolling will likely reduce the cost of the otherwise loop
1845	// varying check.
1846	Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1847	IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1848	Value ExactBTCV = nullptr; // Lazily generated if needed.*
1849	for (BasicBlock *ExitingBB : ExitingBlocks) {
1850	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1851
1852	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1853	Value *NewCond;
1854	if (ExitCount == ExactBTC) {
1855	NewCond = L->contains(BB: BI->getSuccessor(i: `0`)) ?
1856	B.getFalse() : B.getTrue();
1857	} else {
1858	Value *ECV = Rewriter.expandCodeFor(SH: ExitCount);
1859	if (!ExactBTCV)
1860	ExactBTCV = Rewriter.expandCodeFor(SH: ExactBTC);
1861	Value *RHS = ExactBTCV;
1862	if (ECV->getType() != RHS->getType()) {
1863	Type *WiderTy = SE->getWiderType(Ty1: ECV->getType(), Ty2: RHS->getType());
1864	ECV = B.CreateZExt(V: ECV, DestTy: WiderTy);
1865	RHS = B.CreateZExt(V: RHS, DestTy: WiderTy);
1866	}
1867	auto Pred = L->contains(BB: BI->getSuccessor(i: `0`)) ?
1868	ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1869	NewCond = B.CreateICmp(P: Pred, LHS: ECV, RHS);
1870	}
1871	Value *OldCond = BI->getCondition();
1872	BI->setCondition(NewCond);
1873	if (OldCond->use_empty())
1874	DeadInsts.emplace_back(Args&: OldCond);
1875	Changed = true;
1876	}
1877
1878	return Changed;
1879	}
1880
1881	//===----------------------------------------------------------------------===//
1882	// IndVarSimplify driver. Manage several subpasses of IV simplification.
1883	//===----------------------------------------------------------------------===//
1884
1885	bool IndVarSimplify::run(Loop *L) {
1886	// We need (and expect!) the incoming loop to be in LCSSA.
1887	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
1888	"LCSSA required to run indvars!");
1889
1890	// If LoopSimplify form is not available, stay out of trouble. Some notes:
1891	// - LSR currently only supports LoopSimplify-form loops. Indvars'
1892	// canonicalization can be a pessimization without LSR to "clean up"
1893	// afterwards.
1894	// - We depend on having a preheader; in particular,
1895	// Loop::getCanonicalInductionVariable only supports loops with preheaders,
1896	// and we're in trouble if we can't find the induction variable even when
1897	// we've manually inserted one.
1898	// - LFTR relies on having a single backedge.
1899	if (!L->isLoopSimplifyForm())
1900	return false;
1901
1902	bool Changed = false;
1903	// If there are any floating-point recurrences, attempt to
1904	// transform them to use integer recurrences.
1905	Changed \|= rewriteNonIntegerIVs(L);
1906
1907	// Create a rewriter object which we'll use to transform the code with.
1908	SCEVExpander Rewriter(*SE, DL, "indvars");
1909	#ifndef NDEBUG
1910	Rewriter.setDebugType(DEBUG_TYPE);
1911	#endif
1912
1913	// Eliminate redundant IV users.
1914	//
1915	// Simplification works best when run before other consumers of SCEV. We
1916	// attempt to avoid evaluating SCEVs for sign/zero extend operations until
1917	// other expressions involving loop IVs have been evaluated. This helps SCEV
1918	// set no-wrap flags before normalizing sign/zero extension.
1919	Rewriter.disableCanonicalMode();
1920	Changed \|= simplifyAndExtend(L, Rewriter, LI);
1921
1922	// Check to see if we can compute the final value of any expressions
1923	// that are recurrent in the loop, and substitute the exit values from the
1924	// loop into any instructions outside of the loop that use the final values
1925	// of the current expressions.
1926	if (ReplaceExitValue != NeverRepl) {
1927	if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1928	ReplaceExitValue, DeadInsts)) {
1929	NumReplaced += Rewrites;
1930	Changed = true;
1931	}
1932	}
1933
1934	// Eliminate redundant IV cycles.
1935	NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1936
1937	// Try to convert exit conditions to unsigned and rotate computation
1938	// out of the loop. Note: Handles invalidation internally if needed.
1939	Changed \|= canonicalizeExitCondition(L);
1940
1941	// Try to eliminate loop exits based on analyzeable exit counts
1942	if (optimizeLoopExits(L, Rewriter)) {
1943	Changed = true;
1944	// Given we've changed exit counts, notify SCEV
1945	// Some nested loops may share same folded exit basic block,
1946	// thus we need to notify top most loop.
1947	SE->forgetTopmostLoop(L);
1948	}
1949
1950	// Try to form loop invariant tests for loop exits by changing how many
1951	// iterations of the loop run when that is unobservable.
1952	if (predicateLoopExits(L, Rewriter)) {
1953	Changed = true;
1954	// Given we've changed exit counts, notify SCEV
1955	SE->forgetLoop(L);
1956	}
1957
1958	// If we have a trip count expression, rewrite the loop's exit condition
1959	// using it.
1960	if (!DisableLFTR) {
1961	BasicBlock *PreHeader = L->getLoopPreheader();
1962
1963	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1964	L->getExitingBlocks(ExitingBlocks);
1965	for (BasicBlock *ExitingBB : ExitingBlocks) {
1966	// Can't rewrite non-branch yet.
1967	if (!isa<BranchInst>(Val: ExitingBB->getTerminator()))
1968	continue;
1969
1970	// If our exitting block exits multiple loops, we can only rewrite the
1971	// innermost one. Otherwise, we're changing how many times the innermost
1972	// loop runs before it exits.
1973	if (LI->getLoopFor(BB: ExitingBB) != L)
1974	continue;
1975
1976	if (!needsLFTR(L, ExitingBB))
1977	continue;
1978
1979	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1980	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1981	continue;
1982
1983	// This was handled above, but as we form SCEVs, we can sometimes refine
1984	// existing ones; this allows exit counts to be folded to zero which
1985	// weren't when optimizeLoopExits saw them. Arguably, we should iterate
1986	// until stable to handle cases like this better.
1987	if (ExitCount->isZero())
1988	continue;
1989
1990	PHINode *IndVar = FindLoopCounter(L, ExitingBB, BECount: ExitCount, SE, DT);
1991	if (!IndVar)
1992	continue;
1993
1994	// Avoid high cost expansions. Note: This heuristic is questionable in
1995	// that our definition of "high cost" is not exactly principled.
1996	if (Rewriter.isHighCostExpansion(Exprs: ExitCount, L, Budget: SCEVCheapExpansionBudget,
1997	TTI, At: PreHeader->getTerminator()))
1998	continue;
1999
2000	if (!Rewriter.isSafeToExpand(S: ExitCount))
2001	continue;
2002
2003	Changed \|= linearFunctionTestReplace(L, ExitingBB,
2004	ExitCount, IndVar,
2005	Rewriter);
2006	}
2007	}
2008	// Clear the rewriter cache, because values that are in the rewriter's cache
2009	// can be deleted in the loop below, causing the AssertingVH in the cache to
2010	// trigger.
2011	Rewriter.clear();
2012
2013	// Now that we're done iterating through lists, clean up any instructions
2014	// which are now dead.
2015	while (!DeadInsts.empty()) {
2016	Value *V = DeadInsts.pop_back_val();
2017
2018	if (PHINode *PHI = dyn_cast_or_null<PHINode>(Val: V))
2019	Changed \|= RecursivelyDeleteDeadPHINode(PN: PHI, TLI, MSSAU: MSSAU.get());
2020	else if (Instruction *Inst = dyn_cast_or_null<Instruction>(Val: V))
2021	Changed \|=
2022	RecursivelyDeleteTriviallyDeadInstructions(V: Inst, TLI, MSSAU: MSSAU.get());
2023	}
2024
2025	// The Rewriter may not be used from this point on.
2026
2027	// Loop-invariant instructions in the preheader that aren't used in the
2028	// loop may be sunk below the loop to reduce register pressure.
2029	Changed \|= sinkUnusedInvariants(L);
2030
2031	// rewriteFirstIterationLoopExitValues does not rely on the computation of
2032	// trip count and therefore can further simplify exit values in addition to
2033	// rewriteLoopExitValues.
2034	Changed \|= rewriteFirstIterationLoopExitValues(L);
2035
2036	// Clean up dead instructions.
2037	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI, MSSAU: MSSAU.get());
2038
2039	// Check a post-condition.
2040	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
2041	"Indvars did not preserve LCSSA!");
2042	if (VerifyMemorySSA && MSSAU)
2043	MSSAU ->getMemorySSA()->verifyMemorySSA();
2044
2045	return Changed;
2046	}
2047
2048	PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2049	LoopStandardAnalysisResults &AR,
2050	LPMUpdater &) {
2051	Function *F = L.getHeader()->getParent();
2052	const DataLayout &DL = F->getParent()->getDataLayout();
2053
2054	IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2055	WidenIndVars && AllowIVWidening);
2056	if (!IVS.run(L: &L))
2057	return PreservedAnalyses::all();
2058
2059	auto PA = getLoopPassPreservedAnalyses();
2060	PA.preserveSet<CFGAnalyses>();
2061	if (AR.MSSA)
2062	PA.preserve<MemorySSAAnalysis>();
2063	return PA;
2064	}
2065

// Source: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp