1//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Correlated Value Propagation pass.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
14#include "llvm/ADT/DepthFirstIterator.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/Analysis/DomTreeUpdater.h"
18#include "llvm/Analysis/GlobalsModRef.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LazyValueInfo.h"
21#include "llvm/Analysis/ValueTracking.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/ConstantRange.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstrTypes.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Instructions.h"
34#include "llvm/IR/IntrinsicInst.h"
35#include "llvm/IR/Operator.h"
36#include "llvm/IR/PassManager.h"
37#include "llvm/IR/Type.h"
38#include "llvm/IR/Value.h"
39#include "llvm/Support/Casting.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Transforms/Utils/Local.h"
42#include <cassert>
43#include <optional>
44#include <utility>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "correlated-value-propagation"
49
50STATISTIC(NumPhis, "Number of phis propagated");
51STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
52STATISTIC(NumSelects, "Number of selects propagated");
53STATISTIC(NumCmps, "Number of comparisons propagated");
54STATISTIC(NumReturns, "Number of return values propagated");
55STATISTIC(NumDeadCases, "Number of switch cases removed");
56STATISTIC(NumSDivSRemsNarrowed,
57 "Number of sdivs/srems whose width was decreased");
58STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
59STATISTIC(NumUDivURemsNarrowed,
60 "Number of udivs/urems whose width was decreased");
61STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
62STATISTIC(NumAShrsRemoved, "Number of ashr removed");
63STATISTIC(NumSRems, "Number of srem converted to urem");
64STATISTIC(NumSExt, "Number of sext converted to zext");
65STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
66STATISTIC(NumAnd, "Number of ands removed");
67STATISTIC(NumNW, "Number of no-wrap deductions");
68STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
69STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions");
70STATISTIC(NumAddNW, "Number of no-wrap deductions for add");
71STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add");
72STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add");
73STATISTIC(NumSubNW, "Number of no-wrap deductions for sub");
74STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub");
75STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub");
76STATISTIC(NumMulNW, "Number of no-wrap deductions for mul");
77STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul");
78STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul");
79STATISTIC(NumShlNW, "Number of no-wrap deductions for shl");
80STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl");
81STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl");
82STATISTIC(NumAbs, "Number of llvm.abs intrinsics removed");
83STATISTIC(NumOverflows, "Number of overflow checks removed");
84STATISTIC(NumSaturating,
85 "Number of saturating arithmetics converted to normal arithmetics");
86STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
87STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
88STATISTIC(NumSMinMax,
89 "Number of llvm.s{min,max} intrinsics simplified to unsigned");
90STATISTIC(NumUDivURemsNarrowedExpanded,
91 "Number of bound udiv's/urem's expanded");
92STATISTIC(NumZExt, "Number of non-negative deductions");
93
94static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
95 if (Constant *C = LVI->getConstant(V, CxtI: At))
96 return C;
97
98 // TODO: The following really should be sunk inside LVI's core algorithm, or
99 // at least the outer shims around such.
100 auto *C = dyn_cast<CmpInst>(Val: V);
101 if (!C)
102 return nullptr;
103
104 Value *Op0 = C->getOperand(i_nocapture: 0);
105 Constant *Op1 = dyn_cast<Constant>(Val: C->getOperand(i_nocapture: 1));
106 if (!Op1)
107 return nullptr;
108
109 LazyValueInfo::Tristate Result = LVI->getPredicateAt(
110 Pred: C->getPredicate(), V: Op0, C: Op1, CxtI: At, /*UseBlockValue=*/false);
111 if (Result == LazyValueInfo::Unknown)
112 return nullptr;
113
114 return (Result == LazyValueInfo::True)
115 ? ConstantInt::getTrue(Context&: C->getContext())
116 : ConstantInt::getFalse(Context&: C->getContext());
117}
118
119static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
120 if (S->getType()->isVectorTy() || isa<Constant>(Val: S->getCondition()))
121 return false;
122
123 bool Changed = false;
124 for (Use &U : make_early_inc_range(Range: S->uses())) {
125 auto *I = cast<Instruction>(Val: U.getUser());
126 Constant *C;
127 if (auto *PN = dyn_cast<PHINode>(Val: I))
128 C = LVI->getConstantOnEdge(V: S->getCondition(), FromBB: PN->getIncomingBlock(U),
129 ToBB: I->getParent(), CxtI: I);
130 else
131 C = getConstantAt(V: S->getCondition(), At: I, LVI);
132
133 auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
134 if (!CI)
135 continue;
136
137 U.set(CI->isOne() ? S->getTrueValue() : S->getFalseValue());
138 Changed = true;
139 ++NumSelects;
140 }
141
142 if (Changed && S->use_empty())
143 S->eraseFromParent();
144
145 return Changed;
146}
147
148/// Try to simplify a phi with constant incoming values that match the edge
149/// values of a non-constant value on all other edges:
150/// bb0:
151/// %isnull = icmp eq i8* %x, null
152/// br i1 %isnull, label %bb2, label %bb1
153/// bb1:
154/// br label %bb2
155/// bb2:
156/// %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
157/// -->
158/// %r = %x
159static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
160 DominatorTree *DT) {
161 // Collect incoming constants and initialize possible common value.
162 SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants;
163 Value *CommonValue = nullptr;
164 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
165 Value *Incoming = P->getIncomingValue(i);
166 if (auto *IncomingConstant = dyn_cast<Constant>(Val: Incoming)) {
167 IncomingConstants.push_back(Elt: std::make_pair(x&: IncomingConstant, y&: i));
168 } else if (!CommonValue) {
169 // The potential common value is initialized to the first non-constant.
170 CommonValue = Incoming;
171 } else if (Incoming != CommonValue) {
172 // There can be only one non-constant common value.
173 return false;
174 }
175 }
176
177 if (!CommonValue || IncomingConstants.empty())
178 return false;
179
180 // The common value must be valid in all incoming blocks.
181 BasicBlock *ToBB = P->getParent();
182 if (auto *CommonInst = dyn_cast<Instruction>(Val: CommonValue))
183 if (!DT->dominates(Def: CommonInst, BB: ToBB))
184 return false;
185
186 // We have a phi with exactly 1 variable incoming value and 1 or more constant
187 // incoming values. See if all constant incoming values can be mapped back to
188 // the same incoming variable value.
189 for (auto &IncomingConstant : IncomingConstants) {
190 Constant *C = IncomingConstant.first;
191 BasicBlock *IncomingBB = P->getIncomingBlock(i: IncomingConstant.second);
192 if (C != LVI->getConstantOnEdge(V: CommonValue, FromBB: IncomingBB, ToBB, CxtI: P))
193 return false;
194 }
195
196 // LVI only guarantees that the value matches a certain constant if the value
197 // is not poison. Make sure we don't replace a well-defined value with poison.
198 // This is usually satisfied due to a prior branch on the value.
199 if (!isGuaranteedNotToBePoison(V: CommonValue, AC: nullptr, CtxI: P, DT))
200 return false;
201
202 // All constant incoming values map to the same variable along the incoming
203 // edges of the phi. The phi is unnecessary.
204 P->replaceAllUsesWith(V: CommonValue);
205 P->eraseFromParent();
206 ++NumPhiCommon;
207 return true;
208}
209
210static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
211 BasicBlock *From, BasicBlock *To,
212 Instruction *CxtI) {
213 if (Constant *C = LVI->getConstantOnEdge(V: Incoming, FromBB: From, ToBB: To, CxtI))
214 return C;
215
216 // Look if the incoming value is a select with a scalar condition for which
217 // LVI can tells us the value. In that case replace the incoming value with
218 // the appropriate value of the select. This often allows us to remove the
219 // select later.
220 auto *SI = dyn_cast<SelectInst>(Val: Incoming);
221 if (!SI)
222 return nullptr;
223
224 // Once LVI learns to handle vector types, we could also add support
225 // for vector type constants that are not all zeroes or all ones.
226 Value *Condition = SI->getCondition();
227 if (!Condition->getType()->isVectorTy()) {
228 if (Constant *C = LVI->getConstantOnEdge(V: Condition, FromBB: From, ToBB: To, CxtI)) {
229 if (C->isOneValue())
230 return SI->getTrueValue();
231 if (C->isZeroValue())
232 return SI->getFalseValue();
233 }
234 }
235
236 // Look if the select has a constant but LVI tells us that the incoming
237 // value can never be that constant. In that case replace the incoming
238 // value with the other value of the select. This often allows us to
239 // remove the select later.
240
241 // The "false" case
242 if (auto *C = dyn_cast<Constant>(Val: SI->getFalseValue()))
243 if (LVI->getPredicateOnEdge(Pred: ICmpInst::ICMP_EQ, V: SI, C, FromBB: From, ToBB: To, CxtI) ==
244 LazyValueInfo::False)
245 return SI->getTrueValue();
246
247 // The "true" case,
248 // similar to the select "false" case, but try the select "true" value
249 if (auto *C = dyn_cast<Constant>(Val: SI->getTrueValue()))
250 if (LVI->getPredicateOnEdge(Pred: ICmpInst::ICMP_EQ, V: SI, C, FromBB: From, ToBB: To, CxtI) ==
251 LazyValueInfo::False)
252 return SI->getFalseValue();
253
254 return nullptr;
255}
256
257static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
258 const SimplifyQuery &SQ) {
259 bool Changed = false;
260
261 BasicBlock *BB = P->getParent();
262 for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
263 Value *Incoming = P->getIncomingValue(i);
264 if (isa<Constant>(Val: Incoming)) continue;
265
266 Value *V = getValueOnEdge(LVI, Incoming, From: P->getIncomingBlock(i), To: BB, CxtI: P);
267 if (V) {
268 P->setIncomingValue(i, V);
269 Changed = true;
270 }
271 }
272
273 if (Value *V = simplifyInstruction(I: P, Q: SQ)) {
274 P->replaceAllUsesWith(V);
275 P->eraseFromParent();
276 Changed = true;
277 }
278
279 if (!Changed)
280 Changed = simplifyCommonValuePhi(P, LVI, DT);
281
282 if (Changed)
283 ++NumPhis;
284
285 return Changed;
286}
287
288static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
289 // Only for signed relational comparisons of scalar integers.
290 if (Cmp->getType()->isVectorTy() ||
291 !Cmp->getOperand(i_nocapture: 0)->getType()->isIntegerTy())
292 return false;
293
294 if (!Cmp->isSigned())
295 return false;
296
297 ICmpInst::Predicate UnsignedPred =
298 ConstantRange::getEquivalentPredWithFlippedSignedness(
299 Pred: Cmp->getPredicate(),
300 CR1: LVI->getConstantRangeAtUse(U: Cmp->getOperandUse(i: 0),
301 /*UndefAllowed*/ true),
302 CR2: LVI->getConstantRangeAtUse(U: Cmp->getOperandUse(i: 1),
303 /*UndefAllowed*/ true));
304
305 if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
306 return false;
307
308 ++NumSICmps;
309 Cmp->setPredicate(UnsignedPred);
310
311 return true;
312}
313
314/// See if LazyValueInfo's ability to exploit edge conditions or range
315/// information is sufficient to prove this comparison. Even for local
316/// conditions, this can sometimes prove conditions instcombine can't by
317/// exploiting range information.
318static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
319 Value *Op0 = Cmp->getOperand(i_nocapture: 0);
320 Value *Op1 = Cmp->getOperand(i_nocapture: 1);
321 LazyValueInfo::Tristate Result =
322 LVI->getPredicateAt(Pred: Cmp->getPredicate(), LHS: Op0, RHS: Op1, CxtI: Cmp,
323 /*UseBlockValue=*/true);
324 if (Result == LazyValueInfo::Unknown)
325 return false;
326
327 ++NumCmps;
328 Constant *TorF =
329 ConstantInt::get(Ty: CmpInst::makeCmpResultType(opnd_type: Op0->getType()), V: Result);
330 Cmp->replaceAllUsesWith(V: TorF);
331 Cmp->eraseFromParent();
332 return true;
333}
334
335static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
336 if (constantFoldCmp(Cmp, LVI))
337 return true;
338
339 if (auto *ICmp = dyn_cast<ICmpInst>(Val: Cmp))
340 if (processICmp(Cmp: ICmp, LVI))
341 return true;
342
343 return false;
344}
345
346/// Simplify a switch instruction by removing cases which can never fire. If the
347/// uselessness of a case could be determined locally then constant propagation
348/// would already have figured it out. Instead, walk the predecessors and
349/// statically evaluate cases based on information available on that edge. Cases
350/// that cannot fire no matter what the incoming edge can safely be removed. If
351/// a case fires on every incoming edge then the entire switch can be removed
352/// and replaced with a branch to the case destination.
353static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
354 DominatorTree *DT) {
355 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
356 Value *Cond = I->getCondition();
357 BasicBlock *BB = I->getParent();
358
359 // Analyse each switch case in turn.
360 bool Changed = false;
361 DenseMap<BasicBlock*, int> SuccessorsCount;
362 for (auto *Succ : successors(BB))
363 SuccessorsCount[Succ]++;
364
365 { // Scope for SwitchInstProfUpdateWrapper. It must not live during
366 // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
367 SwitchInstProfUpdateWrapper SI(*I);
368
369 for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
370 ConstantInt *Case = CI->getCaseValue();
371 LazyValueInfo::Tristate State =
372 LVI->getPredicateAt(Pred: CmpInst::ICMP_EQ, V: Cond, C: Case, CxtI: I,
373 /* UseBlockValue */ true);
374
375 if (State == LazyValueInfo::False) {
376 // This case never fires - remove it.
377 BasicBlock *Succ = CI->getCaseSuccessor();
378 Succ->removePredecessor(Pred: BB);
379 CI = SI.removeCase(I: CI);
380 CE = SI->case_end();
381
382 // The condition can be modified by removePredecessor's PHI simplification
383 // logic.
384 Cond = SI->getCondition();
385
386 ++NumDeadCases;
387 Changed = true;
388 if (--SuccessorsCount[Succ] == 0)
389 DTU.applyUpdatesPermissive(Updates: {{DominatorTree::Delete, BB, Succ}});
390 continue;
391 }
392 if (State == LazyValueInfo::True) {
393 // This case always fires. Arrange for the switch to be turned into an
394 // unconditional branch by replacing the switch condition with the case
395 // value.
396 SI->setCondition(Case);
397 NumDeadCases += SI->getNumCases();
398 Changed = true;
399 break;
400 }
401
402 // Increment the case iterator since we didn't delete it.
403 ++CI;
404 }
405 }
406
407 if (Changed)
408 // If the switch has been simplified to the point where it can be replaced
409 // by a branch then do so now.
410 ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
411 /*TLI = */ nullptr, DTU: &DTU);
412 return Changed;
413}
414
415// See if we can prove that the given binary op intrinsic will not overflow.
416static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
417 ConstantRange LRange =
418 LVI->getConstantRangeAtUse(U: BO->getOperandUse(i: 0), /*UndefAllowed*/ false);
419 ConstantRange RRange =
420 LVI->getConstantRangeAtUse(U: BO->getOperandUse(i: 1), /*UndefAllowed*/ false);
421 ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
422 BinOp: BO->getBinaryOp(), Other: RRange, NoWrapKind: BO->getNoWrapKind());
423 return NWRegion.contains(CR: LRange);
424}
425
426static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode,
427 bool NewNSW, bool NewNUW) {
428 Statistic *OpcNW, *OpcNSW, *OpcNUW;
429 switch (Opcode) {
430 case Instruction::Add:
431 OpcNW = &NumAddNW;
432 OpcNSW = &NumAddNSW;
433 OpcNUW = &NumAddNUW;
434 break;
435 case Instruction::Sub:
436 OpcNW = &NumSubNW;
437 OpcNSW = &NumSubNSW;
438 OpcNUW = &NumSubNUW;
439 break;
440 case Instruction::Mul:
441 OpcNW = &NumMulNW;
442 OpcNSW = &NumMulNSW;
443 OpcNUW = &NumMulNUW;
444 break;
445 case Instruction::Shl:
446 OpcNW = &NumShlNW;
447 OpcNSW = &NumShlNSW;
448 OpcNUW = &NumShlNUW;
449 break;
450 default:
451 llvm_unreachable("Will not be called with other binops");
452 }
453
454 auto *Inst = dyn_cast<Instruction>(Val: V);
455 if (NewNSW) {
456 ++NumNW;
457 ++*OpcNW;
458 ++NumNSW;
459 ++*OpcNSW;
460 if (Inst)
461 Inst->setHasNoSignedWrap();
462 }
463 if (NewNUW) {
464 ++NumNW;
465 ++*OpcNW;
466 ++NumNUW;
467 ++*OpcNUW;
468 if (Inst)
469 Inst->setHasNoUnsignedWrap();
470 }
471}
472
473static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);
474
475// See if @llvm.abs argument is alays positive/negative, and simplify.
476// Notably, INT_MIN can belong to either range, regardless of the NSW,
477// because it is negation-invariant.
478static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
479 Value *X = II->getArgOperand(i: 0);
480 Type *Ty = X->getType();
481 if (!Ty->isIntegerTy())
482 return false;
483
484 bool IsIntMinPoison = cast<ConstantInt>(Val: II->getArgOperand(i: 1))->isOne();
485 APInt IntMin = APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits());
486 ConstantRange Range = LVI->getConstantRangeAtUse(
487 U: II->getOperandUse(i: 0), /*UndefAllowed*/ IsIntMinPoison);
488
489 // Is X in [0, IntMin]? NOTE: INT_MIN is fine!
490 if (Range.icmp(Pred: CmpInst::ICMP_ULE, Other: IntMin)) {
491 ++NumAbs;
492 II->replaceAllUsesWith(V: X);
493 II->eraseFromParent();
494 return true;
495 }
496
497 // Is X in [IntMin, 0]? NOTE: INT_MIN is fine!
498 if (Range.getSignedMax().isNonPositive()) {
499 IRBuilder<> B(II);
500 Value *NegX = B.CreateNeg(V: X, Name: II->getName(),
501 /*HasNSW=*/IsIntMinPoison);
502 ++NumAbs;
503 II->replaceAllUsesWith(V: NegX);
504 II->eraseFromParent();
505
506 // See if we can infer some no-wrap flags.
507 if (auto *BO = dyn_cast<BinaryOperator>(Val: NegX))
508 processBinOp(BinOp: BO, LVI);
509
510 return true;
511 }
512
513 // Argument's range crosses zero.
514 // Can we at least tell that the argument is never INT_MIN?
515 if (!IsIntMinPoison && !Range.contains(Val: IntMin)) {
516 ++NumNSW;
517 ++NumSubNSW;
518 II->setArgOperand(i: 1, v: ConstantInt::getTrue(Context&: II->getContext()));
519 return true;
520 }
521 return false;
522}
523
524// See if this min/max intrinsic always picks it's one specific operand.
525// If not, check whether we can canonicalize signed minmax into unsigned version
526static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
527 CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(pred: MM->getPredicate());
528 ConstantRange LHS_CR = LVI->getConstantRangeAtUse(U: MM->getOperandUse(i: 0),
529 /*UndefAllowed*/ false);
530 ConstantRange RHS_CR = LVI->getConstantRangeAtUse(U: MM->getOperandUse(i: 1),
531 /*UndefAllowed*/ false);
532 if (LHS_CR.icmp(Pred, Other: RHS_CR)) {
533 ++NumMinMax;
534 MM->replaceAllUsesWith(V: MM->getLHS());
535 MM->eraseFromParent();
536 return true;
537 }
538 if (RHS_CR.icmp(Pred, Other: LHS_CR)) {
539 ++NumMinMax;
540 MM->replaceAllUsesWith(V: MM->getRHS());
541 MM->eraseFromParent();
542 return true;
543 }
544
545 if (MM->isSigned() &&
546 ConstantRange::areInsensitiveToSignednessOfICmpPredicate(CR1: LHS_CR,
547 CR2: RHS_CR)) {
548 ++NumSMinMax;
549 IRBuilder<> B(MM);
550 MM->replaceAllUsesWith(V: B.CreateBinaryIntrinsic(
551 ID: MM->getIntrinsicID() == Intrinsic::smin ? Intrinsic::umin
552 : Intrinsic::umax,
553 LHS: MM->getLHS(), RHS: MM->getRHS()));
554 MM->eraseFromParent();
555 return true;
556 }
557
558 return false;
559}
560
561// Rewrite this with.overflow intrinsic as non-overflowing.
562static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
563 IRBuilder<> B(WO);
564 Instruction::BinaryOps Opcode = WO->getBinaryOp();
565 bool NSW = WO->isSigned();
566 bool NUW = !WO->isSigned();
567
568 Value *NewOp =
569 B.CreateBinOp(Opc: Opcode, LHS: WO->getLHS(), RHS: WO->getRHS(), Name: WO->getName());
570 setDeducedOverflowingFlags(V: NewOp, Opcode, NewNSW: NSW, NewNUW: NUW);
571
572 StructType *ST = cast<StructType>(Val: WO->getType());
573 Constant *Struct = ConstantStruct::get(T: ST,
574 V: { PoisonValue::get(T: ST->getElementType(N: 0)),
575 ConstantInt::getFalse(Ty: ST->getElementType(N: 1)) });
576 Value *NewI = B.CreateInsertValue(Agg: Struct, Val: NewOp, Idxs: 0);
577 WO->replaceAllUsesWith(V: NewI);
578 WO->eraseFromParent();
579 ++NumOverflows;
580
581 // See if we can infer the other no-wrap too.
582 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewOp))
583 processBinOp(BinOp: BO, LVI);
584
585 return true;
586}
587
588static bool processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
589 Instruction::BinaryOps Opcode = SI->getBinaryOp();
590 bool NSW = SI->isSigned();
591 bool NUW = !SI->isSigned();
592 BinaryOperator *BinOp = BinaryOperator::Create(
593 Op: Opcode, S1: SI->getLHS(), S2: SI->getRHS(), Name: SI->getName(), InsertBefore: SI->getIterator());
594 BinOp->setDebugLoc(SI->getDebugLoc());
595 setDeducedOverflowingFlags(V: BinOp, Opcode, NewNSW: NSW, NewNUW: NUW);
596
597 SI->replaceAllUsesWith(V: BinOp);
598 SI->eraseFromParent();
599 ++NumSaturating;
600
601 // See if we can infer the other no-wrap too.
602 if (auto *BO = dyn_cast<BinaryOperator>(Val: BinOp))
603 processBinOp(BinOp: BO, LVI);
604
605 return true;
606}
607
608/// Infer nonnull attributes for the arguments at the specified callsite.
609static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
610
611 if (CB.getIntrinsicID() == Intrinsic::abs) {
612 return processAbsIntrinsic(II: &cast<IntrinsicInst>(Val&: CB), LVI);
613 }
614
615 if (auto *MM = dyn_cast<MinMaxIntrinsic>(Val: &CB)) {
616 return processMinMaxIntrinsic(MM, LVI);
617 }
618
619 if (auto *WO = dyn_cast<WithOverflowInst>(Val: &CB)) {
620 if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(BO: WO, LVI)) {
621 return processOverflowIntrinsic(WO, LVI);
622 }
623 }
624
625 if (auto *SI = dyn_cast<SaturatingInst>(Val: &CB)) {
626 if (SI->getType()->isIntegerTy() && willNotOverflow(BO: SI, LVI)) {
627 return processSaturatingInst(SI, LVI);
628 }
629 }
630
631 bool Changed = false;
632
633 // Deopt bundle operands are intended to capture state with minimal
634 // perturbance of the code otherwise. If we can find a constant value for
635 // any such operand and remove a use of the original value, that's
636 // desireable since it may allow further optimization of that value (e.g. via
637 // single use rules in instcombine). Since deopt uses tend to,
638 // idiomatically, appear along rare conditional paths, it's reasonable likely
639 // we may have a conditional fact with which LVI can fold.
640 if (auto DeoptBundle = CB.getOperandBundle(ID: LLVMContext::OB_deopt)) {
641 for (const Use &ConstU : DeoptBundle->Inputs) {
642 Use &U = const_cast<Use&>(ConstU);
643 Value *V = U.get();
644 if (V->getType()->isVectorTy()) continue;
645 if (isa<Constant>(Val: V)) continue;
646
647 Constant *C = LVI->getConstant(V, CxtI: &CB);
648 if (!C) continue;
649 U.set(C);
650 Changed = true;
651 }
652 }
653
654 SmallVector<unsigned, 4> ArgNos;
655 unsigned ArgNo = 0;
656
657 for (Value *V : CB.args()) {
658 PointerType *Type = dyn_cast<PointerType>(Val: V->getType());
659 // Try to mark pointer typed parameters as non-null. We skip the
660 // relatively expensive analysis for constants which are obviously either
661 // null or non-null to start with.
662 if (Type && !CB.paramHasAttr(ArgNo, Attribute::Kind: NonNull) &&
663 !isa<Constant>(Val: V) &&
664 LVI->getPredicateAt(Pred: ICmpInst::ICMP_EQ, V,
665 C: ConstantPointerNull::get(T: Type), CxtI: &CB,
666 /*UseBlockValue=*/false) == LazyValueInfo::False)
667 ArgNos.push_back(Elt: ArgNo);
668 ArgNo++;
669 }
670
671 assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly.");
672
673 if (ArgNos.empty())
674 return Changed;
675
676 NumNonNull += ArgNos.size();
677 AttributeList AS = CB.getAttributes();
678 LLVMContext &Ctx = CB.getContext();
679 AS = AS.addParamAttribute(Ctx, ArgNos,
680 Attribute::get(Ctx, Attribute::NonNull));
681 CB.setAttributes(AS);
682
683 return true;
684}
685
686enum class Domain { NonNegative, NonPositive, Unknown };
687
688static Domain getDomain(const ConstantRange &CR) {
689 if (CR.isAllNonNegative())
690 return Domain::NonNegative;
691 if (CR.icmp(Pred: ICmpInst::ICMP_SLE, Other: APInt::getZero(numBits: CR.getBitWidth())))
692 return Domain::NonPositive;
693 return Domain::Unknown;
694}
695
696/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
697/// sufficient to contain its operands.
698static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
699 const ConstantRange &RCR) {
700 assert(Instr->getOpcode() == Instruction::SDiv ||
701 Instr->getOpcode() == Instruction::SRem);
702 assert(!Instr->getType()->isVectorTy());
703
704 // Find the smallest power of two bitwidth that's sufficient to hold Instr's
705 // operands.
706 unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
707
708 // What is the smallest bit width that can accommodate the entire value ranges
709 // of both of the operands?
710 unsigned MinSignedBits =
711 std::max(a: LCR.getMinSignedBits(), b: RCR.getMinSignedBits());
712
713 // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
714 // prove that such a combination is impossible, we need to bump the bitwidth.
715 if (RCR.contains(Val: APInt::getAllOnes(numBits: OrigWidth)) &&
716 LCR.contains(Val: APInt::getSignedMinValue(numBits: MinSignedBits).sext(width: OrigWidth)))
717 ++MinSignedBits;
718
719 // Don't shrink below 8 bits wide.
720 unsigned NewWidth = std::max<unsigned>(a: PowerOf2Ceil(A: MinSignedBits), b: 8);
721
722 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
723 // two.
724 if (NewWidth >= OrigWidth)
725 return false;
726
727 ++NumSDivSRemsNarrowed;
728 IRBuilder<> B{Instr};
729 auto *TruncTy = Type::getIntNTy(C&: Instr->getContext(), N: NewWidth);
730 auto *LHS = B.CreateTruncOrBitCast(V: Instr->getOperand(i_nocapture: 0), DestTy: TruncTy,
731 Name: Instr->getName() + ".lhs.trunc");
732 auto *RHS = B.CreateTruncOrBitCast(V: Instr->getOperand(i_nocapture: 1), DestTy: TruncTy,
733 Name: Instr->getName() + ".rhs.trunc");
734 auto *BO = B.CreateBinOp(Opc: Instr->getOpcode(), LHS, RHS, Name: Instr->getName());
735 auto *Sext = B.CreateSExt(V: BO, DestTy: Instr->getType(), Name: Instr->getName() + ".sext");
736 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: BO))
737 if (BinOp->getOpcode() == Instruction::SDiv)
738 BinOp->setIsExact(Instr->isExact());
739
740 Instr->replaceAllUsesWith(V: Sext);
741 Instr->eraseFromParent();
742 return true;
743}
744
745static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
746 const ConstantRange &YCR) {
747 Type *Ty = Instr->getType();
748 assert(Instr->getOpcode() == Instruction::UDiv ||
749 Instr->getOpcode() == Instruction::URem);
750 assert(!Ty->isVectorTy());
751 bool IsRem = Instr->getOpcode() == Instruction::URem;
752
753 Value *X = Instr->getOperand(i_nocapture: 0);
754 Value *Y = Instr->getOperand(i_nocapture: 1);
755
756 // X u/ Y -> 0 iff X u< Y
757 // X u% Y -> X iff X u< Y
758 if (XCR.icmp(Pred: ICmpInst::ICMP_ULT, Other: YCR)) {
759 Instr->replaceAllUsesWith(V: IsRem ? X : Constant::getNullValue(Ty));
760 Instr->eraseFromParent();
761 ++NumUDivURemsNarrowedExpanded;
762 return true;
763 }
764
765 // Given
766 // R = X u% Y
767 // We can represent the modulo operation as a loop/self-recursion:
768 // urem_rec(X, Y):
769 // Z = X - Y
770 // if X u< Y
771 // ret X
772 // else
773 // ret urem_rec(Z, Y)
774 // which isn't better, but if we only need a single iteration
775 // to compute the answer, this becomes quite good:
776 // R = X < Y ? X : X - Y iff X u< 2*Y (w/ unsigned saturation)
777 // Now, we do not care about all full multiples of Y in X, they do not change
778 // the answer, thus we could rewrite the expression as:
779 // X* = X - (Y * |_ X / Y _|)
780 // R = X* % Y
781 // so we don't need the *first* iteration to return, we just need to
782 // know *which* iteration will always return, so we could also rewrite it as:
783 // X* = X - (Y * |_ X / Y _|)
784 // R = X* % Y iff X* u< 2*Y (w/ unsigned saturation)
785 // but that does not seem profitable here.
786
787 // Even if we don't know X's range, the divisor may be so large, X can't ever
788 // be 2x larger than that. I.e. if divisor is always negative.
789 if (!XCR.icmp(Pred: ICmpInst::ICMP_ULT,
790 Other: YCR.umul_sat(Other: APInt(YCR.getBitWidth(), 2))) &&
791 !YCR.isAllNegative())
792 return false;
793
794 IRBuilder<> B(Instr);
795 Value *ExpandedOp;
796 if (XCR.icmp(Pred: ICmpInst::ICMP_UGE, Other: YCR)) {
797 // If X is between Y and 2*Y the result is known.
798 if (IsRem)
799 ExpandedOp = B.CreateNUWSub(LHS: X, RHS: Y);
800 else
801 ExpandedOp = ConstantInt::get(Ty: Instr->getType(), V: 1);
802 } else if (IsRem) {
803 // NOTE: this transformation introduces two uses of X,
804 // but it may be undef so we must freeze it first.
805 Value *FrozenX = X;
806 if (!isGuaranteedNotToBeUndef(V: X))
807 FrozenX = B.CreateFreeze(V: X, Name: X->getName() + ".frozen");
808 Value *FrozenY = Y;
809 if (!isGuaranteedNotToBeUndef(V: Y))
810 FrozenY = B.CreateFreeze(V: Y, Name: Y->getName() + ".frozen");
811 auto *AdjX = B.CreateNUWSub(LHS: FrozenX, RHS: FrozenY, Name: Instr->getName() + ".urem");
812 auto *Cmp = B.CreateICmp(P: ICmpInst::ICMP_ULT, LHS: FrozenX, RHS: FrozenY,
813 Name: Instr->getName() + ".cmp");
814 ExpandedOp = B.CreateSelect(C: Cmp, True: FrozenX, False: AdjX);
815 } else {
816 auto *Cmp =
817 B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: X, RHS: Y, Name: Instr->getName() + ".cmp");
818 ExpandedOp = B.CreateZExt(V: Cmp, DestTy: Ty, Name: Instr->getName() + ".udiv");
819 }
820 ExpandedOp->takeName(V: Instr);
821 Instr->replaceAllUsesWith(V: ExpandedOp);
822 Instr->eraseFromParent();
823 ++NumUDivURemsNarrowedExpanded;
824 return true;
825}
826
827/// Try to shrink a udiv/urem's width down to the smallest power of two that's
828/// sufficient to contain its operands.
829static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
830 const ConstantRange &YCR) {
831 assert(Instr->getOpcode() == Instruction::UDiv ||
832 Instr->getOpcode() == Instruction::URem);
833 assert(!Instr->getType()->isVectorTy());
834
835 // Find the smallest power of two bitwidth that's sufficient to hold Instr's
836 // operands.
837
838 // What is the smallest bit width that can accommodate the entire value ranges
839 // of both of the operands?
840 unsigned MaxActiveBits = std::max(a: XCR.getActiveBits(), b: YCR.getActiveBits());
841 // Don't shrink below 8 bits wide.
842 unsigned NewWidth = std::max<unsigned>(a: PowerOf2Ceil(A: MaxActiveBits), b: 8);
843
844 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
845 // two.
846 if (NewWidth >= Instr->getType()->getIntegerBitWidth())
847 return false;
848
849 ++NumUDivURemsNarrowed;
850 IRBuilder<> B{Instr};
851 auto *TruncTy = Type::getIntNTy(C&: Instr->getContext(), N: NewWidth);
852 auto *LHS = B.CreateTruncOrBitCast(V: Instr->getOperand(i_nocapture: 0), DestTy: TruncTy,
853 Name: Instr->getName() + ".lhs.trunc");
854 auto *RHS = B.CreateTruncOrBitCast(V: Instr->getOperand(i_nocapture: 1), DestTy: TruncTy,
855 Name: Instr->getName() + ".rhs.trunc");
856 auto *BO = B.CreateBinOp(Opc: Instr->getOpcode(), LHS, RHS, Name: Instr->getName());
857 auto *Zext = B.CreateZExt(V: BO, DestTy: Instr->getType(), Name: Instr->getName() + ".zext");
858 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: BO))
859 if (BinOp->getOpcode() == Instruction::UDiv)
860 BinOp->setIsExact(Instr->isExact());
861
862 Instr->replaceAllUsesWith(V: Zext);
863 Instr->eraseFromParent();
864 return true;
865}
866
867static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
868 assert(Instr->getOpcode() == Instruction::UDiv ||
869 Instr->getOpcode() == Instruction::URem);
870 if (Instr->getType()->isVectorTy())
871 return false;
872
873 ConstantRange XCR = LVI->getConstantRangeAtUse(U: Instr->getOperandUse(i: 0),
874 /*UndefAllowed*/ false);
875 // Allow undef for RHS, as we can assume it is division by zero UB.
876 ConstantRange YCR = LVI->getConstantRangeAtUse(U: Instr->getOperandUse(i: 1),
877 /*UndefAllowed*/ true);
878 if (expandUDivOrURem(Instr, XCR, YCR))
879 return true;
880
881 return narrowUDivOrURem(Instr, XCR, YCR);
882}
883
884static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
885 const ConstantRange &RCR, LazyValueInfo *LVI) {
886 assert(SDI->getOpcode() == Instruction::SRem);
887 assert(!SDI->getType()->isVectorTy());
888
889 if (LCR.abs().icmp(Pred: CmpInst::ICMP_ULT, Other: RCR.abs())) {
890 SDI->replaceAllUsesWith(V: SDI->getOperand(i_nocapture: 0));
891 SDI->eraseFromParent();
892 return true;
893 }
894
895 struct Operand {
896 Value *V;
897 Domain D;
898 };
899 std::array<Operand, 2> Ops = {._M_elems: {{.V: SDI->getOperand(i_nocapture: 0), .D: getDomain(CR: LCR)},
900 {.V: SDI->getOperand(i_nocapture: 1), .D: getDomain(CR: RCR)}}};
901 if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
902 return false;
903
904 // We know domains of both of the operands!
905 ++NumSRems;
906
907 // We need operands to be non-negative, so negate each one that isn't.
908 for (Operand &Op : Ops) {
909 if (Op.D == Domain::NonNegative)
910 continue;
911 auto *BO = BinaryOperator::CreateNeg(Op: Op.V, Name: Op.V->getName() + ".nonneg",
912 InsertBefore: SDI->getIterator());
913 BO->setDebugLoc(SDI->getDebugLoc());
914 Op.V = BO;
915 }
916
917 auto *URem = BinaryOperator::CreateURem(V1: Ops[0].V, V2: Ops[1].V, Name: SDI->getName(),
918 It: SDI->getIterator());
919 URem->setDebugLoc(SDI->getDebugLoc());
920
921 auto *Res = URem;
922
923 // If the divident was non-positive, we need to negate the result.
924 if (Ops[0].D == Domain::NonPositive) {
925 Res = BinaryOperator::CreateNeg(Op: Res, Name: Res->getName() + ".neg",
926 InsertBefore: SDI->getIterator());
927 Res->setDebugLoc(SDI->getDebugLoc());
928 }
929
930 SDI->replaceAllUsesWith(V: Res);
931 SDI->eraseFromParent();
932
933 // Try to simplify our new urem.
934 processUDivOrURem(Instr: URem, LVI);
935
936 return true;
937}
938
939/// See if LazyValueInfo's ability to exploit edge conditions or range
940/// information is sufficient to prove the signs of both operands of this SDiv.
941/// If this is the case, replace the SDiv with a UDiv. Even for local
942/// conditions, this can sometimes prove conditions instcombine can't by
943/// exploiting range information.
944static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
945 const ConstantRange &RCR, LazyValueInfo *LVI) {
946 assert(SDI->getOpcode() == Instruction::SDiv);
947 assert(!SDI->getType()->isVectorTy());
948
949 // Check whether the division folds to a constant.
950 ConstantRange DivCR = LCR.sdiv(Other: RCR);
951 if (const APInt *Elem = DivCR.getSingleElement()) {
952 SDI->replaceAllUsesWith(V: ConstantInt::get(Ty: SDI->getType(), V: *Elem));
953 SDI->eraseFromParent();
954 return true;
955 }
956
957 struct Operand {
958 Value *V;
959 Domain D;
960 };
961 std::array<Operand, 2> Ops = {._M_elems: {{.V: SDI->getOperand(i_nocapture: 0), .D: getDomain(CR: LCR)},
962 {.V: SDI->getOperand(i_nocapture: 1), .D: getDomain(CR: RCR)}}};
963 if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
964 return false;
965
966 // We know domains of both of the operands!
967 ++NumSDivs;
968
969 // We need operands to be non-negative, so negate each one that isn't.
970 for (Operand &Op : Ops) {
971 if (Op.D == Domain::NonNegative)
972 continue;
973 auto *BO = BinaryOperator::CreateNeg(Op: Op.V, Name: Op.V->getName() + ".nonneg",
974 InsertBefore: SDI->getIterator());
975 BO->setDebugLoc(SDI->getDebugLoc());
976 Op.V = BO;
977 }
978
979 auto *UDiv = BinaryOperator::CreateUDiv(V1: Ops[0].V, V2: Ops[1].V, Name: SDI->getName(),
980 It: SDI->getIterator());
981 UDiv->setDebugLoc(SDI->getDebugLoc());
982 UDiv->setIsExact(SDI->isExact());
983
984 auto *Res = UDiv;
985
986 // If the operands had two different domains, we need to negate the result.
987 if (Ops[0].D != Ops[1].D) {
988 Res = BinaryOperator::CreateNeg(Op: Res, Name: Res->getName() + ".neg",
989 InsertBefore: SDI->getIterator());
990 Res->setDebugLoc(SDI->getDebugLoc());
991 }
992
993 SDI->replaceAllUsesWith(V: Res);
994 SDI->eraseFromParent();
995
996 // Try to simplify our new udiv.
997 processUDivOrURem(Instr: UDiv, LVI);
998
999 return true;
1000}
1001
1002static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
1003 assert(Instr->getOpcode() == Instruction::SDiv ||
1004 Instr->getOpcode() == Instruction::SRem);
1005 if (Instr->getType()->isVectorTy())
1006 return false;
1007
1008 ConstantRange LCR =
1009 LVI->getConstantRangeAtUse(U: Instr->getOperandUse(i: 0), /*AllowUndef*/ UndefAllowed: false);
1010 // Allow undef for RHS, as we can assume it is division by zero UB.
1011 ConstantRange RCR =
1012 LVI->getConstantRangeAtUse(U: Instr->getOperandUse(i: 1), /*AlloweUndef*/ UndefAllowed: true);
1013 if (Instr->getOpcode() == Instruction::SDiv)
1014 if (processSDiv(SDI: Instr, LCR, RCR, LVI))
1015 return true;
1016
1017 if (Instr->getOpcode() == Instruction::SRem) {
1018 if (processSRem(SDI: Instr, LCR, RCR, LVI))
1019 return true;
1020 }
1021
1022 return narrowSDivOrSRem(Instr, LCR, RCR);
1023}
1024
1025static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
1026 if (SDI->getType()->isVectorTy())
1027 return false;
1028
1029 ConstantRange LRange =
1030 LVI->getConstantRangeAtUse(U: SDI->getOperandUse(i: 0), /*UndefAllowed*/ false);
1031 unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
1032 ConstantRange NegOneOrZero =
1033 ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
1034 if (NegOneOrZero.contains(CR: LRange)) {
1035 // ashr of -1 or 0 never changes the value, so drop the whole instruction
1036 ++NumAShrsRemoved;
1037 SDI->replaceAllUsesWith(V: SDI->getOperand(i_nocapture: 0));
1038 SDI->eraseFromParent();
1039 return true;
1040 }
1041
1042 if (!LRange.isAllNonNegative())
1043 return false;
1044
1045 ++NumAShrsConverted;
1046 auto *BO = BinaryOperator::CreateLShr(V1: SDI->getOperand(i_nocapture: 0), V2: SDI->getOperand(i_nocapture: 1),
1047 Name: "", It: SDI->getIterator());
1048 BO->takeName(V: SDI);
1049 BO->setDebugLoc(SDI->getDebugLoc());
1050 BO->setIsExact(SDI->isExact());
1051 SDI->replaceAllUsesWith(V: BO);
1052 SDI->eraseFromParent();
1053
1054 return true;
1055}
1056
1057static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
1058 if (SDI->getType()->isVectorTy())
1059 return false;
1060
1061 const Use &Base = SDI->getOperandUse(i: 0);
1062 if (!LVI->getConstantRangeAtUse(U: Base, /*UndefAllowed*/ false)
1063 .isAllNonNegative())
1064 return false;
1065
1066 ++NumSExt;
1067 auto *ZExt = CastInst::CreateZExtOrBitCast(S: Base, Ty: SDI->getType(), Name: "",
1068 InsertBefore: SDI->getIterator());
1069 ZExt->takeName(V: SDI);
1070 ZExt->setDebugLoc(SDI->getDebugLoc());
1071 ZExt->setNonNeg();
1072 SDI->replaceAllUsesWith(V: ZExt);
1073 SDI->eraseFromParent();
1074
1075 return true;
1076}
1077
1078static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
1079 if (ZExt->getType()->isVectorTy())
1080 return false;
1081
1082 if (ZExt->hasNonNeg())
1083 return false;
1084
1085 const Use &Base = ZExt->getOperandUse(i: 0);
1086 if (!LVI->getConstantRangeAtUse(U: Base, /*UndefAllowed*/ false)
1087 .isAllNonNegative())
1088 return false;
1089
1090 ++NumZExt;
1091 ZExt->setNonNeg();
1092
1093 return true;
1094}
1095
1096static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
1097 using OBO = OverflowingBinaryOperator;
1098
1099 if (BinOp->getType()->isVectorTy())
1100 return false;
1101
1102 bool NSW = BinOp->hasNoSignedWrap();
1103 bool NUW = BinOp->hasNoUnsignedWrap();
1104 if (NSW && NUW)
1105 return false;
1106
1107 Instruction::BinaryOps Opcode = BinOp->getOpcode();
1108 ConstantRange LRange = LVI->getConstantRangeAtUse(U: BinOp->getOperandUse(i: 0),
1109 /*UndefAllowed=*/false);
1110 ConstantRange RRange = LVI->getConstantRangeAtUse(U: BinOp->getOperandUse(i: 1),
1111 /*UndefAllowed=*/false);
1112
1113 bool Changed = false;
1114 bool NewNUW = false, NewNSW = false;
1115 if (!NUW) {
1116 ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
1117 BinOp: Opcode, Other: RRange, NoWrapKind: OBO::NoUnsignedWrap);
1118 NewNUW = NUWRange.contains(CR: LRange);
1119 Changed |= NewNUW;
1120 }
1121 if (!NSW) {
1122 ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
1123 BinOp: Opcode, Other: RRange, NoWrapKind: OBO::NoSignedWrap);
1124 NewNSW = NSWRange.contains(CR: LRange);
1125 Changed |= NewNSW;
1126 }
1127
1128 setDeducedOverflowingFlags(V: BinOp, Opcode, NewNSW, NewNUW);
1129
1130 return Changed;
1131}
1132
1133static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
1134 if (BinOp->getType()->isVectorTy())
1135 return false;
1136
1137 // Pattern match (and lhs, C) where C includes a superset of bits which might
1138 // be set in lhs. This is a common truncation idiom created by instcombine.
1139 const Use &LHS = BinOp->getOperandUse(i: 0);
1140 ConstantInt *RHS = dyn_cast<ConstantInt>(Val: BinOp->getOperand(i_nocapture: 1));
1141 if (!RHS || !RHS->getValue().isMask())
1142 return false;
1143
1144 // We can only replace the AND with LHS based on range info if the range does
1145 // not include undef.
1146 ConstantRange LRange =
1147 LVI->getConstantRangeAtUse(U: LHS, /*UndefAllowed=*/false);
1148 if (!LRange.getUnsignedMax().ule(RHS: RHS->getValue()))
1149 return false;
1150
1151 BinOp->replaceAllUsesWith(V: LHS);
1152 BinOp->eraseFromParent();
1153 NumAnd++;
1154 return true;
1155}
1156
1157static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
1158 const SimplifyQuery &SQ) {
1159 bool FnChanged = false;
1160 // Visiting in a pre-order depth-first traversal causes us to simplify early
1161 // blocks before querying later blocks (which require us to analyze early
1162 // blocks). Eagerly simplifying shallow blocks means there is strictly less
1163 // work to do for deep blocks. This also means we don't visit unreachable
1164 // blocks.
1165 for (BasicBlock *BB : depth_first(G: &F.getEntryBlock())) {
1166 bool BBChanged = false;
1167 for (Instruction &II : llvm::make_early_inc_range(Range&: *BB)) {
1168 switch (II.getOpcode()) {
1169 case Instruction::Select:
1170 BBChanged |= processSelect(S: cast<SelectInst>(Val: &II), LVI);
1171 break;
1172 case Instruction::PHI:
1173 BBChanged |= processPHI(P: cast<PHINode>(Val: &II), LVI, DT, SQ);
1174 break;
1175 case Instruction::ICmp:
1176 case Instruction::FCmp:
1177 BBChanged |= processCmp(Cmp: cast<CmpInst>(Val: &II), LVI);
1178 break;
1179 case Instruction::Call:
1180 case Instruction::Invoke:
1181 BBChanged |= processCallSite(CB&: cast<CallBase>(Val&: II), LVI);
1182 break;
1183 case Instruction::SRem:
1184 case Instruction::SDiv:
1185 BBChanged |= processSDivOrSRem(Instr: cast<BinaryOperator>(Val: &II), LVI);
1186 break;
1187 case Instruction::UDiv:
1188 case Instruction::URem:
1189 BBChanged |= processUDivOrURem(Instr: cast<BinaryOperator>(Val: &II), LVI);
1190 break;
1191 case Instruction::AShr:
1192 BBChanged |= processAShr(SDI: cast<BinaryOperator>(Val: &II), LVI);
1193 break;
1194 case Instruction::SExt:
1195 BBChanged |= processSExt(SDI: cast<SExtInst>(Val: &II), LVI);
1196 break;
1197 case Instruction::ZExt:
1198 BBChanged |= processZExt(ZExt: cast<ZExtInst>(Val: &II), LVI);
1199 break;
1200 case Instruction::Add:
1201 case Instruction::Sub:
1202 case Instruction::Mul:
1203 case Instruction::Shl:
1204 BBChanged |= processBinOp(BinOp: cast<BinaryOperator>(Val: &II), LVI);
1205 break;
1206 case Instruction::And:
1207 BBChanged |= processAnd(BinOp: cast<BinaryOperator>(Val: &II), LVI);
1208 break;
1209 }
1210 }
1211
1212 Instruction *Term = BB->getTerminator();
1213 switch (Term->getOpcode()) {
1214 case Instruction::Switch:
1215 BBChanged |= processSwitch(I: cast<SwitchInst>(Val: Term), LVI, DT);
1216 break;
1217 case Instruction::Ret: {
1218 auto *RI = cast<ReturnInst>(Val: Term);
1219 // Try to determine the return value if we can. This is mainly here to
1220 // simplify the writing of unit tests, but also helps to enable IPO by
1221 // constant folding the return values of callees.
1222 auto *RetVal = RI->getReturnValue();
1223 if (!RetVal) break; // handle "ret void"
1224 if (isa<Constant>(Val: RetVal)) break; // nothing to do
1225 if (auto *C = getConstantAt(V: RetVal, At: RI, LVI)) {
1226 ++NumReturns;
1227 RI->replaceUsesOfWith(From: RetVal, To: C);
1228 BBChanged = true;
1229 }
1230 }
1231 }
1232
1233 FnChanged |= BBChanged;
1234 }
1235
1236 return FnChanged;
1237}
1238
1239PreservedAnalyses
1240CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
1241 LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(IR&: F);
1242 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1243
1244 bool Changed = runImpl(F, LVI, DT, SQ: getBestSimplifyQuery(AM, F));
1245
1246 PreservedAnalyses PA;
1247 if (!Changed) {
1248 PA = PreservedAnalyses::all();
1249 } else {
1250 PA.preserve<DominatorTreeAnalysis>();
1251 PA.preserve<LazyValueAnalysis>();
1252 }
1253
1254 // Keeping LVI alive is expensive, both because it uses a lot of memory, and
1255 // because invalidating values in LVI is expensive. While CVP does preserve
1256 // LVI, we know that passes after JumpThreading+CVP will not need the result
1257 // of this analysis, so we forcefully discard it early.
1258 PA.abandon<LazyValueAnalysis>();
1259 return PA;
1260}
1261

source code of llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp