Reassociate.cpp source code [llvm/lib/Transforms/Scalar/Reassociate.cpp]

1	//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass reassociates commutative expressions in an order that is designed
10	// to promote better constant propagation, GCSE, LICM, PRE, etc.
11	//
12	// For example: 4 + (x + 5) -> x + (4 + 5)
13	//
14	// In the implementation of this algorithm, constants are assigned rank = 0,
15	// function arguments get distinct ranks starting at 3, and other values get
16	// ranks based on the reverse post order traversal of the current function
17	// (block ranks are shifted left by 16), which effectively gives values in
18	// deep loops higher rank than values not in loops.
19	//
20	//===----------------------------------------------------------------------===//
21
22	#include "llvm/Transforms/Scalar/Reassociate.h"
23	#include "llvm/ADT/APFloat.h"
24	#include "llvm/ADT/APInt.h"
25	#include "llvm/ADT/DenseMap.h"
26	#include "llvm/ADT/PostOrderIterator.h"
27	#include "llvm/ADT/SmallPtrSet.h"
28	#include "llvm/ADT/SmallSet.h"
29	#include "llvm/ADT/SmallVector.h"
30	#include "llvm/ADT/Statistic.h"
31	#include "llvm/Analysis/BasicAliasAnalysis.h"
32	#include "llvm/Analysis/ConstantFolding.h"
33	#include "llvm/Analysis/GlobalsModRef.h"
34	#include "llvm/Analysis/ValueTracking.h"
35	#include "llvm/IR/Argument.h"
36	#include "llvm/IR/BasicBlock.h"
37	#include "llvm/IR/CFG.h"
38	#include "llvm/IR/Constant.h"
39	#include "llvm/IR/Constants.h"
40	#include "llvm/IR/Function.h"
41	#include "llvm/IR/IRBuilder.h"
42	#include "llvm/IR/InstrTypes.h"
43	#include "llvm/IR/Instruction.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/Operator.h"
46	#include "llvm/IR/PassManager.h"
47	#include "llvm/IR/PatternMatch.h"
48	#include "llvm/IR/Type.h"
49	#include "llvm/IR/User.h"
50	#include "llvm/IR/Value.h"
51	#include "llvm/IR/ValueHandle.h"
52	#include "llvm/InitializePasses.h"
53	#include "llvm/Pass.h"
54	#include "llvm/Support/Casting.h"
55	#include "llvm/Support/CommandLine.h"
56	#include "llvm/Support/Debug.h"
57	#include "llvm/Support/raw_ostream.h"
58	#include "llvm/Transforms/Scalar.h"
59	#include "llvm/Transforms/Utils/Local.h"
60	#include <algorithm>
61	#include <cassert>
62	#include <utility>
63
64	using namespace llvm;
65	using namespace reassociate;
66	using namespace PatternMatch;
67
68	#define DEBUG_TYPE "reassociate"
69
70	STATISTIC(NumChanged, "Number of insts reassociated");
71	STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72	STATISTIC(NumFactor , "Number of multiplies factored");
73
74	static cl::opt<bool>
75	UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76	cl::desc ("Only reorder expressions within a basic block "
77	"when exposing CSE opportunities"),
78	cl::init(Val: true), cl::Hidden);
79
80	#ifndef NDEBUG
81	/// Print out the expression identified in the Ops list.
82	static void PrintOps(Instruction I, const* SmallVectorImpl<ValueEntry> &Ops) {
83	Module *M = I->getModule();
84	dbgs() << Instruction::getOpcodeName(Opcode: I->getOpcode()) << " "
85	<< *Ops [`0`].Op->getType() << `'\t'`;
86	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
87	dbgs() << "[ ";
88	Ops [i].Op->printAsOperand(O&: dbgs(), PrintType: false, M);
89	dbgs() << ", #" << Ops [i].Rank << "] ";
90	}
91	}
92	#endif
93
94	/// Utility class representing a non-constant Xor-operand. We classify
95	/// non-constant Xor-Operands into two categories:
96	/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97	/// C2)
98	/// C2.1) The operand is in the form of "X \| C", where C is a non-zero
99	/// constant.
100	/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101	/// operand as "E \| 0"
102	class llvm::reassociate::XorOpnd {
103	public:
104	XorOpnd(Value *V);
105
106	bool isInvalid() const { return SymbolicPart == nullptr; }
107	bool isOrExpr() const { return isOr; }
108	Value getValue() const* { return OrigVal; }
109	Value getSymbolicPart() const* { return SymbolicPart; }
110	unsigned getSymbolicRank() const { return SymbolicRank; }
111	const APInt &getConstPart() const { return ConstPart; }
112
113	void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114	void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116	private:
117	Value *OrigVal;
118	Value *SymbolicPart;
119	APInt ConstPart;
120	unsigned SymbolicRank;
121	bool isOr;
122	};
123
124	XorOpnd::XorOpnd(Value *V) {
125	assert(!isa<ConstantInt>(V) && "No ConstantInt");
126	OrigVal = V;
127	Instruction *I = dyn_cast<Instruction>(Val: V);
128	SymbolicRank = `0`;
129
130	if (I && (I->getOpcode() == Instruction::Or \|\|
131	I->getOpcode() == Instruction::And)) {
132	Value *V0 = I->getOperand(i: `0`);
133	Value *V1 = I->getOperand(i: `1`);
134	const APInt *C;
135	if (match(V: V0, P: m_APInt(Res&: C)))
136	std::swap(a&: V0, b&: V1);
137
138	if (match(V: V1, P: m_APInt(Res&: C))) {
139	ConstPart = *C;
140	SymbolicPart = V0;
141	isOr = (I->getOpcode() == Instruction::Or);
142	return;
143	}
144	}
145
146	// view the operand as "V \| 0"
147	SymbolicPart = V;
148	ConstPart = APInt::getZero(numBits: V->getType()->getScalarSizeInBits());
149	isOr = true;
150	}
151
152	/// Return true if I is an instruction with the FastMathFlags that are needed
153	/// for general reassociation set. This is not the same as testing
154	/// Instruction::isAssociative() because it includes operations like fsub.
155	/// (This routine is only intended to be called for floating-point operations.)
156	static bool hasFPAssociativeFlags(Instruction *I) {
157	assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158	return I->hasAllowReassoc() && I->hasNoSignedZeros();
159	}
160
161	/// Return true if V is an instruction of the specified opcode and if it
162	/// only has one use.
163	static BinaryOperator isReassociableOp(Value V, unsigned Opcode) {
164	auto *BO = dyn_cast<BinaryOperator>(Val: V);
165	if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166	if (!isa<FPMathOperator>(Val: BO) \|\| hasFPAssociativeFlags(I: BO))
167	return BO;
168	return nullptr;
169	}
170
171	static BinaryOperator isReassociableOp(Value V, unsigned Opcode1,
172	unsigned Opcode2) {
173	auto *BO = dyn_cast<BinaryOperator>(Val: V);
174	if (BO && BO->hasOneUse() &&
175	(BO->getOpcode() == Opcode1 \|\| BO->getOpcode() == Opcode2))
176	if (!isa<FPMathOperator>(Val: BO) \|\| hasFPAssociativeFlags(I: BO))
177	return BO;
178	return nullptr;
179	}
180
181	void ReassociatePass::BuildRankMap(Function &F,
182	ReversePostOrderTraversal<Function*> &RPOT) {
183	unsigned Rank = `2`;
184
185	// Assign distinct ranks to function arguments.
186	for (auto &Arg : F.args()) {
187	ValueRankMap [&Arg] = ++Rank;
188	LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189	<< "\n");
190	}
191
192	// Traverse basic blocks in ReversePostOrder.
193	for (BasicBlock *BB : RPOT) {
194	unsigned BBRank = RankMap [BB] = ++Rank << `16`;
195
196	// Walk the basic block, adding precomputed ranks for any instructions that
197	// we cannot move. This ensures that the ranks for these instructions are
198	// all different in the block.
199	for (Instruction &I : *BB)
200	if (mayHaveNonDefUseDependency(I))
201	ValueRankMap [&I] = ++BBRank;
202	}
203	}
204
205	unsigned ReassociatePass::getRank(Value *V) {
206	Instruction *I = dyn_cast<Instruction>(Val: V);
207	if (!I) {
208	if (isa<Argument>(Val: V)) return ValueRankMap [V]; // Function argument.
209	return `0`; // Otherwise it's a global or constant, rank 0.
210	}
211
212	if (unsigned Rank = ValueRankMap [I])
213	return Rank; // Rank already known?
214
215	// If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216	// we can reassociate expressions for code motion! Since we do not recurse
217	// for PHI nodes, we cannot have infinite recursion here, because there
218	// cannot be loops in the value graph that do not go through PHI nodes.
219	unsigned Rank = `0`, MaxRank = RankMap [I->getParent()];
220	for (unsigned i = `0`, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221	Rank = std::max(a: Rank, b: getRank(V: I->getOperand(i)));
222
223	// If this is a 'not' or 'neg' instruction, do not count it for rank. This
224	// assures us that X and ~X will have the same rank.
225	if (!match(V: I, P: m_Not(V: m_Value())) && !match(V: I, P: m_Neg(V: m_Value())) &&
226	!match(V: I, P: m_FNeg(X: m_Value())))
227	++Rank;
228
229	LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230	<< "\n");
231
232	return ValueRankMap [I] = Rank;
233	}
234
235	// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236	void ReassociatePass::canonicalizeOperands(Instruction *I) {
237	assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238	assert(I->isCommutative() && "Expected commutative operator.");
239
240	Value *LHS = I->getOperand(i: `0`);
241	Value *RHS = I->getOperand(i: `1`);
242	if (LHS == RHS \|\| isa<Constant>(Val: RHS))
243	return;
244	if (isa<Constant>(Val: LHS) \|\| getRank(V: RHS) < getRank(V: LHS))
245	cast<BinaryOperator>(Val: I)->swapOperands();
246	}
247
248	static BinaryOperator CreateAdd(Value S1, Value S2, const* Twine &Name,
249	BasicBlock::iterator InsertBefore,
250	Value *FlagsOp) {
251	if (S1->getType()->isIntOrIntVectorTy())
252	return BinaryOperator::CreateAdd(V1: S1, V2: S2, Name, It: InsertBefore);
253	else {
254	BinaryOperator *Res =
255	BinaryOperator::CreateFAdd(V1: S1, V2: S2, Name, It: InsertBefore);
256	Res->setFastMathFlags(cast<FPMathOperator>(Val: FlagsOp)->getFastMathFlags());
257	return Res;
258	}
259	}
260
261	static BinaryOperator CreateMul(Value S1, Value S2, const* Twine &Name,
262	BasicBlock::iterator InsertBefore,
263	Value *FlagsOp) {
264	if (S1->getType()->isIntOrIntVectorTy())
265	return BinaryOperator::CreateMul(V1: S1, V2: S2, Name, It: InsertBefore);
266	else {
267	BinaryOperator *Res =
268	BinaryOperator::CreateFMul(V1: S1, V2: S2, Name, It: InsertBefore);
269	Res->setFastMathFlags(cast<FPMathOperator>(Val: FlagsOp)->getFastMathFlags());
270	return Res;
271	}
272	}
273
274	static Instruction CreateNeg(Value S1, const Twine &Name,
275	BasicBlock::iterator InsertBefore,
276	Value *FlagsOp) {
277	if (S1->getType()->isIntOrIntVectorTy())
278	return BinaryOperator::CreateNeg(Op: S1, Name, InsertBefore);
279
280	if (auto *FMFSource = dyn_cast<Instruction>(Val: FlagsOp))
281	return UnaryOperator::CreateFNegFMF(Op: S1, FMFSource, Name, InsertBefore);
282
283	return UnaryOperator::CreateFNeg(V: S1, Name, It: InsertBefore);
284	}
285
286	/// Replace 0-X with X-1.*
287	static BinaryOperator LowerNegateToMultiply(Instruction Neg) {
288	assert((isa<UnaryOperator>(Neg) \|\| isa<BinaryOperator>(Neg)) &&
289	"Expected a Negate!");
290	// FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
291	unsigned OpNo = isa<BinaryOperator>(Val: Neg) ? `1` : `0`;
292	Type *Ty = Neg->getType();
293	Constant *NegOne = Ty->isIntOrIntVectorTy() ?
294	ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, V: -`1.0`);
295
296	BinaryOperator *Res =
297	CreateMul(S1: Neg->getOperand(i: OpNo), S2: NegOne, Name: "", InsertBefore: Neg->getIterator(), FlagsOp: Neg);
298	Neg->setOperand(i: OpNo, Val: Constant::getNullValue(Ty)); // Drop use of op.
299	Res->takeName(V: Neg);
300	Neg->replaceAllUsesWith(V: Res);
301	Res->setDebugLoc(Neg->getDebugLoc());
302	return Res;
303	}
304
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
///
/// \p Bitwidth must be non-zero; a zero width would make the subtraction wrap.
static unsigned CarmichaelShift(unsigned Bitwidth) {
  assert(Bitwidth > 0 && "Carmichael shift is undefined for zero-width types");
  if (Bitwidth < 3)
    return Bitwidth - 1;
  return Bitwidth - 2;
}
315
316	/// Add the extra weight 'RHS' to the existing weight 'LHS',
317	/// reducing the combined weight using any special properties of the operation.
318	/// The existing weight LHS represents the computation X op X op ... op X where
319	/// X occurs LHS times. The combined weight represents X op X op ... op X with
320	/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
321	/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
322	/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
323	static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
324	// If we were working with infinite precision arithmetic then the combined
325	// weight would be LHS + RHS. But we are using finite precision arithmetic,
326	// and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
327	// for nilpotent operations and addition, but not for idempotent operations
328	// and multiplication), so it is important to correctly reduce the combined
329	// weight back into range if wrapping would be wrong.
330
331	// If RHS is zero then the weight didn't change.
332	if (RHS.isMinValue())
333	return;
334	// If LHS is zero then the combined weight is RHS.
335	if (LHS.isMinValue()) {
336	LHS = RHS;
337	return;
338	}
339	// From this point on we know that neither LHS nor RHS is zero.
340
341	if (Instruction::isIdempotent(Opcode)) {
342	// Idempotent means X op X === X, so any non-zero weight is equivalent to a
343	// weight of 1. Keeping weights at zero or one also means that wrapping is
344	// not a problem.
345	assert(LHS == `1` && RHS == `1` && "Weights not reduced!");
346	return; // Return a weight of 1.
347	}
348	if (Instruction::isNilpotent(Opcode)) {
349	// Nilpotent means X op X === 0, so reduce weights modulo 2.
350	assert(LHS == `1` && RHS == `1` && "Weights not reduced!");
351	LHS = `0`; // 1 + 1 === 0 modulo 2.
352	return;
353	}
354	if (Opcode == Instruction::Add \|\| Opcode == Instruction::FAdd) {
355	// TODO: Reduce the weight by exploiting nsw/nuw?
356	LHS += RHS;
357	return;
358	}
359
360	assert((Opcode == Instruction::Mul \|\| Opcode == Instruction::FMul) &&
361	"Unknown associative operation!");
362	unsigned Bitwidth = LHS.getBitWidth();
363	// If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
364	// can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
365	// bit number x, since either x is odd in which case x^CM = 1, or x is even in
366	// which case both x^W and x^(W - CM) are zero. By subtracting off multiples
367	// of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
368	// which by a happy accident means that they can always be represented using
369	// Bitwidth bits.
370	// TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
371	// the Carmichael number).
372	if (Bitwidth > `3`) {
373	/// CM - The value of Carmichael's lambda function.
374	APInt CM = APInt::getOneBitSet(numBits: Bitwidth, BitNo: CarmichaelShift(Bitwidth));
375	// Any weight W >= Threshold can be replaced with W - CM.
376	APInt Threshold = CM + Bitwidth;
377	assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
378	// For Bitwidth 4 or more the following sum does not overflow.
379	LHS += RHS;
380	while (LHS.uge(RHS: Threshold))
381	LHS -= CM;
382	} else {
383	// To avoid problems with overflow do everything the same as above but using
384	// a larger type.
385	unsigned CM = `1U` << CarmichaelShift(Bitwidth);
386	unsigned Threshold = CM + Bitwidth;
387	assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
388	"Weights not reduced!");
389	unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
390	while (Total >= Threshold)
391	Total -= CM;
392	LHS = Total;
393	}
394	}
395
396	using RepeatedValue = std::pair<Value*, APInt>;
397
398	/// Given an associative binary expression, return the leaf
399	/// nodes in Ops along with their weights (how many times the leaf occurs). The
400	/// original expression is the same as
401	/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
402	/// op
403	/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
404	/// op
405	/// ...
406	/// op
407	/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
408	///
409	/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
410	///
411	/// This routine may modify the function, in which case it returns 'true'. The
412	/// changes it makes may well be destructive, changing the value computed by 'I'
413	/// to something completely different. Thus if the routine returns 'true' then
414	/// you MUST either replace I with a new expression computed from the Ops array,
415	/// or use RewriteExprTree to put the values back in.
416	///
417	/// A leaf node is either not a binary operation of the same kind as the root
418	/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
419	/// opcode), or is the same kind of binary operator but has a use which either
420	/// does not belong to the expression, or does belong to the expression but is
421	/// a leaf node. Every leaf node has at least one use that is a non-leaf node
422	/// of the expression, while for non-leaf nodes (except for the root 'I') every
423	/// use is a non-leaf node of the expression.
424	///
425	/// For example:
426	/// expression graph node names
427	///
428	/// + \| I
429	/// / \ \|
430	/// + + \| A, B
431	/// / \ / \ \|
432	/// + * \| C, D, E*
433	/// / \ / \ / \ \|
434	/// + \| F, G*
435	///
436	/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
437	/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
438	///
439	/// The expression is maximal: if some instruction is a binary operator of the
440	/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
441	/// then the instruction also belongs to the expression, is not a leaf node of
442	/// it, and its operands also belong to the expression (but may be leaf nodes).
443	///
444	/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
445	/// order to ensure that every non-root node in the expression has exactly one
446	/// use by a non-leaf node of the expression. This destruction means that the
447	/// caller MUST either replace 'I' with a new expression or use something like
448	/// RewriteExprTree to put the values back in if the routine indicates that it
449	/// made a change by returning 'true'.
450	///
451	/// In the above example either the right operand of A or the left operand of B
452	/// will be replaced by undef. If it is B's operand then this gives:
453	///
454	/// + \| I
455	/// / \ \|
456	/// + + \| A, B - operand of B replaced with undef
457	/// / \ \ \|
458	/// + * \| C, D, E*
459	/// / \ / \ / \ \|
460	/// + \| F, G*
461	///
462	/// Note that such undef operands can only be reached by passing through 'I'.
463	/// For example, if you visit operands recursively starting from a leaf node
464	/// then you will never see such an undef operand unless you get back to 'I',
465	/// which requires passing through a phi node.
466	///
467	/// Note that this routine may also mutate binary operators of the wrong type
468	/// that have all uses inside the expression (i.e. only used by non-leaf nodes
469	/// of the expression) if it can turn them into binary operators of the right
470	/// type and thus make the expression bigger.
471	static bool LinearizeExprTree(Instruction *I,
472	SmallVectorImpl<RepeatedValue> &Ops,
473	ReassociatePass::OrderedSet &ToRedo,
474	bool &HasNUW) {
475	assert((isa<UnaryOperator>(I) \|\| isa<BinaryOperator>(I)) &&
476	"Expected a UnaryOperator or BinaryOperator!");
477	LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << `'\n'`);
478	unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
479	unsigned Opcode = I->getOpcode();
480	assert(I->isAssociative() && I->isCommutative() &&
481	"Expected an associative and commutative operation!");
482
483	// Visit all operands of the expression, keeping track of their weight (the
484	// number of paths from the expression root to the operand, or if you like
485	// the number of times that operand occurs in the linearized expression).
486	// For example, if I = X + A, where X = A + B, then I, X and B have weight 1
487	// while A has weight two.
488
489	// Worklist of non-leaf nodes (their operands are in the expression too) along
490	// with their weights, representing a certain number of paths to the operator.
491	// If an operator occurs in the worklist multiple times then we found multiple
492	// ways to get to it.
493	SmallVector<std::pair<Instruction, APInt>, `8`> Worklist; // (Op, Weight)*
494	Worklist.push_back(Elt: std::make_pair(x&: I, y: APInt (Bitwidth, `1`)));
495	bool Changed = false;
496
497	// Leaves of the expression are values that either aren't the right kind of
498	// operation (eg: a constant, or a multiply in an add tree), or are, but have
499	// some uses that are not inside the expression. For example, in I = X + X,
500	// X = A + B, the value X has two uses (by I) that are in the expression. If
501	// X has any other uses, for example in a return instruction, then we consider
502	// X to be a leaf, and won't analyze it further. When we first visit a value,
503	// if it has more than one use then at first we conservatively consider it to
504	// be a leaf. Later, as the expression is explored, we may discover some more
505	// uses of the value from inside the expression. If all uses turn out to be
506	// from within the expression (and the value is a binary operator of the right
507	// kind) then the value is no longer considered to be a leaf, and its operands
508	// are explored.
509
510	// Leaves - Keeps track of the set of putative leaves as well as the number of
511	// paths to each leaf seen so far.
512	using LeafMap = DenseMap<Value *, APInt>;
513	LeafMap Leaves; // Leaf -> Total weight so far.
514	SmallVector<Value , `8`> LeafOrder; // Ensure deterministic leaf output order.*
515
516	#ifndef NDEBUG
517	SmallPtrSet<Value , `8`> Visited; // For checking the iteration scheme.*
518	#endif
519	while (!Worklist.empty()) {
520	std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
521	I = P.first; // We examine the operands of this binary operator.
522
523	if (isa<OverflowingBinaryOperator>(Val: I))
524	HasNUW &= I->hasNoUnsignedWrap();
525
526	for (unsigned OpIdx = `0`; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
527	Value *Op = I->getOperand(i: OpIdx);
528	APInt Weight = P.second; // Number of paths to this operand.
529	LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
530	assert(!Op->use_empty() && "No uses, so how did we get to it?!");
531
532	// If this is a binary operation of the right kind with only one use then
533	// add its operands to the expression.
534	if (BinaryOperator *BO = isReassociableOp(V: Op, Opcode)) {
535	assert(Visited.insert(Op).second && "Not first visit!");
536	LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
537	Worklist.push_back(Elt: std::make_pair(x&: BO, y&: Weight));
538	continue;
539	}
540
541	// Appears to be a leaf. Is the operand already in the set of leaves?
542	LeafMap::iterator It = Leaves.find(Val: Op);
543	if (It == Leaves.end()) {
544	// Not in the leaf map. Must be the first time we saw this operand.
545	assert(Visited.insert(Op).second && "Not first visit!");
546	if (!Op->hasOneUse()) {
547	// This value has uses not accounted for by the expression, so it is
548	// not safe to modify. Mark it as being a leaf.
549	LLVM_DEBUG(dbgs()
550	<< "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
551	LeafOrder.push_back(Elt: Op);
552	Leaves [Op] = Weight;
553	continue;
554	}
555	// No uses outside the expression, try morphing it.
556	} else {
557	// Already in the leaf map.
558	assert(It != Leaves.end() && Visited.count(Op) &&
559	"In leaf map but not visited!");
560
561	// Update the number of paths to the leaf.
562	IncorporateWeight(LHS&: It ->second, RHS: Weight, Opcode);
563
564	#if 0 // TODO: Re-enable once PR13021 is fixed.
565	// The leaf already has one use from inside the expression. As we want
566	// exactly one such use, drop this new use of the leaf.
567	assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
568	I->setOperand(OpIdx, UndefValue::get(I->getType()));
569	Changed = true;
570
571	// If the leaf is a binary operation of the right kind and we now see
572	// that its multiple original uses were in fact all by nodes belonging
573	// to the expression, then no longer consider it to be a leaf and add
574	// its operands to the expression.
575	if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
576	LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
577	Worklist.push_back(std::make_pair(BO, It->second));
578	Leaves.erase(It);
579	continue;
580	}
581	#endif
582
583	// If we still have uses that are not accounted for by the expression
584	// then it is not safe to modify the value.
585	if (!Op->hasOneUse())
586	continue;
587
588	// No uses outside the expression, try morphing it.
589	Weight = It ->second;
590	Leaves.erase(I: It); // Since the value may be morphed below.
591	}
592
593	// At this point we have a value which, first of all, is not a binary
594	// expression of the right kind, and secondly, is only used inside the
595	// expression. This means that it can safely be modified. See if we
596	// can usefully morph it into an expression of the right kind.
597	assert((!isa<Instruction>(Op) \|\|
598	cast<Instruction>(Op)->getOpcode() != Opcode
599	\|\| (isa<FPMathOperator>(Op) &&
600	!hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
601	"Should have been handled above!");
602	assert(Op->hasOneUse() && "Has uses outside the expression tree!");
603
604	// If this is a multiply expression, turn any internal negations into
605	// multiplies by -1 so they can be reassociated. Add any users of the
606	// newly created multiplication by -1 to the redo list, so any
607	// reassociation opportunities that are exposed will be reassociated
608	// further.
609	Instruction *Neg;
610	if (((Opcode == Instruction::Mul && match(V: Op, P: m_Neg(V: m_Value()))) \|\|
611	(Opcode == Instruction::FMul && match(V: Op, P: m_FNeg(X: m_Value())))) &&
612	match(V: Op, P: m_Instruction(I&: Neg))) {
613	LLVM_DEBUG(dbgs()
614	<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
615	Instruction *Mul = LowerNegateToMultiply(Neg);
616	LLVM_DEBUG(dbgs() << *Mul << `'\n'`);
617	Worklist.push_back(Elt: std::make_pair(x&: Mul, y&: Weight));
618	for (User *U : Mul->users()) {
619	if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(Val: U))
620	ToRedo.insert(X: UserBO);
621	}
622	ToRedo.insert(X: Neg);
623	Changed = true;
624	continue;
625	}
626
627	// Failed to morph into an expression of the right type. This really is
628	// a leaf.
629	LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
630	assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
631	LeafOrder.push_back(Elt: Op);
632	Leaves [Op] = Weight;
633	}
634	}
635
636	// The leaves, repeated according to their weights, represent the linearized
637	// form of the expression.
638	for (Value *V : LeafOrder) {
639	LeafMap::iterator It = Leaves.find(Val: V);
640	if (It == Leaves.end())
641	// Node initially thought to be a leaf wasn't.
642	continue;
643	assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
644	APInt Weight = It ->second;
645	if (Weight.isMinValue())
646	// Leaf already output or weight reduction eliminated it.
647	continue;
648	// Ensure the leaf is only output once.
649	It ->second = `0`;
650	Ops.push_back(Elt: std::make_pair(x&: V, y&: Weight));
651	}
652
653	// For nilpotent operations or addition there may be no operands, for example
654	// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
655	// in both cases the weight reduces to 0 causing the value to be skipped.
656	if (Ops.empty()) {
657	Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, Ty: I->getType());
658	assert(Identity && "Associative operation without identity!");
659	Ops.emplace_back(Args&: Identity, Args: APInt (Bitwidth, `1`));
660	}
661
662	return Changed;
663	}
664
665	/// Now that the operands for this expression tree are
666	/// linearized and optimized, emit them in-order.
667	void ReassociatePass::RewriteExprTree(BinaryOperator *I,
668	SmallVectorImpl<ValueEntry> &Ops,
669	bool HasNUW) {
670	assert(Ops.size() > `1` && "Single values should be used directly!");
671
672	// Since our optimizations should never increase the number of operations, the
673	// new expression can usually be written reusing the existing binary operators
674	// from the original expression tree, without creating any new instructions,
675	// though the rewritten expression may have a completely different topology.
676	// We take care to not change anything if the new expression will be the same
677	// as the original. If more than trivial changes (like commuting operands)
678	// were made then we are obliged to clear out any optional subclass data like
679	// nsw flags.
680
681	/// NodesToRewrite - Nodes from the original expression available for writing
682	/// the new expression into.
683	SmallVector<BinaryOperator*, `8`> NodesToRewrite;
684	unsigned Opcode = I->getOpcode();
685	BinaryOperator *Op = I;
686
687	/// NotRewritable - The operands being written will be the leaves of the new
688	/// expression and must not be used as inner nodes (via NodesToRewrite) by
689	/// mistake. Inner nodes are always reassociable, and usually leaves are not
690	/// (if they were they would have been incorporated into the expression and so
691	/// would not be leaves), so most of the time there is no danger of this. But
692	/// in rare cases a leaf may become reassociable if an optimization kills uses
693	/// of it, or it may momentarily become reassociable during rewriting (below)
694	/// due it being removed as an operand of one of its uses. Ensure that misuse
695	/// of leaf nodes as inner nodes cannot occur by remembering all of the future
696	/// leaves and refusing to reuse any of them as inner nodes.
697	SmallPtrSet<Value*, `8`> NotRewritable;
698	for (unsigned i = `0`, e = Ops.size(); i != e; ++i)
699	NotRewritable.insert(Ptr: Ops [i].Op);
700
701	// ExpressionChangedStart - Non-null if the rewritten expression differs from
702	// the original in some non-trivial way, requiring the clearing of optional
703	// flags. Flags are cleared from the operator in ExpressionChangedStart up to
704	// ExpressionChangedEnd inclusive.
705	BinaryOperator ExpressionChangedStart = nullptr*,
706	ExpressionChangedEnd = nullptr*;
707	for (unsigned i = `0`; ; ++i) {
708	// The last operation (which comes earliest in the IR) is special as both
709	// operands will come from Ops, rather than just one with the other being
710	// a subexpression.
711	if (i+`2` == Ops.size()) {
712	Value *NewLHS = Ops [i].Op;
713	Value *NewRHS = Ops [i+`1`].Op;
714	Value *OldLHS = Op->getOperand(i_nocapture: `0`);
715	Value *OldRHS = Op->getOperand(i_nocapture: `1`);
716
717	if (NewLHS == OldLHS && NewRHS == OldRHS)
718	// Nothing changed, leave it alone.
719	break;
720
721	if (NewLHS == OldRHS && NewRHS == OldLHS) {
722	// The order of the operands was reversed. Swap them.
723	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
724	Op->swapOperands();
725	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
726	MadeChange = true;
727	++NumChanged;
728	break;
729	}
730
731	// The new operation differs non-trivially from the original. Overwrite
732	// the old operands with the new ones.
733	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
734	if (NewLHS != OldLHS) {
735	BinaryOperator *BO = isReassociableOp(V: OldLHS, Opcode);
736	if (BO && !NotRewritable.count(Ptr: BO))
737	NodesToRewrite.push_back(Elt: BO);
738	Op->setOperand(i_nocapture: `0`, Val_nocapture: NewLHS);
739	}
740	if (NewRHS != OldRHS) {
741	BinaryOperator *BO = isReassociableOp(V: OldRHS, Opcode);
742	if (BO && !NotRewritable.count(Ptr: BO))
743	NodesToRewrite.push_back(Elt: BO);
744	Op->setOperand(i_nocapture: `1`, Val_nocapture: NewRHS);
745	}
746	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
747
748	ExpressionChangedStart = Op;
749	if (!ExpressionChangedEnd)
750	ExpressionChangedEnd = Op;
751	MadeChange = true;
752	++NumChanged;
753
754	break;
755	}
756
757	// Not the last operation. The left-hand side will be a sub-expression
758	// while the right-hand side will be the current element of Ops.
759	Value *NewRHS = Ops [i].Op;
760	if (NewRHS != Op->getOperand(i_nocapture: `1`)) {
761	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
762	if (NewRHS == Op->getOperand(i_nocapture: `0`)) {
763	// The new right-hand side was already present as the left operand. If
764	// we are lucky then swapping the operands will sort out both of them.
765	Op->swapOperands();
766	} else {
767	// Overwrite with the new right-hand side.
768	BinaryOperator *BO = isReassociableOp(V: Op->getOperand(i_nocapture: `1`), Opcode);
769	if (BO && !NotRewritable.count(Ptr: BO))
770	NodesToRewrite.push_back(Elt: BO);
771	Op->setOperand(i_nocapture: `1`, Val_nocapture: NewRHS);
772	ExpressionChangedStart = Op;
773	if (!ExpressionChangedEnd)
774	ExpressionChangedEnd = Op;
775	}
776	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
777	MadeChange = true;
778	++NumChanged;
779	}
780
781	// Now deal with the left-hand side. If this is already an operation node
782	// from the original expression then just rewrite the rest of the expression
783	// into it.
784	BinaryOperator *BO = isReassociableOp(V: Op->getOperand(i_nocapture: `0`), Opcode);
785	if (BO && !NotRewritable.count(Ptr: BO)) {
786	Op = BO;
787	continue;
788	}
789
790	// Otherwise, grab a spare node from the original expression and use that as
791	// the left-hand side. If there are no nodes left then the optimizers made
792	// an expression with more nodes than the original! This usually means that
793	// they did something stupid but it might mean that the problem was just too
794	// hard (finding the minimal number of multiplications needed to realize a
795	// multiplication expression is NP-complete). Whatever the reason, smart or
796	// stupid, create a new node if there are none left.
797	BinaryOperator *NewOp;
798	if (NodesToRewrite.empty()) {
799	Constant *Undef = UndefValue::get(T: I->getType());
800	NewOp = BinaryOperator::Create(Op: Instruction::BinaryOps(Opcode), S1: Undef,
801	S2: Undef, Name: "", InsertBefore: I->getIterator());
802	if (isa<FPMathOperator>(Val: NewOp))
803	NewOp->setFastMathFlags(I->getFastMathFlags());
804	} else {
805	NewOp = NodesToRewrite.pop_back_val();
806	}
807
808	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
809	Op->setOperand(i_nocapture: `0`, Val_nocapture: NewOp);
810	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
811	ExpressionChangedStart = Op;
812	if (!ExpressionChangedEnd)
813	ExpressionChangedEnd = Op;
814	MadeChange = true;
815	++NumChanged;
816	Op = NewOp;
817	}
818
819	// If the expression changed non-trivially then clear out all subclass data
820	// starting from the operator specified in ExpressionChanged, and compactify
821	// the operators to just before the expression root to guarantee that the
822	// expression tree is dominated by all of Ops.
823	if (ExpressionChangedStart) {
824	bool ClearFlags = true;
825	do {
826	// Preserve flags.
827	if (ClearFlags) {
828	if (isa<FPMathOperator>(Val: I)) {
829	FastMathFlags Flags = I->getFastMathFlags();
830	ExpressionChangedStart->clearSubclassOptionalData();
831	ExpressionChangedStart->setFastMathFlags(Flags);
832	} else {
833	ExpressionChangedStart->clearSubclassOptionalData();
834	// Note that it doesn't hold for mul if one of the operands is zero.
835	// TODO: We can preserve NUW flag if we prove that all mul operands
836	// are non-zero.
837	if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add)
838	ExpressionChangedStart->setHasNoUnsignedWrap();
839	}
840	}
841
842	if (ExpressionChangedStart == ExpressionChangedEnd)
843	ClearFlags = false;
844	if (ExpressionChangedStart == I)
845	break;
846
847	// Discard any debug info related to the expressions that has changed (we
848	// can leave debug info related to the root and any operation that didn't
849	// change, since the result of the expression tree should be the same
850	// even after reassociation).
851	if (ClearFlags)
852	replaceDbgUsesWithUndef(I: ExpressionChangedStart);
853
854	ExpressionChangedStart->moveBefore(MovePos: I);
855	ExpressionChangedStart =
856	cast<BinaryOperator>(Val: *ExpressionChangedStart->user_begin());
857	} while (true);
858	}
859
860	// Throw away any left over nodes from the original expression.
861	for (unsigned i = `0`, e = NodesToRewrite.size(); i != e; ++i)
862	RedoInsts.insert(X: NodesToRewrite [i]);
863	}
864
865	/// Insert instructions before the instruction pointed to by BI,
866	/// that computes the negative version of the value specified. The negative
867	/// version of the value is returned, and BI is left pointing at the instruction
868	/// that should be processed next by the reassociation pass.
869	/// Also add intermediate instructions to the redo list that are modified while
870	/// pushing the negates through adds. These will be revisited to see if
871	/// additional opportunities have been exposed.
872	static Value NegateValue(Value V, Instruction *BI,
873	ReassociatePass::OrderedSet &ToRedo) {
874	if (auto *C = dyn_cast<Constant>(Val: V)) {
875	const DataLayout &DL = BI->getModule()->getDataLayout();
876	Constant *Res = C->getType()->isFPOrFPVectorTy()
877	? ConstantFoldUnaryOpOperand(Opcode: Instruction::FNeg, Op: C, DL)
878	: ConstantExpr::getNeg(C);
879	if (Res)
880	return Res;
881	}
882
883	// We are trying to expose opportunity for reassociation. One of the things
884	// that we want to do to achieve this is to push a negation as deep into an
885	// expression chain as possible, to expose the add instructions. In practice,
886	// this means that we turn this:
887	// X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
888	// so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
889	// the constants. We assume that instcombine will clean up the mess later if
890	// we introduce tons of unnecessary negation instructions.
891	//
892	if (BinaryOperator *I =
893	isReassociableOp(V, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd)) {
894	// Push the negates through the add.
895	I->setOperand(i_nocapture: `0`, Val_nocapture: NegateValue(V: I->getOperand(i_nocapture: `0`), BI, ToRedo));
896	I->setOperand(i_nocapture: `1`, Val_nocapture: NegateValue(V: I->getOperand(i_nocapture: `1`), BI, ToRedo));
897	if (I->getOpcode() == Instruction::Add) {
898	I->setHasNoUnsignedWrap(false);
899	I->setHasNoSignedWrap(false);
900	}
901
902	// We must move the add instruction here, because the neg instructions do
903	// not dominate the old add instruction in general. By moving it, we are
904	// assured that the neg instructions we just inserted dominate the
905	// instruction we are about to insert after them.
906	//
907	I->moveBefore(MovePos: BI);
908	I->setName(I->getName()+".neg");
909
910	// Add the intermediate negates to the redo list as processing them later
911	// could expose more reassociating opportunities.
912	ToRedo.insert(X: I);
913	return I;
914	}
915
916	// Okay, we need to materialize a negated version of V with an instruction.
917	// Scan the use lists of V to see if we have one already.
918	for (User *U : V->users()) {
919	if (!match(V: U, P: m_Neg(V: m_Value())) && !match(V: U, P: m_FNeg(X: m_Value())))
920	continue;
921
922	// We found one! Now we have to make sure that the definition dominates
923	// this use. We do this by moving it to the entry block (if it is a
924	// non-instruction value) or right after the definition. These negates will
925	// be zapped by reassociate later, so we don't need much finesse here.
926	Instruction *TheNeg = dyn_cast<Instruction>(Val: U);
927
928	// We can't safely propagate a vector zero constant with poison/undef lanes.
929	Constant *C;
930	if (match(V: TheNeg, P: m_BinOp(L: m_Constant(C), R: m_Value())) &&
931	C->containsUndefOrPoisonElement())
932	continue;
933
934	// Verify that the negate is in this function, V might be a constant expr.
935	if (!TheNeg \|\|
936	TheNeg->getParent()->getParent() != BI->getParent()->getParent())
937	continue;
938
939	BasicBlock::iterator InsertPt;
940	if (Instruction *InstInput = dyn_cast<Instruction>(Val: V)) {
941	auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
942	if (!InsertPtOpt)
943	continue;
944	InsertPt = *InsertPtOpt;
945	} else {
946	InsertPt = TheNeg->getFunction()
947	->getEntryBlock()
948	.getFirstNonPHIOrDbg()
949	->getIterator();
950	}
951
952	TheNeg->moveBefore(BB&: *InsertPt ->getParent(), I: InsertPt);
953	if (TheNeg->getOpcode() == Instruction::Sub) {
954	TheNeg->setHasNoUnsignedWrap(false);
955	TheNeg->setHasNoSignedWrap(false);
956	} else {
957	TheNeg->andIRFlags(V: BI);
958	}
959	ToRedo.insert(X: TheNeg);
960	return TheNeg;
961	}
962
963	// Insert a 'neg' instruction that subtracts the value from zero to get the
964	// negation.
965	Instruction *NewNeg =
966	CreateNeg(S1: V, Name: V->getName() + ".neg", InsertBefore: BI->getIterator(), FlagsOp: BI);
967	ToRedo.insert(X: NewNeg);
968	return NewNeg;
969	}
970
971	// See if this `or` looks like an load widening reduction, i.e. that it
972	// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
973	// ensure that the pattern is really* a load widening reduction,*
974	// we do not ensure that it can really be replaced with a widened load,
975	// only that it mostly looks like one.
976	static bool isLoadCombineCandidate(Instruction *Or) {
977	SmallVector<Instruction *, `8`> Worklist;
978	SmallSet<Instruction *, `8`> Visited;
979
980	auto Enqueue = [&](Value *V) {
981	auto *I = dyn_cast<Instruction>(Val: V);
982	// Each node of an `or` reduction must be an instruction,
983	if (!I)
984	return false; // Node is certainly not part of an `or` load reduction.
985	// Only process instructions we have never processed before.
986	if (Visited.insert(Ptr: I).second)
987	Worklist.emplace_back(Args&: I);
988	return true; // Will need to look at parent nodes.
989	};
990
991	if (!Enqueue (Or))
992	return false; // Not an `or` reduction pattern.
993
994	while (!Worklist.empty()) {
995	auto *I = Worklist.pop_back_val();
996
997	// Okay, which instruction is this node?
998	switch (I->getOpcode()) {
999	case Instruction::Or:
1000	// Got an `or` node. That's fine, just recurse into it's operands.
1001	for (Value *Op : I->operands())
1002	if (!Enqueue (Op))
1003	return false; // Not an `or` reduction pattern.
1004	continue;
1005
1006	case Instruction::Shl:
1007	case Instruction::ZExt:
1008	// `shl`/`zext` nodes are fine, just recurse into their base operand.
1009	if (!Enqueue (I->getOperand(i: `0`)))
1010	return false; // Not an `or` reduction pattern.
1011	continue;
1012
1013	case Instruction::Load:
1014	// Perfect, `load` node means we've reached an edge of the graph.
1015	continue;
1016
1017	default: // Unknown node.
1018	return false; // Not an `or` reduction pattern.
1019	}
1020	}
1021
1022	return true;
1023	}
1024
1025	/// Return true if it may be profitable to convert this (X\|Y) into (X+Y).
1026	static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
1027	// Don't bother to convert this up unless either the LHS is an associable add
1028	// or subtract or mul or if this is only used by one of the above.
1029	// This is only a compile-time improvement, it is not needed for correctness!
1030	auto isInteresting = [](Value *V) {
1031	for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
1032	Instruction::Shl})
1033	if (isReassociableOp(V, Opcode: Op))
1034	return true;
1035	return false;
1036	};
1037
1038	if (any_of(Range: Or->operands(), P: isInteresting))
1039	return true;
1040
1041	Value *VB = Or->user_back();
1042	if (Or->hasOneUse() && isInteresting (VB))
1043	return true;
1044
1045	return false;
1046	}
1047
1048	/// If we have (X\|Y), and iff X and Y have no common bits set,
1049	/// transform this into (X+Y) to allow arithmetics reassociation.
1050	static BinaryOperator convertOrWithNoCommonBitsToAdd(Instruction Or) {
1051	// Convert an or into an add.
1052	BinaryOperator *New = CreateAdd(S1: Or->getOperand(i: `0`), S2: Or->getOperand(i: `1`), Name: "",
1053	InsertBefore: Or->getIterator(), FlagsOp: Or);
1054	New->setHasNoSignedWrap();
1055	New->setHasNoUnsignedWrap();
1056	New->takeName(V: Or);
1057
1058	// Everyone now refers to the add instruction.
1059	Or->replaceAllUsesWith(V: New);
1060	New->setDebugLoc(Or->getDebugLoc());
1061
1062	LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << `'\n'`);
1063	return New;
1064	}
1065
1066	/// Return true if we should break up this subtract of X-Y into (X + -Y).
1067	static bool ShouldBreakUpSubtract(Instruction *Sub) {
1068	// If this is a negation, we can't split it up!
1069	if (match(V: Sub, P: m_Neg(V: m_Value())) \|\| match(V: Sub, P: m_FNeg(X: m_Value())))
1070	return false;
1071
1072	// Don't breakup X - undef.
1073	if (isa<UndefValue>(Val: Sub->getOperand(i: `1`)))
1074	return false;
1075
1076	// Don't bother to break this up unless either the LHS is an associable add or
1077	// subtract or if this is only used by one.
1078	Value *V0 = Sub->getOperand(i: `0`);
1079	if (isReassociableOp(V: V0, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1080	isReassociableOp(V: V0, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub))
1081	return true;
1082	Value *V1 = Sub->getOperand(i: `1`);
1083	if (isReassociableOp(V: V1, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1084	isReassociableOp(V: V1, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub))
1085	return true;
1086	Value *VB = Sub->user_back();
1087	if (Sub->hasOneUse() &&
1088	(isReassociableOp(V: VB, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1089	isReassociableOp(V: VB, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub)))
1090	return true;
1091
1092	return false;
1093	}
1094
1095	/// If we have (X-Y), and if either X is an add, or if this is only used by an
1096	/// add, transform this into (X+(0-Y)) to promote better reassociation.
1097	static BinaryOperator BreakUpSubtract(Instruction Sub,
1098	ReassociatePass::OrderedSet &ToRedo) {
1099	// Convert a subtract into an add and a neg instruction. This allows sub
1100	// instructions to be commuted with other add instructions.
1101	//
1102	// Calculate the negative value of Operand 1 of the sub instruction,
1103	// and set it as the RHS of the add instruction we just made.
1104	Value *NegVal = NegateValue(V: Sub->getOperand(i: `1`), BI: Sub, ToRedo);
1105	BinaryOperator *New =
1106	CreateAdd(S1: Sub->getOperand(i: `0`), S2: NegVal, Name: "", InsertBefore: Sub->getIterator(), FlagsOp: Sub);
1107	Sub->setOperand(i: `0`, Val: Constant::getNullValue(Ty: Sub->getType())); // Drop use of op.
1108	Sub->setOperand(i: `1`, Val: Constant::getNullValue(Ty: Sub->getType())); // Drop use of op.
1109	New->takeName(V: Sub);
1110
1111	// Everyone now refers to the add instruction.
1112	Sub->replaceAllUsesWith(V: New);
1113	New->setDebugLoc(Sub->getDebugLoc());
1114
1115	LLVM_DEBUG(dbgs() << "Negated: " << *New << `'\n'`);
1116	return New;
1117	}
1118
1119	/// If this is a shift of a reassociable multiply or is used by one, change
1120	/// this into a multiply by a constant to assist with further reassociation.
1121	static BinaryOperator ConvertShiftToMul(Instruction Shl) {
1122	Constant *MulCst = ConstantInt::get(Ty: Shl->getType(), V: `1`);
1123	auto *SA = cast<ConstantInt>(Val: Shl->getOperand(i: `1`));
1124	MulCst = ConstantExpr::getShl(C1: MulCst, C2: SA);
1125
1126	BinaryOperator *Mul = BinaryOperator::CreateMul(V1: Shl->getOperand(i: `0`), V2: MulCst,
1127	Name: "", It: Shl->getIterator());
1128	Shl->setOperand(i: `0`, Val: PoisonValue::get(T: Shl->getType())); // Drop use of op.
1129	Mul->takeName(V: Shl);
1130
1131	// Everyone now refers to the mul instruction.
1132	Shl->replaceAllUsesWith(V: Mul);
1133	Mul->setDebugLoc(Shl->getDebugLoc());
1134
1135	// We can safely preserve the nuw flag in all cases. It's also safe to turn a
1136	// nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1137	// handling. It can be preserved as long as we're not left shifting by
1138	// bitwidth - 1.
1139	bool NSW = cast<BinaryOperator>(Val: Shl)->hasNoSignedWrap();
1140	bool NUW = cast<BinaryOperator>(Val: Shl)->hasNoUnsignedWrap();
1141	unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1142	if (NSW && (NUW \|\| SA->getValue().ult(RHS: BitWidth - `1`)))
1143	Mul->setHasNoSignedWrap(true);
1144	Mul->setHasNoUnsignedWrap(NUW);
1145	return Mul;
1146	}
1147
1148	/// Scan backwards and forwards among values with the same rank as element i
1149	/// to see if X exists. If X does not exist, return i. This is useful when
1150	/// scanning for 'x' when we see '-x' because they both get the same rank.
1151	static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
1152	unsigned i, Value *X) {
1153	unsigned XRank = Ops [i].Rank;
1154	unsigned e = Ops.size();
1155	for (unsigned j = i+`1`; j != e && Ops [j].Rank == XRank; ++j) {
1156	if (Ops [j].Op == X)
1157	return j;
1158	if (Instruction *I1 = dyn_cast<Instruction>(Val: Ops [j].Op))
1159	if (Instruction *I2 = dyn_cast<Instruction>(Val: X))
1160	if (I1->isIdenticalTo(I: I2))
1161	return j;
1162	}
1163	// Scan backwards.
1164	for (unsigned j = i-`1`; j != ~`0U` && Ops [j].Rank == XRank; --j) {
1165	if (Ops [j].Op == X)
1166	return j;
1167	if (Instruction *I1 = dyn_cast<Instruction>(Val: Ops [j].Op))
1168	if (Instruction *I2 = dyn_cast<Instruction>(Val: X))
1169	if (I1->isIdenticalTo(I: I2))
1170	return j;
1171	}
1172	return i;
1173	}
1174
1175	/// Emit a tree of add instructions, summing Ops together
1176	/// and returning the result. Insert the tree before I.
1177	static Value *EmitAddTreeOfValues(BasicBlock::iterator It,
1178	SmallVectorImpl<WeakTrackingVH> &Ops) {
1179	if (Ops.size() == `1`) return Ops.back();
1180
1181	Value *V1 = Ops.pop_back_val();
1182	Value *V2 = EmitAddTreeOfValues(It, Ops);
1183	return CreateAdd(S1: V2, S2: V1, Name: "reass.add", InsertBefore: It, FlagsOp: &*It);
1184	}
1185
1186	/// If V is an expression tree that is a multiplication sequence,
1187	/// and if this sequence contains a multiply by Factor,
1188	/// remove Factor from the tree and return the new tree.
1189	Value ReassociatePass::RemoveFactorFromExpression(Value V, Value *Factor) {
1190	BinaryOperator *BO = isReassociableOp(V, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1191	if (!BO)
1192	return nullptr;
1193
1194	SmallVector<RepeatedValue, `8`> Tree;
1195	bool HasNUW = true;
1196	MadeChange \|= LinearizeExprTree(I: BO, Ops&: Tree, ToRedo&: RedoInsts, HasNUW);
1197	SmallVector<ValueEntry, `8`> Factors;
1198	Factors.reserve(N: Tree.size());
1199	for (unsigned i = `0`, e = Tree.size(); i != e; ++i) {
1200	RepeatedValue E = Tree [i];
1201	Factors.append(NumInputs: E.second.getZExtValue(),
1202	Elt: ValueEntry (getRank(V: E.first), E.first));
1203	}
1204
1205	bool FoundFactor = false;
1206	bool NeedsNegate = false;
1207	for (unsigned i = `0`, e = Factors.size(); i != e; ++i) {
1208	if (Factors [i].Op == Factor) {
1209	FoundFactor = true;
1210	Factors.erase(CI: Factors.begin()+i);
1211	break;
1212	}
1213
1214	// If this is a negative version of this factor, remove it.
1215	if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Val: Factor)) {
1216	if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Val: Factors [i].Op))
1217	if (FC1->getValue() == -FC2->getValue()) {
1218	FoundFactor = NeedsNegate = true;
1219	Factors.erase(CI: Factors.begin()+i);
1220	break;
1221	}
1222	} else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Val: Factor)) {
1223	if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Val: Factors [i].Op)) {
1224	const APFloat &F1 = FC1->getValueAPF();
1225	APFloat F2(FC2->getValueAPF());
1226	F2.changeSign();
1227	if (F1 == F2) {
1228	FoundFactor = NeedsNegate = true;
1229	Factors.erase(CI: Factors.begin() + i);
1230	break;
1231	}
1232	}
1233	}
1234	}
1235
1236	if (!FoundFactor) {
1237	// Make sure to restore the operands to the expression tree.
1238	RewriteExprTree(I: BO, Ops&: Factors, HasNUW);
1239	return nullptr;
1240	}
1241
1242	BasicBlock::iterator InsertPt = ++BO->getIterator();
1243
1244	// If this was just a single multiply, remove the multiply and return the only
1245	// remaining operand.
1246	if (Factors.size() == `1`) {
1247	RedoInsts.insert(X: BO);
1248	V = Factors [`0`].Op;
1249	} else {
1250	RewriteExprTree(I: BO, Ops&: Factors, HasNUW);
1251	V = BO;
1252	}
1253
1254	if (NeedsNegate)
1255	V = CreateNeg(S1: V, Name: "neg", InsertBefore: InsertPt, FlagsOp: BO);
1256
1257	return V;
1258	}
1259
1260	/// If V is a single-use multiply, recursively add its operands as factors,
1261	/// otherwise add V to the list of factors.
1262	///
1263	/// Ops is the top-level list of add operands we're trying to factor.
1264	static void FindSingleUseMultiplyFactors(Value *V,
1265	SmallVectorImpl<Value*> &Factors) {
1266	BinaryOperator *BO = isReassociableOp(V, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1267	if (!BO) {
1268	Factors.push_back(Elt: V);
1269	return;
1270	}
1271
1272	// Otherwise, add the LHS and RHS to the list of factors.
1273	FindSingleUseMultiplyFactors(V: BO->getOperand(i_nocapture: `1`), Factors);
1274	FindSingleUseMultiplyFactors(V: BO->getOperand(i_nocapture: `0`), Factors);
1275	}
1276
1277	/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1278	/// This optimizes based on identities. If it can be reduced to a single Value,
1279	/// it is returned, otherwise the Ops list is mutated as necessary.
1280	static Value OptimizeAndOrXor(unsigned* Opcode,
1281	SmallVectorImpl<ValueEntry> &Ops) {
1282	// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1283	// If we find any, we can simplify the expression. X&~X == 0, X\|~X == -1.
1284	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1285	// First, check for X and ~X in the operand list.
1286	assert(i < Ops.size());
1287	Value *X;
1288	if (match(V: Ops [i].Op, P: m_Not(V: m_Value(V&: X)))) { // Cannot occur for ^.
1289	unsigned FoundX = FindInOperandList(Ops, i, X);
1290	if (FoundX != i) {
1291	if (Opcode == Instruction::And) // ...&X&~X = 0
1292	return Constant::getNullValue(Ty: X->getType());
1293
1294	if (Opcode == Instruction::Or) // ...\|X\|~X = -1
1295	return Constant::getAllOnesValue(Ty: X->getType());
1296	}
1297	}
1298
1299	// Next, check for duplicate pairs of values, which we assume are next to
1300	// each other, due to our sorting criteria.
1301	assert(i < Ops.size());
1302	if (i+`1` != Ops.size() && Ops [i+`1`].Op == Ops [i].Op) {
1303	if (Opcode == Instruction::And \|\| Opcode == Instruction::Or) {
1304	// Drop duplicate values for And and Or.
1305	Ops.erase(CI: Ops.begin()+i);
1306	--i; --e;
1307	++NumAnnihil;
1308	continue;
1309	}
1310
1311	// Drop pairs of values for Xor.
1312	assert(Opcode == Instruction::Xor);
1313	if (e == `2`)
1314	return Constant::getNullValue(Ty: Ops [`0`].Op->getType());
1315
1316	// Y ^ X^X -> Y
1317	Ops.erase(CS: Ops.begin()+i, CE: Ops.begin()+i+`2`);
1318	i -= `1`; e -= `2`;
1319	++NumAnnihil;
1320	}
1321	}
1322	return nullptr;
1323	}
1324
1325	/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1326	/// instruction with the given two operands, and return the resulting
1327	/// instruction. There are two special cases: 1) if the constant operand is 0,
1328	/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1329	/// be returned.
1330	static Value createAndInstr(BasicBlock::iterator InsertBefore, Value Opnd,
1331	const APInt &ConstOpnd) {
1332	if (ConstOpnd.isZero())
1333	return nullptr;
1334
1335	if (ConstOpnd.isAllOnes())
1336	return Opnd;
1337
1338	Instruction *I = BinaryOperator::CreateAnd(
1339	V1: Opnd, V2: ConstantInt::get(Ty: Opnd->getType(), V: ConstOpnd), Name: "and.ra",
1340	It: InsertBefore);
1341	I->setDebugLoc(InsertBefore ->getDebugLoc());
1342	return I;
1343	}
1344
1345	// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1346	// into "R ^ C", where C would be 0, and R is a symbolic value.
1347	//
1348	// If it was successful, true is returned, and the "R" and "C" is returned
1349	// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1350	// and both "Res" and "ConstOpnd" remain unchanged.
1351	bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1352	APInt &ConstOpnd, Value *&Res) {
1353	// Xor-Rule 1: (x \| c1) ^ c2 = (x \| c1) ^ (c1 ^ c1) ^ c2
1354	// = ((x \| c1) ^ c1) ^ (c1 ^ c2)
1355	// = (x & ~c1) ^ (c1 ^ c2)
1356	// It is useful only when c1 == c2.
1357	if (!Opnd1->isOrExpr() \|\| Opnd1->getConstPart().isZero())
1358	return false;
1359
1360	if (!Opnd1->getValue()->hasOneUse())
1361	return false;
1362
1363	const APInt &C1 = Opnd1->getConstPart();
1364	if (C1 != ConstOpnd)
1365	return false;
1366
1367	Value *X = Opnd1->getSymbolicPart();
1368	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: ~C1);
1369	// ConstOpnd was C2, now C1 ^ C2.
1370	ConstOpnd ^= C1;
1371
1372	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd1->getValue()))
1373	RedoInsts.insert(X: T);
1374	return true;
1375	}
1376
1377	// Helper function of OptimizeXor(). It tries to simplify
1378	// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1379	// symbolic value.
1380	//
1381	// If it was successful, true is returned, and the "R" and "C" is returned
1382	// via "Res" and "ConstOpnd", respectively (If the entire expression is
1383	// evaluated to a constant, the Res is set to NULL); otherwise, false is
1384	// returned, and both "Res" and "ConstOpnd" remain unchanged.
1385	bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1386	XorOpnd *Opnd2, APInt &ConstOpnd,
1387	Value *&Res) {
1388	Value *X = Opnd1->getSymbolicPart();
1389	if (X != Opnd2->getSymbolicPart())
1390	return false;
1391
1392	// This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1393	int DeadInstNum = `1`;
1394	if (Opnd1->getValue()->hasOneUse())
1395	DeadInstNum++;
1396	if (Opnd2->getValue()->hasOneUse())
1397	DeadInstNum++;
1398
1399	// Xor-Rule 2:
1400	// (x \| c1) ^ (x & c2)
1401	// = (x\|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x\|c1) ^ c1) ^ (x & c2) ^ c1
1402	// = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1403	// = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1404	//
1405	if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1406	if (Opnd2->isOrExpr())
1407	std::swap(a&: Opnd1, b&: Opnd2);
1408
1409	const APInt &C1 = Opnd1->getConstPart();
1410	const APInt &C2 = Opnd2->getConstPart();
1411	APInt C3((~C1) ^ C2);
1412
1413	// Do not increase code size!
1414	if (!C3.isZero() && !C3.isAllOnes()) {
1415	int NewInstNum = ConstOpnd.getBoolValue() ? `1` : `2`;
1416	if (NewInstNum > DeadInstNum)
1417	return false;
1418	}
1419
1420	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1421	ConstOpnd ^= C1;
1422	} else if (Opnd1->isOrExpr()) {
1423	// Xor-Rule 3: (x \| c1) ^ (x \| c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1424	//
1425	const APInt &C1 = Opnd1->getConstPart();
1426	const APInt &C2 = Opnd2->getConstPart();
1427	APInt C3 = C1 ^ C2;
1428
1429	// Do not increase code size
1430	if (!C3.isZero() && !C3.isAllOnes()) {
1431	int NewInstNum = ConstOpnd.getBoolValue() ? `1` : `2`;
1432	if (NewInstNum > DeadInstNum)
1433	return false;
1434	}
1435
1436	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1437	ConstOpnd ^= C3;
1438	} else {
1439	// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1440	//
1441	const APInt &C1 = Opnd1->getConstPart();
1442	const APInt &C2 = Opnd2->getConstPart();
1443	APInt C3 = C1 ^ C2;
1444	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1445	}
1446
1447	// Put the original operands in the Redo list; hope they will be deleted
1448	// as dead code.
1449	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd1->getValue()))
1450	RedoInsts.insert(X: T);
1451	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd2->getValue()))
1452	RedoInsts.insert(X: T);
1453
1454	return true;
1455	}
1456
1457	/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1458	/// to a single Value, it is returned, otherwise the Ops list is mutated as
1459	/// necessary.
1460	Value ReassociatePass::OptimizeXor(Instruction I,
1461	SmallVectorImpl<ValueEntry> &Ops) {
1462	if (Value *V = OptimizeAndOrXor(Opcode: Instruction::Xor, Ops))
1463	return V;
1464
1465	if (Ops.size() == `1`)
1466	return nullptr;
1467
1468	SmallVector<XorOpnd, `8`> Opnds;
1469	SmallVector<XorOpnd*, `8`> OpndPtrs;
1470	Type *Ty = Ops [`0`].Op->getType();
1471	APInt ConstOpnd(Ty->getScalarSizeInBits(), `0`);
1472
1473	// Step 1: Convert ValueEntry to XorOpnd
1474	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1475	Value *V = Ops [i].Op;
1476	const APInt *C;
1477	// TODO: Support non-splat vectors.
1478	if (match(V, P: m_APInt(Res&: C))) {
1479	ConstOpnd ^= *C;
1480	} else {
1481	XorOpnd O(V);
1482	O.setSymbolicRank(getRank(V: O.getSymbolicPart()));
1483	Opnds.push_back(Elt: O);
1484	}
1485	}
1486
1487	// NOTE: From this point on, do NOT* add/delete element to/from "Opnds".*
1488	// It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1489	// the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1490	// with the previous loop --- the iterator of the "Opnds" may be invalidated
1491	// when new elements are added to the vector.
1492	for (unsigned i = `0`, e = Opnds.size(); i != e; ++i)
1493	OpndPtrs.push_back(Elt: &Opnds [i]);
1494
1495	// Step 2: Sort the Xor-Operands in a way such that the operands containing
1496	// the same symbolic value cluster together. For instance, the input operand
1497	// sequence ("x \| 123", "y & 456", "x & 789") will be sorted into:
1498	// ("x \| 123", "x & 789", "y & 456").
1499	//
1500	// The purpose is twofold:
1501	// 1) Cluster together the operands sharing the same symbolic-value.
1502	// 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1503	// could potentially shorten crital path, and expose more loop-invariants.
1504	// Note that values' rank are basically defined in RPO order (FIXME).
1505	// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1506	// than Y which is defined earlier than Z. Permute "x \| 1", "Y & 2",
1507	// "z" in the order of X-Y-Z is better than any other orders.
1508	llvm::stable_sort(Range&: OpndPtrs, C: [](XorOpnd LHS, XorOpnd RHS) {
1509	return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1510	});
1511
1512	// Step 3: Combine adjacent operands
1513	XorOpnd PrevOpnd = nullptr*;
1514	bool Changed = false;
1515	for (unsigned i = `0`, e = Opnds.size(); i < e; i++) {
1516	XorOpnd *CurrOpnd = OpndPtrs [i];
1517	// The combined value
1518	Value *CV;
1519
1520	// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1521	if (!ConstOpnd.isZero() &&
1522	CombineXorOpnd(It: I->getIterator(), Opnd1: CurrOpnd, ConstOpnd, Res&: CV)) {
1523	Changed = true;
1524	if (CV)
1525	*CurrOpnd = XorOpnd (CV);
1526	else {
1527	CurrOpnd->Invalidate();
1528	continue;
1529	}
1530	}
1531
1532	if (!PrevOpnd \|\| CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1533	PrevOpnd = CurrOpnd;
1534	continue;
1535	}
1536
1537	// step 3.2: When previous and current operands share the same symbolic
1538	// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1539	if (CombineXorOpnd(It: I->getIterator(), Opnd1: CurrOpnd, Opnd2: PrevOpnd, ConstOpnd, Res&: CV)) {
1540	// Remove previous operand
1541	PrevOpnd->Invalidate();
1542	if (CV) {
1543	*CurrOpnd = XorOpnd (CV);
1544	PrevOpnd = CurrOpnd;
1545	} else {
1546	CurrOpnd->Invalidate();
1547	PrevOpnd = nullptr;
1548	}
1549	Changed = true;
1550	}
1551	}
1552
1553	// Step 4: Reassemble the Ops
1554	if (Changed) {
1555	Ops.clear();
1556	for (const XorOpnd &O : Opnds) {
1557	if (O.isInvalid())
1558	continue;
1559	ValueEntry VE(getRank(V: O.getValue()), O.getValue());
1560	Ops.push_back(Elt: VE);
1561	}
1562	if (!ConstOpnd.isZero()) {
1563	Value *C = ConstantInt::get(Ty, V: ConstOpnd);
1564	ValueEntry VE(getRank(V: C), C);
1565	Ops.push_back(Elt: VE);
1566	}
1567	unsigned Sz = Ops.size();
1568	if (Sz == `1`)
1569	return Ops.back().Op;
1570	if (Sz == `0`) {
1571	assert(ConstOpnd.isZero());
1572	return ConstantInt::get(Ty, V: ConstOpnd);
1573	}
1574	}
1575
1576	return nullptr;
1577	}
1578
1579	/// Optimize a series of operands to an 'add' instruction. This
1580	/// optimizes based on identities. If it can be reduced to a single Value, it
1581	/// is returned, otherwise the Ops list is mutated as necessary.
1582	Value ReassociatePass::OptimizeAdd(Instruction I,
1583	SmallVectorImpl<ValueEntry> &Ops) {
1584	// Scan the operand lists looking for X and -X pairs. If we find any, we
1585	// can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1586	// scan for any
1587	// duplicates. We want to canonicalize Y+Y+Y+Z -> 3Y+Z.*
1588
1589	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1590	Value *TheOp = Ops [i].Op;
1591	// Check to see if we've seen this operand before. If so, we factor all
1592	// instances of the operand together. Due to our sorting criteria, we know
1593	// that these need to be next to each other in the vector.
1594	if (i+`1` != Ops.size() && Ops [i+`1`].Op == TheOp) {
1595	// Rescan the list, remove all instances of this operand from the expr.
1596	unsigned NumFound = `0`;
1597	do {
1598	Ops.erase(CI: Ops.begin()+i);
1599	++NumFound;
1600	} while (i != Ops.size() && Ops [i].Op == TheOp);
1601
1602	LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1603	<< `'\n'`);
1604	++NumFactor;
1605
1606	// Insert a new multiply.
1607	Type *Ty = TheOp->getType();
1608	Constant *C = Ty->isIntOrIntVectorTy() ?
1609	ConstantInt::get(Ty, V: NumFound) : ConstantFP::get(Ty, V: NumFound);
1610	Instruction *Mul = CreateMul(S1: TheOp, S2: C, Name: "factor", InsertBefore: I->getIterator(), FlagsOp: I);
1611
1612	// Now that we have inserted a multiply, optimize it. This allows us to
1613	// handle cases that require multiple factoring steps, such as this:
1614	// (X2) + (X2) + (X2) -> (X2)3 -> X6
1615	RedoInsts.insert(X: Mul);
1616
1617	// If every add operand was a duplicate, return the multiply.
1618	if (Ops.empty())
1619	return Mul;
1620
1621	// Otherwise, we had some input that didn't have the dupe, such as
1622	// "A + A + B" -> "A2 + B". Add the new multiply to the list of*
1623	// things being added by this operation.
1624	Ops.insert(I: Ops.begin(), Elt: ValueEntry (getRank(V: Mul), Mul));
1625
1626	--i;
1627	e = Ops.size();
1628	continue;
1629	}
1630
1631	// Check for X and -X or X and ~X in the operand list.
1632	Value *X;
1633	if (!match(V: TheOp, P: m_Neg(V: m_Value(V&: X))) && !match(V: TheOp, P: m_Not(V: m_Value(V&: X))) &&
1634	!match(V: TheOp, P: m_FNeg(X: m_Value(V&: X))))
1635	continue;
1636
1637	unsigned FoundX = FindInOperandList(Ops, i, X);
1638	if (FoundX == i)
1639	continue;
1640
1641	// Remove X and -X from the operand list.
1642	if (Ops.size() == `2` &&
1643	(match(V: TheOp, P: m_Neg(V: m_Value())) \|\| match(V: TheOp, P: m_FNeg(X: m_Value()))))
1644	return Constant::getNullValue(Ty: X->getType());
1645
1646	// Remove X and ~X from the operand list.
1647	if (Ops.size() == `2` && match(V: TheOp, P: m_Not(V: m_Value())))
1648	return Constant::getAllOnesValue(Ty: X->getType());
1649
1650	Ops.erase(CI: Ops.begin()+i);
1651	if (i < FoundX)
1652	--FoundX;
1653	else
1654	--i; // Need to back up an extra one.
1655	Ops.erase(CI: Ops.begin()+FoundX);
1656	++NumAnnihil;
1657	--i; // Revisit element.
1658	e -= `2`; // Removed two elements.
1659
1660	// if X and ~X we append -1 to the operand list.
1661	if (match(V: TheOp, P: m_Not(V: m_Value()))) {
1662	Value *V = Constant::getAllOnesValue(Ty: X->getType());
1663	Ops.insert(I: Ops.end(), Elt: ValueEntry (getRank(V), V));
1664	e += `1`;
1665	}
1666	}
1667
1668	// Scan the operand list, checking to see if there are any common factors
1669	// between operands. Consider something like AA+ABC+D. We would like to*
1670	// reassociate this to A(A+BC)+D, which reduces the number of multiplies.
1671	// To efficiently find this, we count the number of times a factor occurs
1672	// for any ADD operands that are MULs.
1673	DenseMap<Value, unsigned*> FactorOccurrences;
1674
1675	// Keep track of each multiply we see, to avoid triggering on (X4)+(X4)
1676	// where they are actually the same multiply.
1677	unsigned MaxOcc = `0`;
1678	Value MaxOccVal = nullptr*;
1679	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1680	BinaryOperator *BOp =
1681	isReassociableOp(V: Ops [i].Op, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1682	if (!BOp)
1683	continue;
1684
1685	// Compute all of the factors of this added value.
1686	SmallVector<Value*, `8`> Factors;
1687	FindSingleUseMultiplyFactors(V: BOp, Factors);
1688	assert(Factors.size() > `1` && "Bad linearize!");
1689
1690	// Add one to FactorOccurrences for each unique factor in this op.
1691	SmallPtrSet<Value*, `8`> Duplicates;
1692	for (Value *Factor : Factors) {
1693	if (!Duplicates.insert(Ptr: Factor).second)
1694	continue;
1695
1696	unsigned Occ = ++FactorOccurrences [Factor];
1697	if (Occ > MaxOcc) {
1698	MaxOcc = Occ;
1699	MaxOccVal = Factor;
1700	}
1701
1702	// If Factor is a negative constant, add the negated value as a factor
1703	// because we can percolate the negate out. Watch for minint, which
1704	// cannot be positivified.
1705	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Factor)) {
1706	if (CI->isNegative() && !CI->isMinValue(IsSigned: true)) {
1707	Factor = ConstantInt::get(Context&: CI->getContext(), V: -CI->getValue());
1708	if (!Duplicates.insert(Ptr: Factor).second)
1709	continue;
1710	unsigned Occ = ++FactorOccurrences [Factor];
1711	if (Occ > MaxOcc) {
1712	MaxOcc = Occ;
1713	MaxOccVal = Factor;
1714	}
1715	}
1716	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: Factor)) {
1717	if (CF->isNegative()) {
1718	APFloat F(CF->getValueAPF());
1719	F.changeSign();
1720	Factor = ConstantFP::get(Context&: CF->getContext(), V: F);
1721	if (!Duplicates.insert(Ptr: Factor).second)
1722	continue;
1723	unsigned Occ = ++FactorOccurrences [Factor];
1724	if (Occ > MaxOcc) {
1725	MaxOcc = Occ;
1726	MaxOccVal = Factor;
1727	}
1728	}
1729	}
1730	}
1731	}
1732
1733	// If any factor occurred more than one time, we can pull it out.
1734	if (MaxOcc > `1`) {
1735	LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1736	<< `'\n'`);
1737	++NumFactor;
1738
1739	// Create a new instruction that uses the MaxOccVal twice. If we don't do
1740	// this, we could otherwise run into situations where removing a factor
1741	// from an expression will drop a use of maxocc, and this can cause
1742	// RemoveFactorFromExpression on successive values to behave differently.
1743	Instruction *DummyInst =
1744	I->getType()->isIntOrIntVectorTy()
1745	? BinaryOperator::CreateAdd(V1: MaxOccVal, V2: MaxOccVal)
1746	: BinaryOperator::CreateFAdd(V1: MaxOccVal, V2: MaxOccVal);
1747
1748	SmallVector<WeakTrackingVH, `4`> NewMulOps;
1749	for (unsigned i = `0`; i != Ops.size(); ++i) {
1750	// Only try to remove factors from expressions we're allowed to.
1751	BinaryOperator *BOp =
1752	isReassociableOp(V: Ops [i].Op, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1753	if (!BOp)
1754	continue;
1755
1756	if (Value *V = RemoveFactorFromExpression(V: Ops [i].Op, Factor: MaxOccVal)) {
1757	// The factorized operand may occur several times. Convert them all in
1758	// one fell swoop.
1759	for (unsigned j = Ops.size(); j != i;) {
1760	--j;
1761	if (Ops [j].Op == Ops [i].Op) {
1762	NewMulOps.push_back(Elt: V);
1763	Ops.erase(CI: Ops.begin()+j);
1764	}
1765	}
1766	--i;
1767	}
1768	}
1769
1770	// No need for extra uses anymore.
1771	DummyInst->deleteValue();
1772
1773	unsigned NumAddedValues = NewMulOps.size();
1774	Value *V = EmitAddTreeOfValues(It: I->getIterator(), Ops&: NewMulOps);
1775
1776	// Now that we have inserted the add tree, optimize it. This allows us to
1777	// handle cases that require multiple factoring steps, such as this:
1778	// AAB + AAC --> A(AB+AC) --> A(A(B+C))*
1779	assert(NumAddedValues > `1` && "Each occurrence should contribute a value");
1780	(void)NumAddedValues;
1781	if (Instruction *VI = dyn_cast<Instruction>(Val: V))
1782	RedoInsts.insert(X: VI);
1783
1784	// Create the multiply.
1785	Instruction *V2 = CreateMul(S1: V, S2: MaxOccVal, Name: "reass.mul", InsertBefore: I->getIterator(), FlagsOp: I);
1786
1787	// Rerun associate on the multiply in case the inner expression turned into
1788	// a multiply. We want to make sure that we keep things in canonical form.
1789	RedoInsts.insert(X: V2);
1790
1791	// If every add operand included the factor (e.g. "AB + AC"), then the
1792	// entire result expression is just the multiply "A(B+C)".*
1793	if (Ops.empty())
1794	return V2;
1795
1796	// Otherwise, we had some input that didn't have the factor, such as
1797	// "AB + AC + D" -> "A(B+C) + D". Add the new multiply to the list of*
1798	// things being added by this operation.
1799	Ops.insert(I: Ops.begin(), Elt: ValueEntry (getRank(V: V2), V2));
1800	}
1801
1802	return nullptr;
1803	}
1804
1805	/// Build up a vector of value/power pairs factoring a product.
1806	///
1807	/// Given a series of multiplication operands, build a vector of factors and
1808	/// the powers each is raised to when forming the final product. Sort them in
1809	/// the order of descending power.
1810	///
1811	/// (xx) -> [(x, 2)]*
1812	/// ((xx)x) -> [(x, 3)]
1813	/// ((((xy)x)y)x) -> [(x, 3), (y, 2)]
1814	///
1815	/// \returns Whether any factors have a power greater than one.
1816	static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
1817	SmallVectorImpl<Factor> &Factors) {
1818	// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1819	// Compute the sum of powers of simplifiable factors.
1820	unsigned FactorPowerSum = `0`;
1821	for (unsigned Idx = `1`, Size = Ops.size(); Idx < Size; ++Idx) {
1822	Value *Op = Ops [Idx-`1`].Op;
1823
1824	// Count the number of occurrences of this value.
1825	unsigned Count = `1`;
1826	for (; Idx < Size && Ops [Idx].Op == Op; ++Idx)
1827	++Count;
1828	// Track for simplification all factors which occur 2 or more times.
1829	if (Count > `1`)
1830	FactorPowerSum += Count;
1831	}
1832
1833	// We can only simplify factors if the sum of the powers of our simplifiable
1834	// factors is 4 or higher. When that is the case, we will always* have*
1835	// a simplification. This is an important invariant to prevent cyclicly
1836	// trying to simplify already minimal formations.
1837	if (FactorPowerSum < `4`)
1838	return false;
1839
1840	// Now gather the simplifiable factors, removing them from Ops.
1841	FactorPowerSum = `0`;
1842	for (unsigned Idx = `1`; Idx < Ops.size(); ++Idx) {
1843	Value *Op = Ops [Idx-`1`].Op;
1844
1845	// Count the number of occurrences of this value.
1846	unsigned Count = `1`;
1847	for (; Idx < Ops.size() && Ops [Idx].Op == Op; ++Idx)
1848	++Count;
1849	if (Count == `1`)
1850	continue;
1851	// Move an even number of occurrences to Factors.
1852	Count &= ~`1U`;
1853	Idx -= Count;
1854	FactorPowerSum += Count;
1855	Factors.push_back(Elt: Factor (Op, Count));
1856	Ops.erase(CS: Ops.begin()+Idx, CE: Ops.begin()+Idx+Count);
1857	}
1858
1859	// None of the adjustments above should have reduced the sum of factor powers
1860	// below our mininum of '4'.
1861	assert(FactorPowerSum >= `4`);
1862
1863	llvm::stable_sort(Range&: Factors, C: [](const Factor &LHS, const Factor &RHS) {
1864	return LHS.Power > RHS.Power;
1865	});
1866	return true;
1867	}
1868
1869	/// Build a tree of multiplies, computing the product of Ops.
1870	static Value *buildMultiplyTree(IRBuilderBase &Builder,
1871	SmallVectorImpl<Value*> &Ops) {
1872	if (Ops.size() == `1`)
1873	return Ops.back();
1874
1875	Value *LHS = Ops.pop_back_val();
1876	do {
1877	if (LHS->getType()->isIntOrIntVectorTy())
1878	LHS = Builder.CreateMul(LHS, RHS: Ops.pop_back_val());
1879	else
1880	LHS = Builder.CreateFMul(L: LHS, R: Ops.pop_back_val());
1881	} while (!Ops.empty());
1882
1883	return LHS;
1884	}
1885
1886	/// Build a minimal multiplication DAG for (a^x)(b^y)(c^z)...*
1887	///
1888	/// Given a vector of values raised to various powers, where no two values are
1889	/// equal and the powers are sorted in decreasing order, compute the minimal
1890	/// DAG of multiplies to compute the final product, and return that product
1891	/// value.
1892	Value *
1893	ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1894	SmallVectorImpl<Factor> &Factors) {
1895	assert(Factors[`0`].Power);
1896	SmallVector<Value *, `4`> OuterProduct;
1897	for (unsigned LastIdx = `0`, Idx = `1`, Size = Factors.size();
1898	Idx < Size && Factors [Idx].Power > `0`; ++Idx) {
1899	if (Factors [Idx].Power != Factors [LastIdx].Power) {
1900	LastIdx = Idx;
1901	continue;
1902	}
1903
1904	// We want to multiply across all the factors with the same power so that
1905	// we can raise them to that power as a single entity. Build a mini tree
1906	// for that.
1907	SmallVector<Value *, `4`> InnerProduct;
1908	InnerProduct.push_back(Elt: Factors [LastIdx].Base);
1909	do {
1910	InnerProduct.push_back(Elt: Factors [Idx].Base);
1911	++Idx;
1912	} while (Idx < Size && Factors [Idx].Power == Factors [LastIdx].Power);
1913
1914	// Reset the base value of the first factor to the new expression tree.
1915	// We'll remove all the factors with the same power in a second pass.
1916	Value *M = Factors [LastIdx].Base = buildMultiplyTree(Builder, Ops&: InnerProduct);
1917	if (Instruction *MI = dyn_cast<Instruction>(Val: M))
1918	RedoInsts.insert(X: MI);
1919
1920	LastIdx = Idx;
1921	}
1922	// Unique factors with equal powers -- we've folded them into the first one's
1923	// base.
1924	Factors.erase(CS: std::unique(first: Factors.begin(), last: Factors.end(),
1925	binary_pred: [](const Factor &LHS, const Factor &RHS) {
1926	return LHS.Power == RHS.Power;
1927	}),
1928	CE: Factors.end());
1929
1930	// Iteratively collect the base of each factor with an add power into the
1931	// outer product, and halve each power in preparation for squaring the
1932	// expression.
1933	for (Factor &F : Factors) {
1934	if (F.Power & `1`)
1935	OuterProduct.push_back(Elt: F.Base);
1936	F.Power >>= `1`;
1937	}
1938	if (Factors [`0`].Power) {
1939	Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1940	OuterProduct.push_back(Elt: SquareRoot);
1941	OuterProduct.push_back(Elt: SquareRoot);
1942	}
1943	if (OuterProduct.size() == `1`)
1944	return OuterProduct.front();
1945
1946	Value *V = buildMultiplyTree(Builder, Ops&: OuterProduct);
1947	return V;
1948	}
1949
1950	Value ReassociatePass::OptimizeMul(BinaryOperator I,
1951	SmallVectorImpl<ValueEntry> &Ops) {
1952	// We can only optimize the multiplies when there is a chain of more than
1953	// three, such that a balanced tree might require fewer total multiplies.
1954	if (Ops.size() < `4`)
1955	return nullptr;
1956
1957	// Try to turn linear trees of multiplies without other uses of the
1958	// intermediate stages into minimal multiply DAGs with perfect sub-expression
1959	// re-use.
1960	SmallVector<Factor, `4`> Factors;
1961	if (!collectMultiplyFactors(Ops, Factors))
1962	return nullptr; // All distinct factors, so nothing left for us to do.
1963
1964	IRBuilder<> Builder(I);
1965	// The reassociate transformation for FP operations is performed only
1966	// if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1967	// to the newly generated operations.
1968	if (auto FPI = dyn_cast<FPMathOperator>(Val: I))
1969	Builder.setFastMathFlags(FPI->getFastMathFlags());
1970
1971	Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1972	if (Ops.empty())
1973	return V;
1974
1975	ValueEntry NewEntry = ValueEntry (getRank(V), V);
1976	Ops.insert(I: llvm::lower_bound(Range&: Ops, Value&: NewEntry), Elt: NewEntry);
1977	return nullptr;
1978	}
1979
1980	Value ReassociatePass::OptimizeExpression(BinaryOperator I,
1981	SmallVectorImpl<ValueEntry> &Ops) {
1982	// Now that we have the linearized expression tree, try to optimize it.
1983	// Start by folding any constants that we found.
1984	const DataLayout &DL = I->getModule()->getDataLayout();
1985	Constant Cst = nullptr*;
1986	unsigned Opcode = I->getOpcode();
1987	while (!Ops.empty()) {
1988	if (auto *C = dyn_cast<Constant>(Val: Ops.back().Op)) {
1989	if (!Cst) {
1990	Ops.pop_back();
1991	Cst = C;
1992	continue;
1993	}
1994	if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: C, RHS: Cst, DL)) {
1995	Ops.pop_back();
1996	Cst = Res;
1997	continue;
1998	}
1999	}
2000	break;
2001	}
2002	// If there was nothing but constants then we are done.
2003	if (Ops.empty())
2004	return Cst;
2005
2006	// Put the combined constant back at the end of the operand list, except if
2007	// there is no point. For example, an add of 0 gets dropped here, while a
2008	// multiplication by zero turns the whole expression into zero.
2009	if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, Ty: I->getType())) {
2010	if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, Ty: I->getType()))
2011	return Cst;
2012	Ops.push_back(Elt: ValueEntry (`0`, Cst));
2013	}
2014
2015	if (Ops.size() == `1`) return Ops [`0`].Op;
2016
2017	// Handle destructive annihilation due to identities between elements in the
2018	// argument list here.
2019	unsigned NumOps = Ops.size();
2020	switch (Opcode) {
2021	default: break;
2022	case Instruction::And:
2023	case Instruction::Or:
2024	if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
2025	return Result;
2026	break;
2027
2028	case Instruction::Xor:
2029	if (Value *Result = OptimizeXor(I, Ops))
2030	return Result;
2031	break;
2032
2033	case Instruction::Add:
2034	case Instruction::FAdd:
2035	if (Value *Result = OptimizeAdd(I, Ops))
2036	return Result;
2037	break;
2038
2039	case Instruction::Mul:
2040	case Instruction::FMul:
2041	if (Value *Result = OptimizeMul(I, Ops))
2042	return Result;
2043	break;
2044	}
2045
2046	if (Ops.size() != NumOps)
2047	return OptimizeExpression(I, Ops);
2048	return nullptr;
2049	}
2050
2051	// Remove dead instructions and if any operands are trivially dead add them to
2052	// Insts so they will be removed as well.
2053	void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2054	OrderedSet &Insts) {
2055	assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2056	SmallVector<Value *, `4`> Ops(I->operands());
2057	ValueRankMap.erase(Val: I);
2058	Insts.remove(X: I);
2059	RedoInsts.remove(X: I);
2060	llvm::salvageDebugInfo(I&: *I);
2061	I->eraseFromParent();
2062	for (auto *Op : Ops)
2063	if (Instruction *OpInst = dyn_cast<Instruction>(Val: Op))
2064	if (OpInst->use_empty())
2065	Insts.insert(X: OpInst);
2066	}
2067
2068	/// Zap the given instruction, adding interesting operands to the work list.
2069	void ReassociatePass::EraseInst(Instruction *I) {
2070	assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2071	LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2072
2073	SmallVector<Value *, `8`> Ops(I->operands());
2074	// Erase the dead instruction.
2075	ValueRankMap.erase(Val: I);
2076	RedoInsts.remove(X: I);
2077	llvm::salvageDebugInfo(I&: *I);
2078	I->eraseFromParent();
2079	// Optimize its operands.
2080	SmallPtrSet<Instruction , `8`> Visited; // Detect self-referential nodes.*
2081	for (unsigned i = `0`, e = Ops.size(); i != e; ++i)
2082	if (Instruction *Op = dyn_cast<Instruction>(Val: Ops [i])) {
2083	// If this is a node in an expression tree, climb to the expression root
2084	// and add that since that's where optimization actually happens.
2085	unsigned Opcode = Op->getOpcode();
2086	while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2087	Visited.insert(Ptr: Op).second)
2088	Op = Op->user_back();
2089
2090	// The instruction we're going to push may be coming from a
2091	// dead block, and Reassociate skips the processing of unreachable
2092	// blocks because it's a waste of time and also because it can
2093	// lead to infinite loop due to LLVM's non-standard definition
2094	// of dominance.
2095	if (ValueRankMap.contains(Val: Op))
2096	RedoInsts.insert(X: Op);
2097	}
2098
2099	MadeChange = true;
2100	}
2101
2102	/// Recursively analyze an expression to build a list of instructions that have
2103	/// negative floating-point constant operands. The caller can then transform
2104	/// the list to create positive constants for better reassociation and CSE.
2105	static void getNegatibleInsts(Value *V,
2106	SmallVectorImpl<Instruction *> &Candidates) {
2107	// Handle only one-use instructions. Combining negations does not justify
2108	// replicating instructions.
2109	Instruction *I;
2110	if (!match(V, P: m_OneUse(SubPattern: m_Instruction(I))))
2111	return;
2112
2113	// Handle expressions of multiplications and divisions.
2114	// TODO: This could look through floating-point casts.
2115	const APFloat *C;
2116	switch (I->getOpcode()) {
2117	case Instruction::FMul:
2118	// Not expecting non-canonical code here. Bail out and wait.
2119	if (match(V: I->getOperand(i: `0`), P: m_Constant()))
2120	break;
2121
2122	if (match(V: I->getOperand(i: `1`), P: m_APFloat(Res&: C)) && C->isNegative()) {
2123	Candidates.push_back(Elt: I);
2124	LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << `'\n'`);
2125	}
2126	getNegatibleInsts(V: I->getOperand(i: `0`), Candidates);
2127	getNegatibleInsts(V: I->getOperand(i: `1`), Candidates);
2128	break;
2129	case Instruction::FDiv:
2130	// Not expecting non-canonical code here. Bail out and wait.
2131	if (match(V: I->getOperand(i: `0`), P: m_Constant()) &&
2132	match(V: I->getOperand(i: `1`), P: m_Constant()))
2133	break;
2134
2135	if ((match(V: I->getOperand(i: `0`), P: m_APFloat(Res&: C)) && C->isNegative()) \|\|
2136	(match(V: I->getOperand(i: `1`), P: m_APFloat(Res&: C)) && C->isNegative())) {
2137	Candidates.push_back(Elt: I);
2138	LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << `'\n'`);
2139	}
2140	getNegatibleInsts(V: I->getOperand(i: `0`), Candidates);
2141	getNegatibleInsts(V: I->getOperand(i: `1`), Candidates);
2142	break;
2143	default:
2144	break;
2145	}
2146	}
2147
2148	/// Given an fadd/fsub with an operand that is a one-use instruction
2149	/// (the fadd/fsub), try to change negative floating-point constants into
2150	/// positive constants to increase potential for reassociation and CSE.
2151	Instruction ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction I,
2152	Instruction *Op,
2153	Value *OtherOp) {
2154	assert((I->getOpcode() == Instruction::FAdd \|\|
2155	I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2156
2157	// Collect instructions with negative FP constants from the subtree that ends
2158	// in Op.
2159	SmallVector<Instruction *, `4`> Candidates;
2160	getNegatibleInsts(V: Op, Candidates);
2161	if (Candidates.empty())
2162	return nullptr;
2163
2164	// Don't canonicalize x + (-Constant y) -> x - (Constant * y), if the*
2165	// resulting subtract will be broken up later. This can get us into an
2166	// infinite loop during reassociation.
2167	bool IsFSub = I->getOpcode() == Instruction::FSub;
2168	bool NeedsSubtract = !IsFSub && Candidates.size() % `2` == `1`;
2169	if (NeedsSubtract && ShouldBreakUpSubtract(Sub: I))
2170	return nullptr;
2171
2172	for (Instruction *Negatible : Candidates) {
2173	const APFloat *C;
2174	if (match(V: Negatible->getOperand(i: `0`), P: m_APFloat(Res&: C))) {
2175	assert(!match(Negatible->getOperand(`1`), m_Constant()) &&
2176	"Expecting only 1 constant operand");
2177	assert(C->isNegative() && "Expected negative FP constant");
2178	Negatible->setOperand(i: `0`, Val: ConstantFP::get(Ty: Negatible->getType(), V: abs(X: *C)));
2179	MadeChange = true;
2180	}
2181	if (match(V: Negatible->getOperand(i: `1`), P: m_APFloat(Res&: C))) {
2182	assert(!match(Negatible->getOperand(`0`), m_Constant()) &&
2183	"Expecting only 1 constant operand");
2184	assert(C->isNegative() && "Expected negative FP constant");
2185	Negatible->setOperand(i: `1`, Val: ConstantFP::get(Ty: Negatible->getType(), V: abs(X: *C)));
2186	MadeChange = true;
2187	}
2188	}
2189	assert(MadeChange == true && "Negative constant candidate was not changed");
2190
2191	// Negations cancelled out.
2192	if (Candidates.size() % `2` == `0`)
2193	return I;
2194
2195	// Negate the final operand in the expression by flipping the opcode of this
2196	// fadd/fsub.
2197	assert(Candidates.size() % `2` == `1` && "Expected odd number");
2198	IRBuilder<> Builder(I);
2199	Value *NewInst = IsFSub ? Builder.CreateFAddFMF(L: OtherOp, R: Op, FMFSource: I)
2200	: Builder.CreateFSubFMF(L: OtherOp, R: Op, FMFSource: I);
2201	I->replaceAllUsesWith(V: NewInst);
2202	RedoInsts.insert(X: I);
2203	return dyn_cast<Instruction>(Val: NewInst);
2204	}
2205
2206	/// Canonicalize expressions that contain a negative floating-point constant
2207	/// of the following form:
2208	/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2209	/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2210	/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2211	///
2212	/// The fadd/fsub opcode may be switched to allow folding a negation into the
2213	/// input instruction.
2214	Instruction ReassociatePass::canonicalizeNegFPConstants(Instruction I) {
2215	LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << `'\n'`);
2216	Value *X;
2217	Instruction *Op;
2218	if (match(V: I, P: m_FAdd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Instruction(I&: Op)))))
2219	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2220	I = R;
2221	if (match(V: I, P: m_FAdd(L: m_OneUse(SubPattern: m_Instruction(I&: Op)), R: m_Value(V&: X))))
2222	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2223	I = R;
2224	if (match(V: I, P: m_FSub(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Instruction(I&: Op)))))
2225	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2226	I = R;
2227	return I;
2228	}
2229
2230	/// Inspect and optimize the given instruction. Note that erasing
2231	/// instructions is not allowed.
2232	void ReassociatePass::OptimizeInst(Instruction *I) {
2233	// Only consider operations that we understand.
2234	if (!isa<UnaryOperator>(Val: I) && !isa<BinaryOperator>(Val: I))
2235	return;
2236
2237	if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(Val: I->getOperand(i: `1`)))
2238	// If an operand of this shift is a reassociable multiply, or if the shift
2239	// is used by a reassociable multiply or add, turn into a multiply.
2240	if (isReassociableOp(V: I->getOperand(i: `0`), Opcode: Instruction::Mul) \|\|
2241	(I->hasOneUse() &&
2242	(isReassociableOp(V: I->user_back(), Opcode: Instruction::Mul) \|\|
2243	isReassociableOp(V: I->user_back(), Opcode: Instruction::Add)))) {
2244	Instruction *NI = ConvertShiftToMul(Shl: I);
2245	RedoInsts.insert(X: I);
2246	MadeChange = true;
2247	I = NI;
2248	}
2249
2250	// Commute binary operators, to canonicalize the order of their operands.
2251	// This can potentially expose more CSE opportunities, and makes writing other
2252	// transformations simpler.
2253	if (I->isCommutative())
2254	canonicalizeOperands(I);
2255
2256	// Canonicalize negative constants out of expressions.
2257	if (Instruction *Res = canonicalizeNegFPConstants(I))
2258	I = Res;
2259
2260	// Don't optimize floating-point instructions unless they have the
2261	// appropriate FastMathFlags for reassociation enabled.
2262	if (isa<FPMathOperator>(Val: I) && !hasFPAssociativeFlags(I))
2263	return;
2264
2265	// Do not reassociate boolean (i1) expressions. We want to preserve the
2266	// original order of evaluation for short-circuited comparisons that
2267	// SimplifyCFG has folded to AND/OR expressions. If the expression
2268	// is not further optimized, it is likely to be transformed back to a
2269	// short-circuited form for code gen, and the source order may have been
2270	// optimized for the most likely conditions.
2271	if (I->getType()->isIntegerTy(Bitwidth: `1`))
2272	return;
2273
2274	// If this is a bitwise or instruction of operands
2275	// with no common bits set, convert it to X+Y.
2276	if (I->getOpcode() == Instruction::Or &&
2277	shouldConvertOrWithNoCommonBitsToAdd(Or: I) && !isLoadCombineCandidate(Or: I) &&
2278	(cast<PossiblyDisjointInst>(Val: I)->isDisjoint() \|\|
2279	haveNoCommonBitsSet(LHSCache: I->getOperand(i: `0`), RHSCache: I->getOperand(i: `1`),
2280	SQ: SimplifyQuery (I->getModule()->getDataLayout(),
2281	/DT=/nullptr, /AC=/nullptr, I)))) {
2282	Instruction *NI = convertOrWithNoCommonBitsToAdd(Or: I);
2283	RedoInsts.insert(X: I);
2284	MadeChange = true;
2285	I = NI;
2286	}
2287
2288	// If this is a subtract instruction which is not already in negate form,
2289	// see if we can convert it to X+-Y.
2290	if (I->getOpcode() == Instruction::Sub) {
2291	if (ShouldBreakUpSubtract(Sub: I)) {
2292	Instruction *NI = BreakUpSubtract(Sub: I, ToRedo&: RedoInsts);
2293	RedoInsts.insert(X: I);
2294	MadeChange = true;
2295	I = NI;
2296	} else if (match(V: I, P: m_Neg(V: m_Value()))) {
2297	// Otherwise, this is a negation. See if the operand is a multiply tree
2298	// and if this is not an inner node of a multiply tree.
2299	if (isReassociableOp(V: I->getOperand(i: `1`), Opcode: Instruction::Mul) &&
2300	(!I->hasOneUse() \|\|
2301	!isReassociableOp(V: I->user_back(), Opcode: Instruction::Mul))) {
2302	Instruction *NI = LowerNegateToMultiply(Neg: I);
2303	// If the negate was simplified, revisit the users to see if we can
2304	// reassociate further.
2305	for (User *U : NI->users()) {
2306	if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(Val: U))
2307	RedoInsts.insert(X: Tmp);
2308	}
2309	RedoInsts.insert(X: I);
2310	MadeChange = true;
2311	I = NI;
2312	}
2313	}
2314	} else if (I->getOpcode() == Instruction::FNeg \|\|
2315	I->getOpcode() == Instruction::FSub) {
2316	if (ShouldBreakUpSubtract(Sub: I)) {
2317	Instruction *NI = BreakUpSubtract(Sub: I, ToRedo&: RedoInsts);
2318	RedoInsts.insert(X: I);
2319	MadeChange = true;
2320	I = NI;
2321	} else if (match(V: I, P: m_FNeg(X: m_Value()))) {
2322	// Otherwise, this is a negation. See if the operand is a multiply tree
2323	// and if this is not an inner node of a multiply tree.
2324	Value *Op = isa<BinaryOperator>(Val: I) ? I->getOperand(i: `1`) :
2325	I->getOperand(i: `0`);
2326	if (isReassociableOp(V: Op, Opcode: Instruction::FMul) &&
2327	(!I->hasOneUse() \|\|
2328	!isReassociableOp(V: I->user_back(), Opcode: Instruction::FMul))) {
2329	// If the negate was simplified, revisit the users to see if we can
2330	// reassociate further.
2331	Instruction *NI = LowerNegateToMultiply(Neg: I);
2332	for (User *U : NI->users()) {
2333	if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(Val: U))
2334	RedoInsts.insert(X: Tmp);
2335	}
2336	RedoInsts.insert(X: I);
2337	MadeChange = true;
2338	I = NI;
2339	}
2340	}
2341	}
2342
2343	// If this instruction is an associative binary operator, process it.
2344	if (!I->isAssociative()) return;
2345	BinaryOperator *BO = cast<BinaryOperator>(Val: I);
2346
2347	// If this is an interior node of a reassociable tree, ignore it until we
2348	// get to the root of the tree, to avoid N^2 analysis.
2349	unsigned Opcode = BO->getOpcode();
2350	if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2351	// During the initial run we will get to the root of the tree.
2352	// But if we get here while we are redoing instructions, there is no
2353	// guarantee that the root will be visited. So Redo later
2354	if (BO->user_back() != BO &&
2355	BO->getParent() == BO->user_back()->getParent())
2356	RedoInsts.insert(X: BO->user_back());
2357	return;
2358	}
2359
2360	// If this is an add tree that is used by a sub instruction, ignore it
2361	// until we process the subtract.
2362	if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2363	cast<Instruction>(Val: BO->user_back())->getOpcode() == Instruction::Sub)
2364	return;
2365	if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2366	cast<Instruction>(Val: BO->user_back())->getOpcode() == Instruction::FSub)
2367	return;
2368
2369	ReassociateExpression(I: BO);
2370	}
2371
2372	void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2373	// First, walk the expression tree, linearizing the tree, collecting the
2374	// operand information.
2375	SmallVector<RepeatedValue, `8`> Tree;
2376	bool HasNUW = true;
2377	MadeChange \|= LinearizeExprTree(I, Ops&: Tree, ToRedo&: RedoInsts, HasNUW);
2378	SmallVector<ValueEntry, `8`> Ops;
2379	Ops.reserve(N: Tree.size());
2380	for (const RepeatedValue &E : Tree)
2381	Ops.append(NumInputs: E.second.getZExtValue(), Elt: ValueEntry (getRank(V: E.first), E.first));
2382
2383	LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << `'\n'`);
2384
2385	// Now that we have linearized the tree to a list and have gathered all of
2386	// the operands and their ranks, sort the operands by their rank. Use a
2387	// stable_sort so that values with equal ranks will have their relative
2388	// positions maintained (and so the compiler is deterministic). Note that
2389	// this sorts so that the highest ranking values end up at the beginning of
2390	// the vector.
2391	llvm::stable_sort(Range&: Ops);
2392
2393	// Now that we have the expression tree in a convenient
2394	// sorted form, optimize it globally if possible.
2395	if (Value *V = OptimizeExpression(I, Ops)) {
2396	if (V == I)
2397	// Self-referential expression in unreachable code.
2398	return;
2399	// This expression tree simplified to something that isn't a tree,
2400	// eliminate it.
2401	LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << `'\n'`);
2402	I->replaceAllUsesWith(V);
2403	if (Instruction *VI = dyn_cast<Instruction>(Val: V))
2404	if (I->getDebugLoc())
2405	VI->setDebugLoc(I->getDebugLoc());
2406	RedoInsts.insert(X: I);
2407	++NumAnnihil;
2408	return;
2409	}
2410
2411	// We want to sink immediates as deeply as possible except in the case where
2412	// this is a multiply tree used only by an add, and the immediate is a -1.
2413	// In this case we reassociate to put the negation on the outside so that we
2414	// can fold the negation into the add: (-X)Y + Z -> Z-XY
2415	if (I->hasOneUse()) {
2416	if (I->getOpcode() == Instruction::Mul &&
2417	cast<Instruction>(Val: I->user_back())->getOpcode() == Instruction::Add &&
2418	isa<ConstantInt>(Val: Ops.back().Op) &&
2419	cast<ConstantInt>(Val: Ops.back().Op)->isMinusOne()) {
2420	ValueEntry Tmp = Ops.pop_back_val();
2421	Ops.insert(I: Ops.begin(), Elt: Tmp);
2422	} else if (I->getOpcode() == Instruction::FMul &&
2423	cast<Instruction>(Val: I->user_back())->getOpcode() ==
2424	Instruction::FAdd &&
2425	isa<ConstantFP>(Val: Ops.back().Op) &&
2426	cast<ConstantFP>(Val: Ops.back().Op)->isExactlyValue(V: -`1.0`)) {
2427	ValueEntry Tmp = Ops.pop_back_val();
2428	Ops.insert(I: Ops.begin(), Elt: Tmp);
2429	}
2430	}
2431
2432	LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << `'\n'`);
2433
2434	if (Ops.size() == `1`) {
2435	if (Ops [`0`].Op == I)
2436	// Self-referential expression in unreachable code.
2437	return;
2438
2439	// This expression tree simplified to something that isn't a tree,
2440	// eliminate it.
2441	I->replaceAllUsesWith(V: Ops [`0`].Op);
2442	if (Instruction *OI = dyn_cast<Instruction>(Val: Ops [`0`].Op))
2443	OI->setDebugLoc(I->getDebugLoc());
2444	RedoInsts.insert(X: I);
2445	return;
2446	}
2447
2448	if (Ops.size() > `2` && Ops.size() <= GlobalReassociateLimit) {
2449	// Find the pair with the highest count in the pairmap and move it to the
2450	// back of the list so that it can later be CSE'd.
2451	// example:
2452	// abcde
2453	// if ce is the most "popular" pair, we can express this as*
2454	// (((ce)d)b)a
2455	unsigned Max = `1`;
2456	unsigned BestRank = `0`;
2457	std::pair<unsigned, unsigned> BestPair;
2458	unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2459	unsigned LimitIdx = `0`;
2460	// With the CSE-driven heuristic, we are about to slap two values at the
2461	// beginning of the expression whereas they could live very late in the CFG.
2462	// When using the CSE-local heuristic we avoid creating dependences from
2463	// completely unrelated part of the CFG by limiting the expression
2464	// reordering on the values that live in the first seen basic block.
2465	// The main idea is that we want to avoid forming expressions that would
2466	// become loop dependent.
2467	if (UseCSELocalOpt) {
2468	const BasicBlock FirstSeenBB = nullptr*;
2469	int StartIdx = Ops.size() - `1`;
2470	// Skip the first value of the expression since we need at least two
2471	// values to materialize an expression. I.e., even if this value is
2472	// anchored in a different basic block, the actual first sub expression
2473	// will be anchored on the second value.
2474	for (int i = StartIdx - `1`; i != -`1`; --i) {
2475	const Value *Val = Ops [i].Op;
2476	const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2477	const BasicBlock SeenBB = nullptr*;
2478	if (!CurrLeafInstr) {
2479	// The value is free of any CFG dependencies.
2480	// Do as if it lives in the entry block.
2481	//
2482	// We do this to make sure all the values falling on this path are
2483	// seen through the same anchor point. The rationale is these values
2484	// can be combined together to from a sub expression free of any CFG
2485	// dependencies so we want them to stay together.
2486	// We could be cleverer and postpone the anchor down to the first
2487	// anchored value, but that's likely complicated to get right.
2488	// E.g., we wouldn't want to do that if that means being stuck in a
2489	// loop.
2490	//
2491	// For instance, we wouldn't want to change:
2492	// res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2493	// into
2494	// res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2495	// Because all the sub expressions with arg2..N would be stuck between
2496	// two loop dependent values.
2497	SeenBB = &I->getParent()->getParent()->getEntryBlock();
2498	} else {
2499	SeenBB = CurrLeafInstr->getParent();
2500	}
2501
2502	if (!FirstSeenBB) {
2503	FirstSeenBB = SeenBB;
2504	continue;
2505	}
2506	if (FirstSeenBB != SeenBB) {
2507	// ith value is in a different basic block.
2508	// Rewind the index once to point to the last value on the same basic
2509	// block.
2510	LimitIdx = i + `1`;
2511	LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2512	<< LimitIdx << ", " << StartIdx << "]\n");
2513	break;
2514	}
2515	}
2516	}
2517	for (unsigned i = Ops.size() - `1`; i > LimitIdx; --i) {
2518	// We must use int type to go below zero when LimitIdx is 0.
2519	for (int j = i - `1`; j >= (int)LimitIdx; --j) {
2520	unsigned Score = `0`;
2521	Value *Op0 = Ops [i].Op;
2522	Value *Op1 = Ops [j].Op;
2523	if (std::less<Value *>()(Op1, Op0))
2524	std::swap(a&: Op0, b&: Op1);
2525	auto it = PairMap[Idx].find(Val: {Op0, Op1});
2526	if (it != PairMap[Idx].end()) {
2527	// Functions like BreakUpSubtract() can erase the Values we're using
2528	// as keys and create new Values after we built the PairMap. There's a
2529	// small chance that the new nodes can have the same address as
2530	// something already in the table. We shouldn't accumulate the stored
2531	// score in that case as it refers to the wrong Value.
2532	if (it ->second.isValid())
2533	Score += it ->second.Score;
2534	}
2535
2536	unsigned MaxRank = std::max(a: Ops [i].Rank, b: Ops [j].Rank);
2537
2538	// By construction, the operands are sorted in reverse order of their
2539	// topological order.
2540	// So we tend to form (sub) expressions with values that are close to
2541	// each other.
2542	//
2543	// Now to expose more CSE opportunities we want to expose the pair of
2544	// operands that occur the most (as statically computed in
2545	// BuildPairMap.) as the first sub-expression.
2546	//
2547	// If two pairs occur as many times, we pick the one with the
2548	// lowest rank, meaning the one with both operands appearing first in
2549	// the topological order.
2550	if (Score > Max \|\| (Score == Max && MaxRank < BestRank)) {
2551	BestPair = {j, i};
2552	Max = Score;
2553	BestRank = MaxRank;
2554	}
2555	}
2556	}
2557	if (Max > `1`) {
2558	auto Op0 = Ops [BestPair.first];
2559	auto Op1 = Ops [BestPair.second];
2560	Ops.erase(CI: &Ops [BestPair.second]);
2561	Ops.erase(CI: &Ops [BestPair.first]);
2562	Ops.push_back(Elt: Op0);
2563	Ops.push_back(Elt: Op1);
2564	}
2565	}
2566	LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2567	dbgs() << `'\n'`);
2568	// Now that we ordered and optimized the expressions, splat them back into
2569	// the expression tree, removing any unneeded nodes.
2570	RewriteExprTree(I, Ops, HasNUW);
2571	}
2572
2573	void
2574	ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2575	// Make a "pairmap" of how often each operand pair occurs.
2576	for (BasicBlock *BI : RPOT) {
2577	for (Instruction &I : *BI) {
2578	if (!I.isAssociative() \|\| !I.isBinaryOp())
2579	continue;
2580
2581	// Ignore nodes that aren't at the root of trees.
2582	if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2583	continue;
2584
2585	// Collect all operands in a single reassociable expression.
2586	// Since Reassociate has already been run once, we can assume things
2587	// are already canonical according to Reassociation's regime.
2588	SmallVector<Value *, `8`> Worklist = { I.getOperand(i: `0`), I.getOperand(i: `1`) };
2589	SmallVector<Value *, `8`> Ops;
2590	while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2591	Value *Op = Worklist.pop_back_val();
2592	Instruction *OpI = dyn_cast<Instruction>(Val: Op);
2593	if (!OpI \|\| OpI->getOpcode() != I.getOpcode() \|\| !OpI->hasOneUse()) {
2594	Ops.push_back(Elt: Op);
2595	continue;
2596	}
2597	// Be paranoid about self-referencing expressions in unreachable code.
2598	if (OpI->getOperand(i: `0`) != OpI)
2599	Worklist.push_back(Elt: OpI->getOperand(i: `0`));
2600	if (OpI->getOperand(i: `1`) != OpI)
2601	Worklist.push_back(Elt: OpI->getOperand(i: `1`));
2602	}
2603	// Skip extremely long expressions.
2604	if (Ops.size() > GlobalReassociateLimit)
2605	continue;
2606
2607	// Add all pairwise combinations of operands to the pair map.
2608	unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2609	SmallSet<std::pair<Value , Value>, `32`> Visited;
2610	for (unsigned i = `0`; i < Ops.size() - `1`; ++i) {
2611	for (unsigned j = i + `1`; j < Ops.size(); ++j) {
2612	// Canonicalize operand orderings.
2613	Value *Op0 = Ops [i];
2614	Value *Op1 = Ops [j];
2615	if (std::less<Value *>()(Op1, Op0))
2616	std::swap(a&: Op0, b&: Op1);
2617	if (!Visited.insert(V: {Op0, Op1}).second)
2618	continue;
2619	auto res = PairMap[BinaryIdx].insert(KV: {{Op0, Op1}, {.Value1: Op0, .Value2: Op1, .Score: `1`}});
2620	if (!res.second) {
2621	// If either key value has been erased then we've got the same
2622	// address by coincidence. That can't happen here because nothing is
2623	// erasing values but it can happen by the time we're querying the
2624	// map.
2625	assert(res.first ->second.isValid() && "WeakVH invalidated");
2626	++res.first ->second.Score;
2627	}
2628	}
2629	}
2630	}
2631	}
2632	}
2633
2634	PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
2635	// Get the functions basic blocks in Reverse Post Order. This order is used by
2636	// BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2637	// blocks (it has been seen that the analysis in this pass could hang when
2638	// analysing dead basic blocks).
2639	ReversePostOrderTraversal<Function *> RPOT(&F);
2640
2641	// Calculate the rank map for F.
2642	BuildRankMap(F, RPOT);
2643
2644	// Build the pair map before running reassociate.
2645	// Technically this would be more accurate if we did it after one round
2646	// of reassociation, but in practice it doesn't seem to help much on
2647	// real-world code, so don't waste the compile time running reassociate
2648	// twice.
2649	// If a user wants, they could expicitly run reassociate twice in their
2650	// pass pipeline for further potential gains.
2651	// It might also be possible to update the pair map during runtime, but the
2652	// overhead of that may be large if there's many reassociable chains.
2653	BuildPairMap(RPOT);
2654
2655	MadeChange = false;
2656
2657	// Traverse the same blocks that were analysed by BuildRankMap.
2658	for (BasicBlock *BI : RPOT) {
2659	assert(RankMap.count(&*BI) && "BB should be ranked.");
2660	// Optimize every instruction in the basic block.
2661	for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2662	if (isInstructionTriviallyDead(I: &*II)) {
2663	EraseInst(I: &*II ++);
2664	} else {
2665	OptimizeInst(I: &*II);
2666	assert(II ->getParent() == &*BI && "Moved to a different block!");
2667	++II;
2668	}
2669
2670	// Make a copy of all the instructions to be redone so we can remove dead
2671	// instructions.
2672	OrderedSet ToRedo(RedoInsts);
2673	// Iterate over all instructions to be reevaluated and remove trivially dead
2674	// instructions. If any operand of the trivially dead instruction becomes
2675	// dead mark it for deletion as well. Continue this process until all
2676	// trivially dead instructions have been removed.
2677	while (!ToRedo.empty()) {
2678	Instruction *I = ToRedo.pop_back_val();
2679	if (isInstructionTriviallyDead(I)) {
2680	RecursivelyEraseDeadInsts(I, Insts&: ToRedo);
2681	MadeChange = true;
2682	}
2683	}
2684
2685	// Now that we have removed dead instructions, we can reoptimize the
2686	// remaining instructions.
2687	while (!RedoInsts.empty()) {
2688	Instruction *I = RedoInsts.front();
2689	RedoInsts.erase(I: RedoInsts.begin());
2690	if (isInstructionTriviallyDead(I))
2691	EraseInst(I);
2692	else
2693	OptimizeInst(I);
2694	}
2695	}
2696
2697	// We are done with the rank map and pair map.
2698	RankMap.clear();
2699	ValueRankMap.clear();
2700	for (auto &Entry : PairMap)
2701	Entry.clear();
2702
2703	if (MadeChange) {
2704	PreservedAnalyses PA;
2705	PA.preserveSet<CFGAnalyses>();
2706	return PA;
2707	}
2708
2709	return PreservedAnalyses::all();
2710	}
2711
2712	namespace {
2713
2714	class ReassociateLegacyPass : public FunctionPass {
2715	ReassociatePass Impl;
2716
2717	public:
2718	static char ID; // Pass identification, replacement for typeid
2719
2720	ReassociateLegacyPass() : FunctionPass (ID) {
2721	initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
2722	}
2723
2724	bool runOnFunction(Function &F) override {
2725	if (skipFunction(F))
2726	return false;
2727
2728	FunctionAnalysisManager DummyFAM;
2729	auto PA = Impl.run(F, DummyFAM);
2730	return !PA.areAllPreserved();
2731	}
2732
2733	void getAnalysisUsage(AnalysisUsage &AU) const override {
2734	AU.setPreservesCFG();
2735	AU.addPreserved<AAResultsWrapperPass>();
2736	AU.addPreserved<BasicAAWrapperPass>();
2737	AU.addPreserved<GlobalsAAWrapperPass>();
2738	}
2739	};
2740
2741	} // end anonymous namespace
2742
2743	char ReassociateLegacyPass::ID = `0`;
2744
2745	INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2746	"Reassociate expressions", false, false)
2747
// Public interface to the Reassociate pass
/// Allocate a fresh ReassociateLegacyPass with `new` and return it through the
/// FunctionPass base pointer.
/// NOTE(review): the caller (presumably a legacy pass manager) is expected to
/// take ownership of the returned object -- confirm at the call sites.
FunctionPass *llvm::createReassociatePass() {
  return new ReassociateLegacyPass();
}
2752

source code of llvm/lib/Transforms/Scalar/Reassociate.cpp