1 | //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass reassociates n-ary add expressions and eliminates the redundancy |
10 | // exposed by the reassociation. |
11 | // |
12 | // A motivating example: |
13 | // |
14 | // void foo(int a, int b) { |
15 | // bar(a + b); |
16 | // bar((a + 2) + b); |
17 | // } |
18 | // |
19 | // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify |
20 | // the above code to |
21 | // |
22 | // int t = a + b; |
23 | // bar(t); |
24 | // bar(t + 2); |
25 | // |
26 | // However, the Reassociate pass is unable to do that because it processes each |
27 | // instruction individually and believes (a + 2) + b is the best form according |
28 | // to its rank system. |
29 | // |
30 | // To address this limitation, NaryReassociate reassociates an expression in a |
31 | // form that reuses existing instructions. As a result, NaryReassociate can |
32 | // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that |
33 | // (a + b) is computed before. |
34 | // |
35 | // NaryReassociate works as follows. For every instruction in the form of (a + |
36 | // b) + c, it checks whether a + c or b + c is already computed by a dominating |
37 | // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + |
38 | // c) + a and removes the redundancy accordingly. To efficiently look up whether |
39 | // an expression is computed before, we store each instruction seen and its SCEV |
40 | // into an SCEV-to-instruction map. |
41 | // |
42 | // Although the algorithm pattern-matches only ternary additions, it |
43 | // automatically handles many >3-ary expressions by walking through the function |
44 | // in the depth-first order. For example, given |
45 | // |
46 | // (a + c) + d |
47 | // ((a + b) + c) + d |
48 | // |
49 | // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites |
50 | // ((a + c) + b) + d into ((a + c) + d) + b. |
51 | // |
52 | // Finally, the above dominator-based algorithm may need to be run multiple |
53 | // iterations before emitting optimal code. One source of this need is that we |
54 | // only split an operand when it is used only once. The above algorithm can |
55 | // eliminate an instruction and decrease the usage count of its operands. As a |
56 | // result, an instruction that previously had multiple uses may become a |
57 | // single-use instruction and thus eligible for split consideration. For |
58 | // example, |
59 | // |
60 | // ac = a + c |
61 | // ab = a + b |
62 | // abc = ab + c |
63 | // ab2 = ab + b |
64 | // ab2c = ab2 + c |
65 | // |
66 | // In the first iteration, we cannot reassociate abc to ac+b because ab is used |
67 | // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a |
68 | // result, ab2 becomes dead and ab will be used only once in the second |
69 | // iteration. |
70 | // |
71 | // Limitations and TODO items: |
72 | // |
73 | // 1) We only considers n-ary adds and muls for now. This should be extended |
74 | // and generalized. |
75 | // |
76 | //===----------------------------------------------------------------------===// |
77 | |
78 | #ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |
79 | #define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |
80 | |
81 | #include "llvm/ADT/DenseMap.h" |
82 | #include "llvm/ADT/SmallVector.h" |
83 | #include "llvm/IR/PassManager.h" |
84 | #include "llvm/IR/ValueHandle.h" |
85 | |
86 | namespace llvm { |
87 | |
88 | class AssumptionCache; |
89 | class BinaryOperator; |
90 | class DataLayout; |
91 | class DominatorTree; |
92 | class Function; |
93 | class GetElementPtrInst; |
94 | class Instruction; |
95 | class ScalarEvolution; |
96 | class SCEV; |
97 | class TargetLibraryInfo; |
98 | class TargetTransformInfo; |
99 | class Type; |
100 | class Value; |
101 | |
102 | class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { |
103 | public: |
104 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
105 | |
106 | // Glue for old PM. |
107 | bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, |
108 | ScalarEvolution *SE_, TargetLibraryInfo *TLI_, |
109 | TargetTransformInfo *TTI_); |
110 | |
111 | private: |
112 | // Runs only one iteration of the dominator-based algorithm. See the header |
113 | // comments for why we need multiple iterations. |
114 | bool doOneIteration(Function &F); |
115 | |
116 | // Reassociates I for better CSE. |
117 | Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV); |
118 | |
119 | // Reassociate GEP for better CSE. |
120 | Instruction *tryReassociateGEP(GetElementPtrInst *GEP); |
121 | |
122 | // Try splitting GEP at the I-th index and see whether either part can be |
123 | // CSE'ed. This is a helper function for tryReassociateGEP. |
124 | // |
125 | // \p IndexedType The element type indexed by GEP's I-th index. This is |
126 | // equivalent to |
127 | // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, |
128 | // ..., i-th index). |
129 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, |
130 | unsigned I, Type *IndexedType); |
131 | |
132 | // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or |
133 | // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. |
134 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, |
135 | unsigned I, Value *LHS, |
136 | Value *RHS, Type *IndexedType); |
137 | |
138 | // Reassociate binary operators for better CSE. |
139 | Instruction *tryReassociateBinaryOp(BinaryOperator *I); |
140 | |
141 | // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly |
142 | // passed. |
143 | Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, |
144 | BinaryOperator *I); |
145 | // Rewrites I to (LHS op RHS) if LHS is computed already. |
146 | Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, |
147 | BinaryOperator *I); |
148 | |
149 | // Tries to match Op1 and Op2 by using V. |
150 | bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); |
151 | |
152 | // Gets SCEV for (LHS op RHS). |
153 | const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, |
154 | const SCEV *RHS); |
155 | |
156 | // Returns the closest dominator of \c Dominatee that computes |
157 | // \c CandidateExpr. Returns null if not found. |
158 | Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, |
159 | Instruction *Dominatee); |
160 | |
161 | // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p |
162 | // OrigSCEV is set if \I matches Min/Max regardless whether resassociation is |
163 | // done or not. If reassociation was successful newly generated instruction is |
164 | // returned, otherwise nullptr. |
165 | template <typename PredT> |
166 | Instruction *matchAndReassociateMinOrMax(Instruction *I, |
167 | const SCEV *&OrigSCEV); |
168 | |
169 | // Reassociate Min/Max. |
170 | template <typename MaxMinT> |
171 | Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS, |
172 | Value *RHS); |
173 | |
174 | // GetElementPtrInst implicitly sign-extends an index if the index is shorter |
175 | // than the pointer size. This function returns whether Index is shorter than |
176 | // GEP's pointer size, i.e., whether Index needs to be sign-extended in order |
177 | // to be an index of GEP. |
178 | bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); |
179 | |
180 | AssumptionCache *AC; |
181 | const DataLayout *DL; |
182 | DominatorTree *DT; |
183 | ScalarEvolution *SE; |
184 | TargetLibraryInfo *TLI; |
185 | TargetTransformInfo *TTI; |
186 | |
187 | // A lookup table quickly telling which instructions compute the given SCEV. |
188 | // Note that there can be multiple instructions at different locations |
189 | // computing to the same SCEV, so we map a SCEV to an instruction list. For |
190 | // example, |
191 | // |
192 | // if (p1) |
193 | // foo(a + b); |
194 | // if (p2) |
195 | // bar(a + b); |
196 | DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; |
197 | }; |
198 | |
199 | } // end namespace llvm |
200 | |
201 | #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |
202 | |