1 | //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements feature and label extraction for offline supervised learning |
10 | // of a IR to native size model. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
14 | |
15 | #ifdef LLVM_HAVE_TFLITE |
16 | #include "llvm/Analysis/Utils/TFUtils.h" |
17 | #endif |
18 | #include "llvm/IR/Function.h" |
19 | #include "llvm/IR/PassManager.h" |
20 | #include "llvm/Support/raw_ostream.h" |
21 | |
22 | using namespace llvm; |
23 | |
24 | AnalysisKey InlineSizeEstimatorAnalysis::Key; |
25 | |
26 | #ifdef LLVM_HAVE_TFLITE |
27 | #include "llvm/Analysis/LoopInfo.h" |
28 | #include "llvm/Analysis/TargetLibraryInfo.h" |
29 | #include "llvm/Analysis/TargetTransformInfo.h" |
30 | #include "llvm/IR/BasicBlock.h" |
31 | #include "llvm/IR/Dominators.h" |
32 | #include "llvm/IR/Instructions.h" |
33 | #include "llvm/MC/MCAsmLayout.h" |
34 | #include "llvm/Support/Casting.h" |
35 | #include "llvm/Support/CommandLine.h" |
36 | #include <algorithm> |
37 | #include <deque> |
38 | #include <optional> |
39 | |
40 | cl::opt<std::string> TFIR2NativeModelPath( |
41 | "ml-inliner-ir2native-model" , cl::Hidden, |
42 | cl::desc("Path to saved model evaluating native size from IR." )); |
43 | |
44 | #define DEBUG_TYPE "inline-size-estimator" |
45 | namespace { |
46 | unsigned getMaxInstructionID() { |
47 | #define LAST_OTHER_INST(NR) return NR; |
48 | #include "llvm/IR/Instruction.def" |
49 | } |
50 | |
51 | class IRToNativeSizeLearning { |
52 | public: |
53 | enum class NamedFeatureIndex : size_t { |
54 | InitialSize, |
55 | Blocks, |
56 | Calls, |
57 | IsLocal, |
58 | IsLinkOnceODR, |
59 | IsLinkOnce, |
60 | Loops, |
61 | MaxLoopDepth, |
62 | MaxDomTreeLevel, |
63 | |
64 | NumNamedFeatures |
65 | }; |
66 | static const size_t NumNamedFeatures = |
67 | static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); |
68 | struct FunctionFeatures { |
69 | static const size_t FeatureCount; |
70 | |
71 | std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; |
72 | std::vector<int32_t> InstructionHistogram; |
73 | std::vector<int32_t> InstructionPairHistogram; |
74 | |
75 | void fillTensor(int32_t *Ptr) const; |
76 | int32_t &operator[](NamedFeatureIndex Pos) { |
77 | return NamedFeatures[static_cast<size_t>(Pos)]; |
78 | } |
79 | }; |
80 | IRToNativeSizeLearning() = default; |
81 | |
82 | static FunctionFeatures getFunctionFeatures(Function &F, |
83 | FunctionAnalysisManager &FAM); |
84 | }; |
85 | |
86 | // This is a point in time - we determined including these pairs of |
87 | // consecutive instructions (in the IR layout available at inline time) as |
88 | // features improves the model performance. We want to move away from manual |
89 | // feature selection. |
90 | // The array is given in opcode pairs rather than labels because 1) labels |
91 | // weren't readily available, and 2) the successions were hand - extracted. |
92 | // |
93 | // This array must be sorted. |
94 | static const std::array<std::pair<size_t, size_t>, 137> |
95 | ImportantInstructionSuccessions{ |
96 | {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, |
97 | {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, |
98 | {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, |
99 | {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, |
100 | {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, |
101 | {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, |
102 | {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, |
103 | {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, |
104 | {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, |
105 | {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, |
106 | {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, |
107 | {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, |
108 | {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, |
109 | {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, |
110 | {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, |
111 | {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, |
112 | {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, |
113 | {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, |
114 | {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, |
115 | {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; |
116 | |
117 | // We have: 9 calculated features (the features here); 1 feature for each |
118 | // instruction opcode; and 1 feature for each manually-identified sequence. |
119 | // For the latter 2, we build a histogram: we count the number of |
120 | // occurrences of each instruction opcode or succession of instructions, |
121 | // respectively. |
122 | // Note that instruction opcodes start from 1. For convenience, we also have an |
123 | // always 0 feature for the '0' opcode, hence the extra 1. |
124 | const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = |
125 | ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + |
126 | IRToNativeSizeLearning::NumNamedFeatures; |
127 | |
128 | size_t getSize(Function &F, TargetTransformInfo &TTI) { |
129 | size_t Ret = 0; |
130 | for (const auto &BB : F) |
131 | for (const auto &I : BB) |
132 | Ret += *(TTI.getInstructionCost( |
133 | &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue()); |
134 | return Ret; |
135 | } |
136 | |
137 | size_t getSize(Function &F, FunctionAnalysisManager &FAM) { |
138 | auto &TTI = FAM.getResult<TargetIRAnalysis>(F); |
139 | return getSize(F, TTI); |
140 | } |
141 | |
142 | unsigned getMaxDominatorTreeDepth(const Function &F, |
143 | const DominatorTree &Tree) { |
144 | unsigned Ret = 0; |
145 | for (const auto &BB : F) |
146 | if (const auto *TN = Tree.getNode(&BB)) |
147 | Ret = std::max(Ret, TN->getLevel()); |
148 | return Ret; |
149 | } |
150 | } // namespace |
151 | |
152 | IRToNativeSizeLearning::FunctionFeatures |
153 | IRToNativeSizeLearning::getFunctionFeatures(Function &F, |
154 | FunctionAnalysisManager &FAM) { |
155 | assert(llvm::is_sorted(ImportantInstructionSuccessions) && |
156 | "expected function features are sorted" ); |
157 | |
158 | auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); |
159 | FunctionFeatures FF; |
160 | size_t InstrCount = getMaxInstructionID() + 1; |
161 | FF.InstructionHistogram.resize(InstrCount); |
162 | |
163 | FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); |
164 | |
165 | int StartID = 0; |
166 | int LastID = StartID; |
167 | auto getPairIndex = [](size_t a, size_t b) { |
168 | auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); |
169 | if (I == ImportantInstructionSuccessions.end()) |
170 | return -1; |
171 | return static_cast<int>( |
172 | std::distance(ImportantInstructionSuccessions.begin(), I)); |
173 | }; |
174 | |
175 | // We don't want debug calls, because they'd just add noise. |
176 | for (const auto &BB : F) { |
177 | for (const auto &I : BB.instructionsWithoutDebug()) { |
178 | auto ID = I.getOpcode(); |
179 | |
180 | ++FF.InstructionHistogram[ID]; |
181 | int PairIndex = getPairIndex(LastID, ID); |
182 | if (PairIndex >= 0) |
183 | ++FF.InstructionPairHistogram[PairIndex]; |
184 | LastID = ID; |
185 | if (isa<CallBase>(I)) |
186 | ++FF[NamedFeatureIndex::Calls]; |
187 | } |
188 | } |
189 | |
190 | FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); |
191 | FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); |
192 | FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); |
193 | FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); |
194 | FF[NamedFeatureIndex::Blocks] = F.size(); |
195 | auto &LI = FAM.getResult<LoopAnalysis>(F); |
196 | FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); |
197 | for (auto &L : LI) |
198 | FF[NamedFeatureIndex::MaxLoopDepth] = |
199 | std::max(FF[NamedFeatureIndex::MaxLoopDepth], |
200 | static_cast<int32_t>(L->getLoopDepth())); |
201 | FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); |
202 | return FF; |
203 | } |
204 | |
205 | void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { |
206 | std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); |
207 | Ptr += NamedFeatures.size(); |
208 | std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); |
209 | Ptr += InstructionHistogram.size(); |
210 | std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), |
211 | Ptr); |
212 | } |
213 | |
214 | bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { |
215 | return !TFIR2NativeModelPath.empty(); |
216 | } |
217 | |
218 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { |
219 | if (!isEvaluatorRequested()) { |
220 | return; |
221 | } |
222 | std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( |
223 | "serving_default_input_1" , |
224 | {1, static_cast<int64_t>( |
225 | IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; |
226 | std::vector<TensorSpec> OutputSpecs{ |
227 | TensorSpec::createSpec<float>("StatefulPartitionedCall" , {1})}; |
228 | Evaluator = std::make_unique<TFModelEvaluator>( |
229 | TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); |
230 | if (!Evaluator || !Evaluator->isValid()) { |
231 | Evaluator.reset(); |
232 | return; |
233 | } |
234 | } |
235 | |
236 | InlineSizeEstimatorAnalysis::Result |
237 | InlineSizeEstimatorAnalysis::run(const Function &F, |
238 | FunctionAnalysisManager &FAM) { |
239 | if (!Evaluator) |
240 | return std::nullopt; |
241 | auto Features = IRToNativeSizeLearning::getFunctionFeatures( |
242 | const_cast<Function &>(F), FAM); |
243 | int32_t *V = Evaluator->getInput<int32_t>(0); |
244 | Features.fillTensor(V); |
245 | auto ER = Evaluator->evaluate(); |
246 | if (!ER) |
247 | return std::nullopt; |
248 | float Ret = *ER->getTensorValue<float>(0); |
249 | if (Ret < 0.0) |
250 | Ret = 0.0; |
251 | return static_cast<size_t>(Ret); |
252 | } |
253 | |
254 | InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} |
255 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( |
256 | InlineSizeEstimatorAnalysis &&Other) |
257 | : Evaluator(std::move(Other.Evaluator)) {} |
258 | |
259 | #else |
260 | namespace llvm { |
261 | class TFModelEvaluator {}; |
262 | } // namespace llvm |
263 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default; |
264 | InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( |
265 | InlineSizeEstimatorAnalysis &&) {} |
266 | InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default; |
267 | InlineSizeEstimatorAnalysis::Result |
268 | InlineSizeEstimatorAnalysis::run(const Function &F, |
269 | FunctionAnalysisManager &FAM) { |
270 | return std::nullopt; |
271 | } |
272 | bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } |
273 | #endif |
274 | |
275 | PreservedAnalyses |
276 | InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, |
277 | FunctionAnalysisManager &AM) { |
278 | OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() |
279 | << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(IR&: F) << "\n" ; |
280 | return PreservedAnalyses::all(); |
281 | } |
282 | |