InlineSizeEstimatorAnalysis.cpp source code [llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp]

1	//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements feature and label extraction for offline supervised learning
10	// of a IR to native size model.
11	//
12	//===----------------------------------------------------------------------===//
13	#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
14
15	#ifdef LLVM_HAVE_TFLITE
16	#include "llvm/Analysis/Utils/TFUtils.h"
17	#endif
18	#include "llvm/IR/Function.h"
19	#include "llvm/IR/PassManager.h"
20	#include "llvm/Support/raw_ostream.h"
21
22	using namespace llvm;
23
24	AnalysisKey InlineSizeEstimatorAnalysis::Key;
25
26	#ifdef LLVM_HAVE_TFLITE
27	#include "llvm/Analysis/LoopInfo.h"
28	#include "llvm/Analysis/TargetLibraryInfo.h"
29	#include "llvm/Analysis/TargetTransformInfo.h"
30	#include "llvm/IR/BasicBlock.h"
31	#include "llvm/IR/Dominators.h"
32	#include "llvm/IR/Instructions.h"
33	#include "llvm/MC/MCAsmLayout.h"
34	#include "llvm/Support/Casting.h"
35	#include "llvm/Support/CommandLine.h"
36	#include <algorithm>
37	#include <deque>
38	#include <optional>
39
40	cl::opt<std::string> TFIR2NativeModelPath(
41	"ml-inliner-ir2native-model", cl::Hidden,
42	cl::desc("Path to saved model evaluating native size from IR."));
43
44	#define DEBUG_TYPE "inline-size-estimator"
45	namespace {
46	unsigned getMaxInstructionID() {
47	#define LAST_OTHER_INST(NR) return NR;
48	#include "llvm/IR/Instruction.def"
49	}
50
51	class IRToNativeSizeLearning {
52	public:
53	enum class NamedFeatureIndex : size_t {
54	InitialSize,
55	Blocks,
56	Calls,
57	IsLocal,
58	IsLinkOnceODR,
59	IsLinkOnce,
60	Loops,
61	MaxLoopDepth,
62	MaxDomTreeLevel,
63
64	NumNamedFeatures
65	};
66	static const size_t NumNamedFeatures =
67	static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
68	struct FunctionFeatures {
69	static const size_t FeatureCount;
70
71	std::array<int32_t, NumNamedFeatures> NamedFeatures = {`0`};
72	std::vector<int32_t> InstructionHistogram;
73	std::vector<int32_t> InstructionPairHistogram;
74
75	void fillTensor(int32_t Ptr) const*;
76	int32_t &operator[](NamedFeatureIndex Pos) {
77	return NamedFeatures[static_cast<size_t>(Pos)];
78	}
79	};
80	IRToNativeSizeLearning() = default;
81
82	static FunctionFeatures getFunctionFeatures(Function &F,
83	FunctionAnalysisManager &FAM);
84	};
85
86	// This is a point in time - we determined including these pairs of
87	// consecutive instructions (in the IR layout available at inline time) as
88	// features improves the model performance. We want to move away from manual
89	// feature selection.
90	// The array is given in opcode pairs rather than labels because 1) labels
91	// weren't readily available, and 2) the successions were hand - extracted.
92	//
93	// This array must be sorted.
94	static const std::array<std::pair<size_t, size_t>, `137`>
95	ImportantInstructionSuccessions{
96	{{`1`, `1`}, {`1`, `4`}, {`1`, `5`}, {`1`, `7`}, {`1`, `8`}, {`1`, `9`}, {`1`, `11`},
97	{`1`, `12`}, {`1`, `13`}, {`1`, `14`}, {`1`, `18`}, {`1`, `20`}, {`1`, `22`}, {`1`, `24`},
98	{`1`, `25`}, {`1`, `26`}, {`1`, `27`}, {`1`, `28`}, {`1`, `29`}, {`1`, `30`}, {`1`, `31`},
99	{`1`, `32`}, {`1`, `33`}, {`1`, `34`}, {`1`, `39`}, {`1`, `40`}, {`1`, `42`}, {`1`, `45`},
100	{`2`, `1`}, {`2`, `2`}, {`2`, `13`}, {`2`, `28`}, {`2`, `29`}, {`2`, `32`}, {`2`, `33`},
101	{`2`, `34`}, {`2`, `38`}, {`2`, `48`}, {`2`, `49`}, {`2`, `53`}, {`2`, `55`}, {`2`, `56`},
102	{`13`, `2`}, {`13`, `13`}, {`13`, `26`}, {`13`, `33`}, {`13`, `34`}, {`13`, `56`}, {`15`, `27`},
103	{`28`, `2`}, {`28`, `48`}, {`28`, `53`}, {`29`, `2`}, {`29`, `33`}, {`29`, `56`}, {`31`, `31`},
104	{`31`, `33`}, {`31`, `34`}, {`31`, `49`}, {`32`, `1`}, {`32`, `2`}, {`32`, `13`}, {`32`, `15`},
105	{`32`, `28`}, {`32`, `29`}, {`32`, `32`}, {`32`, `33`}, {`32`, `34`}, {`32`, `39`}, {`32`, `40`},
106	{`32`, `48`}, {`32`, `49`}, {`32`, `53`}, {`32`, `56`}, {`33`, `1`}, {`33`, `2`}, {`33`, `32`},
107	{`33`, `33`}, {`33`, `34`}, {`33`, `49`}, {`33`, `53`}, {`33`, `56`}, {`34`, `1`}, {`34`, `2`},
108	{`34`, `32`}, {`34`, `33`}, {`34`, `34`}, {`34`, `49`}, {`34`, `53`}, {`34`, `56`}, {`38`, `34`},
109	{`39`, `57`}, {`40`, `34`}, {`47`, `15`}, {`47`, `49`}, {`48`, `2`}, {`48`, `34`}, {`48`, `56`},
110	{`49`, `1`}, {`49`, `2`}, {`49`, `28`}, {`49`, `32`}, {`49`, `33`}, {`49`, `34`}, {`49`, `39`},
111	{`49`, `49`}, {`49`, `56`}, {`53`, `1`}, {`53`, `2`}, {`53`, `28`}, {`53`, `34`}, {`53`, `53`},
112	{`53`, `57`}, {`55`, `1`}, {`55`, `28`}, {`55`, `34`}, {`55`, `53`}, {`55`, `55`}, {`55`, `56`},
113	{`56`, `1`}, {`56`, `2`}, {`56`, `7`}, {`56`, `13`}, {`56`, `32`}, {`56`, `33`}, {`56`, `34`},
114	{`56`, `49`}, {`56`, `53`}, {`56`, `56`}, {`56`, `64`}, {`57`, `34`}, {`57`, `56`}, {`57`, `57`},
115	{`64`, `1`}, {`64`, `64`}, {`65`, `1`}, {`65`, `65`}}};
116
117	// We have: 9 calculated features (the features here); 1 feature for each
118	// instruction opcode; and 1 feature for each manually-identified sequence.
119	// For the latter 2, we build a histogram: we count the number of
120	// occurrences of each instruction opcode or succession of instructions,
121	// respectively.
122	// Note that instruction opcodes start from 1. For convenience, we also have an
123	// always 0 feature for the '0' opcode, hence the extra 1.
124	const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
125	ImportantInstructionSuccessions.size() + getMaxInstructionID() + `1` +
126	IRToNativeSizeLearning::NumNamedFeatures;
127
128	size_t getSize(Function &F, TargetTransformInfo &TTI) {
129	size_t Ret = `0`;
130	for (const auto &BB : F)
131	for (const auto &I : BB)
132	Ret += *(TTI.getInstructionCost(
133	&I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
134	return Ret;
135	}
136
137	size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
138	auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
139	return getSize(F, TTI);
140	}
141
142	unsigned getMaxDominatorTreeDepth(const Function &F,
143	const DominatorTree &Tree) {
144	unsigned Ret = `0`;
145	for (const auto &BB : F)
146	if (const auto *TN = Tree.getNode(&BB))
147	Ret = std::max(Ret, TN->getLevel());
148	return Ret;
149	}
150	} // namespace
151
152	IRToNativeSizeLearning::FunctionFeatures
153	IRToNativeSizeLearning::getFunctionFeatures(Function &F,
154	FunctionAnalysisManager &FAM) {
155	assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
156	"expected function features are sorted");
157
158	auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
159	FunctionFeatures FF;
160	size_t InstrCount = getMaxInstructionID() + `1`;
161	FF.InstructionHistogram.resize(InstrCount);
162
163	FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
164
165	int StartID = `0`;
166	int LastID = StartID;
167	auto getPairIndex = [](size_t a, size_t b) {
168	auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
169	if (I == ImportantInstructionSuccessions.end())
170	return -`1`;
171	return static_cast<int>(
172	std::distance(ImportantInstructionSuccessions.begin(), I));
173	};
174
175	// We don't want debug calls, because they'd just add noise.
176	for (const auto &BB : F) {
177	for (const auto &I : BB.instructionsWithoutDebug()) {
178	auto ID = I.getOpcode();
179
180	++FF.InstructionHistogram[ID];
181	int PairIndex = getPairIndex(LastID, ID);
182	if (PairIndex >= `0`)
183	++FF.InstructionPairHistogram[PairIndex];
184	LastID = ID;
185	if (isa<CallBase>(I))
186	++FF[NamedFeatureIndex::Calls];
187	}
188	}
189
190	FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
191	FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
192	FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
193	FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
194	FF[NamedFeatureIndex::Blocks] = F.size();
195	auto &LI = FAM.getResult<LoopAnalysis>(F);
196	FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
197	for (auto &L : LI)
198	FF[NamedFeatureIndex::MaxLoopDepth] =
199	std::max(FF[NamedFeatureIndex::MaxLoopDepth],
200	static_cast<int32_t>(L->getLoopDepth()));
201	FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
202	return FF;
203	}
204
205	void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t Ptr) const* {
206	std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
207	Ptr += NamedFeatures.size();
208	std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
209	Ptr += InstructionHistogram.size();
210	std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
211	Ptr);
212	}
213
214	bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
215	return !TFIR2NativeModelPath.empty();
216	}
217
218	InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
219	if (!isEvaluatorRequested()) {
220	return;
221	}
222	std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
223	"serving_default_input_1",
224	{`1`, static_cast<int64_t>(
225	IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
226	std::vector<TensorSpec> OutputSpecs{
227	TensorSpec::createSpec<float>("StatefulPartitionedCall", {`1`})};
228	Evaluator = std::make_unique<TFModelEvaluator>(
229	TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
230	if (!Evaluator \|\| !Evaluator->isValid()) {
231	Evaluator.reset();
232	return;
233	}
234	}
235
236	InlineSizeEstimatorAnalysis::Result
237	InlineSizeEstimatorAnalysis::run(const Function &F,
238	FunctionAnalysisManager &FAM) {
239	if (!Evaluator)
240	return std::nullopt;
241	auto Features = IRToNativeSizeLearning::getFunctionFeatures(
242	const_cast<Function &>(F), FAM);
243	int32_t *V = Evaluator->getInput<int32_t>(`0`);
244	Features.fillTensor(V);
245	auto ER = Evaluator->evaluate();
246	if (!ER)
247	return std::nullopt;
248	float Ret = ER->getTensorValue<float*>(`0`);
249	if (Ret < `0.0`)
250	Ret = `0.0`;
251	return static_cast<size_t>(Ret);
252	}
253
254	InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
255	InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
256	InlineSizeEstimatorAnalysis &&Other)
257	: Evaluator(std::move(Other.Evaluator)) {}
258
259	#else
260	namespace llvm {
261	class TFModelEvaluator {};
262	} // namespace llvm
263	InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
264	InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
265	InlineSizeEstimatorAnalysis &&) {}
266	InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
267	InlineSizeEstimatorAnalysis::Result
268	InlineSizeEstimatorAnalysis::run(const Function &F,
269	FunctionAnalysisManager &FAM) {
270	return std::nullopt;
271	}
272	bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
273	#endif
274
275	PreservedAnalyses
276	InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
277	FunctionAnalysisManager &AM) {
278	OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
279	<< ": " << AM.getResult<InlineSizeEstimatorAnalysis>(IR&: F) << "\n";
280	return PreservedAnalyses::all();
281	}
282

source code of llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp