1//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a model runner using TFLite, allowing the
10// loading of a model from a command line option.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/Analysis/TensorSpec.h"
14#include "llvm/Config/config.h"
15#if defined(LLVM_HAVE_TFLITE)
16
17#include "llvm/ADT/BitVector.h"
18#include "llvm/Analysis/CallGraph.h"
19#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
20#include "llvm/Analysis/MLInlineAdvisor.h"
21#include "llvm/Analysis/ModelUnderTrainingRunner.h"
22#include "llvm/Analysis/NoInferenceModelRunner.h"
23#include "llvm/Analysis/Utils/TFUtils.h"
24#include "llvm/Analysis/Utils/TrainingLogger.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/ManagedStatic.h"
28
29#include <vector>
30#include <optional>
31
32using namespace llvm;
33
34static cl::opt<std::string> TrainingLog(
35 "training-log", cl::Hidden,
36 cl::desc("Path where the development - mode inlining log is saved."));
37
38static cl::opt<std::string> TFModelUnderTrainingPath(
39 "ml-inliner-model-under-training", cl::Hidden,
40 cl::desc(R"(Path to SavedModel from the previous training iteration.
41The directory is also expected to contain a JSON specification of the
42outputs expected to be logged, where the first entry must be the
43inlining decision. The file containing the specification should be
44called output_spec.json. The expected JSON value is an array of
45dictionaries. Each dictionary should have 2 keys:
46
47- "tensor_spec, followed by the TensorSpec description of the
48output; and
49- "logging_name", a string indicating the name to use when
50logging the output values.
51
52Example:
53[
54 {
55 "logging_name" : "some_name",
56 "tensor_spec" : {
57 "name" : "model_name",
58 "port" : 0,
59 "shape" : [2, 3],
60 "type" : "float"
61 }
62 }
63]
64
65The first value must always correspond to the decision.)"));
66
/// Optional path to an output spec JSON file. Its value is forwarded as the
/// last argument of ModelUnderTrainingRunner::createAndEnsureValid when a
/// model under training is loaded.
static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));
72
/// Prefix prepended to tensor names when the input specs are built (see
/// getInputFeatures). Defaults to "action_".
static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));
76
77namespace {
/// One inlining decision together with its outcome; this is the unit of data
/// that TrainingLogger records.
struct InlineEvent {
  /// The decision the default policy would have made.
  int64_t DefaultDecision{0};

  /// The decision that was advised. It equals DefaultDecision when training
  /// off the default policy.
  int64_t AdvisedDecision{0};

  /// The actual outcome. It is 'false' when inlining failed, even if
  /// AdvisedDecision was true; otherwise it agrees with AdvisedDecision.
  bool Effect{false};

  /// The resulting change in size, i.e. size_after - size_before.
  int64_t Reward{0};
};
95
/// Collect data we may use for training a model.
class TrainingLogger final {
public:
  /// Builds the logger. \p MUTR may be null; the out-of-line definitions check
  /// it before use and only log the model's extra outputs when it is set.
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

private:
  /// Log file name, as received by the constructor.
  StringRef LogFileName;
  /// The model under training, or null. Not owned.
  const ModelUnderTrainingRunner *const MUTR;
  /// The underlying logger that the observations are written through.
  std::unique_ptr<Logger> L;
  /// One bit per logged event, holding that event's Effect.
  BitVector Effects;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};
114
115/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
116/// the offline training scenario. Note that training happens outside of the
117/// compiler, this facility is concerned with producing training data ("logs").
118/// This InlineAdvisor can operate in the following modes:
119///
120/// 1) collect logs for the default policy. This is useful for bootstrapping
121/// training, which will be considerably faster by starting from a reasonable
122/// policy.
123///
124/// 2) collect logs for the ML policy, using a model from a previous
125/// training. Potentially, that model uses internally some small random
126/// perturbation of its weights, to induce exploration (setting this up is the
127/// responsibility of the training algorithm). The logs would then be used to
128/// retrain and improve on this model.
129///
130/// 3) use the provided model, with no logging. This is useful for end to end
131/// validation - the model, in this case, is a release candidate and shouldn't
132/// have random perturbations. It is a convenience feature: rather than needing
133/// to take the release candidate model and compile it in 'release' mode,
134/// validate it, then potentially discard it, it's easier to just pass the model
135/// to the compiler, albeit compilation would be slower, as a one-off. Once the
136/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
137/// release mode. The expectation is that a well-trained model provides a good
138/// policy over a sufficiently diverse codebase, over many changes (i.e.
139/// training happens seldom).
140class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
141public:
142 DevelopmentModeMLInlineAdvisor(
143 Module &M, ModuleAnalysisManager &MAM,
144 std::unique_ptr<MLModelRunner> ModelRunner,
145 std::function<bool(CallBase &)> GetDefaultAdvice,
146 std::unique_ptr<TrainingLogger> Logger);
147
148 size_t getTotalSizeEstimate();
149
150 void updateNativeSizeEstimate(int64_t Change) {
151 *CurrentNativeSize += Change;
152 }
153 void resetNativeSize(Function *F) {
154 PreservedAnalyses PA = PreservedAnalyses::all();
155 PA.abandon<InlineSizeEstimatorAnalysis>();
156 FAM.invalidate(*F, PA);
157 }
158
159 std::unique_ptr<MLInlineAdvice>
160 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
161
162 std::optional<size_t> getNativeSizeEstimate(const Function &F) const;
163
164private:
165 bool isLogging() const { return !!Logger; }
166 std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
167
168 const bool IsDoingInference;
169 std::unique_ptr<TrainingLogger> Logger;
170
171 const std::optional<int32_t> InitialNativeSize;
172 std::optional<int32_t> CurrentNativeSize;
173};
174
175/// A variant of MLInlineAdvice that tracks all non-trivial inlining
176/// decisions, for training/logging.
177class LoggingMLInlineAdvice : public MLInlineAdvice {
178public:
179 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
180 OptimizationRemarkEmitter &ORE, bool Recommendation,
181 TrainingLogger &Logger,
182 std::optional<size_t> CallerSizeEstimateBefore,
183 std::optional<size_t> CalleeSizeEstimateBefore,
184 bool DefaultDecision, bool Mandatory = false)
185 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
186 CallerSizeEstimateBefore(CallerSizeEstimateBefore),
187 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
188 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
189
190 virtual ~LoggingMLInlineAdvice() = default;
191
192private:
193 DevelopmentModeMLInlineAdvisor *getAdvisor() const {
194 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
195 }
196 void recordInliningImpl() override {
197 MLInlineAdvice::recordInliningImpl();
198 getAdvisor()->resetNativeSize(Caller);
199 int Reward = std::numeric_limits<int>::max();
200 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
201 !getAdvisor()->isForcedToStop()) {
202 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
203 *CalleeSizeEstimateBefore;
204 Reward = NativeSizeAfter -
205 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
206 getAdvisor()->updateNativeSizeEstimate(Reward);
207 }
208 log(Reward, /*Success=*/true);
209 }
210
211 void recordInliningWithCalleeDeletedImpl() override {
212 MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
213 getAdvisor()->resetNativeSize(Caller);
214 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
215 !getAdvisor()->isForcedToStop()) {
216 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
217 int Reward = NativeSizeAfter -
218 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
219 getAdvisor()->updateNativeSizeEstimate(Reward);
220 log(Reward, /*Success=*/true);
221 } else {
222 log(NoReward, /*Success=*/true);
223 }
224 }
225
226 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
227 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
228 log(NoReward, /*Success=*/false);
229 }
230
231 void recordUnattemptedInliningImpl() override {
232 MLInlineAdvice::recordUnattemptedInliningImpl();
233 log(NoReward, /*Success=*/false);
234 }
235
236 void log(int64_t Reward, bool Success) {
237 if (Mandatory)
238 return;
239 InlineEvent Event;
240 Event.AdvisedDecision = isInliningRecommended();
241 Event.DefaultDecision = DefaultDecision;
242 Event.Effect = Success;
243 Event.Reward = Reward;
244 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
245 }
246
247 static const int64_t NoReward = 0;
248 TrainingLogger &Logger;
249 const std::optional<size_t> CallerSizeEstimateBefore;
250 const std::optional<size_t> CalleeSizeEstimateBefore;
251 const int64_t DefaultDecision;
252 const int64_t Mandatory;
253};
254
255static const std::vector<TensorSpec> TrainingOnlyFeatures{
256 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
257 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
258 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
259 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
260
261static const std::vector<TensorSpec> getInputFeatures() {
262 std::vector<TensorSpec> InputSpecs;
263 for (size_t I = 0; I < NumberOfFeatures; ++I)
264 InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
265 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
266 append_range(InputSpecs, TrainingOnlyFeatures);
267 return InputSpecs;
268}
269
270} // namespace
271
272TrainingLogger::TrainingLogger(StringRef LogFileName,
273 const ModelUnderTrainingRunner *MUTR)
274 : LogFileName(LogFileName), MUTR(MUTR) {
275 // The first output is the inlining decision.
276 std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());
277
278 if (MUTR)
279 append_range(FT, MUTR->extraOutputsForLoggingSpecs());
280
281 DefaultDecisionPos = FT.size();
282 FT.push_back(DefaultDecisionSpec);
283
284 DecisionPos = FT.size();
285 FT.push_back(InlineDecisionSpec);
286 std::error_code EC;
287 auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
288 if (EC)
289 dbgs() << (EC.message() + ":" + TrainingLog);
290
291 L = std::make_unique<Logger>(
292 std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
293 InlineSizeEstimatorAnalysis::isEvaluatorRequested());
294 L->switchContext("");
295}
296
297/// Log one inlining event.
298void TrainingLogger::logInlineEvent(const InlineEvent &Event,
299 const MLModelRunner &ModelRunner) {
300 L->startObservation();
301 size_t CurrentFeature = 0;
302 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
303 L->logTensorValue(CurrentFeature,
304 reinterpret_cast<const char *>(
305 ModelRunner.getTensorUntyped(CurrentFeature)));
306
307 if (MUTR)
308 for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
309 const char *RawData =
310 reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
311 L->logTensorValue(CurrentFeature, RawData);
312 ++CurrentFeature;
313 }
314
315 assert(CurrentFeature == DefaultDecisionPos);
316 L->logTensorValue(DefaultDecisionPos,
317 reinterpret_cast<const char *>(&Event.DefaultDecision));
318 L->logTensorValue(DecisionPos,
319 reinterpret_cast<const char *>(&Event.AdvisedDecision));
320 L->endObservation();
321 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
322 L->logReward(Event.Reward);
323
324 // For debugging / later use
325 Effects.push_back(Event.Effect);
326}
327
/// Constructs the advisor from a model runner, the default-policy callback and
/// an optional training logger.
///
/// The order of the member initializers matters: isLogging() inspects the
/// Logger member, so Logger has to be initialized before InitialNativeSize.
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
      // Inference happens only when the runner is a ModelUnderTrainingRunner.
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      // The parameter shadows the member of the same name.
      Logger(std::move(Logger)),
      // The total size is only estimated when logging.
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}
341
342std::optional<size_t>
343DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
344 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
345 return std::nullopt;
346 auto &R =
347 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
348 if (!R) {
349 F.getParent()->getContext().emitError(
350 "Native size estimator is not present.");
351 return 0;
352 }
353 return *R;
354}
355
356std::unique_ptr<MLInlineAdvice>
357DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
358 return std::make_unique<LoggingMLInlineAdvice>(
359 /*Advisor=*/this,
360 /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
361 /*Logger=*/*Logger,
362 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
363 /*CalleeSizeEstimateBefore=*/
364 getNativeSizeEstimate(*CB.getCalledFunction()),
365 /*DefaultDecision=*/true, /*Mandatory*/ true);
366}
367
368std::unique_ptr<MLInlineAdvice>
369DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
370 CallBase &CB, OptimizationRemarkEmitter &ORE) {
371 if (IsDoingInference && !isLogging())
372 return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
373
374 bool DefaultAdvice = GetDefaultAdvice(CB);
375 auto Recommendation =
376 IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
377 : DefaultAdvice;
378 return std::make_unique<LoggingMLInlineAdvice>(
379 /*Advisor=*/this,
380 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
381 /*Logger=*/*Logger,
382 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
383 /*CalleeSizeEstimateBefore=*/
384 getNativeSizeEstimate(*CB.getCalledFunction()),
385 /*DefaultDecision=*/DefaultAdvice);
386}
387
388size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
389 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
390 return 0;
391 size_t Ret = 0;
392 for (auto &F : M) {
393 if (F.isDeclaration())
394 continue;
395 Ret += *getNativeSizeEstimate(F);
396 }
397 return Ret;
398}
399
400std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
401 Module &M, ModuleAnalysisManager &MAM,
402 std::function<bool(CallBase &)> GetDefaultAdvice) {
403 auto &Ctx = M.getContext();
404 std::unique_ptr<MLModelRunner> Runner;
405 if (TFModelUnderTrainingPath.empty())
406 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
407 else
408 Runner = ModelUnderTrainingRunner::createAndEnsureValid(
409 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
410 TFOutputSpecOverride);
411 if (!Runner)
412 return nullptr;
413 std::unique_ptr<TrainingLogger> Logger;
414 if (!TrainingLog.empty())
415 Logger = std::make_unique<TrainingLogger>(
416 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
417
418 return std::make_unique<DevelopmentModeMLInlineAdvisor>(
419 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
420}
421#endif // defined(LLVM_HAVE_TFLITE)
422

// Source code of llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp