1 | //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a model runner using TFLite, allowing the |
10 | // loading of a model from a command line option. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | #include "llvm/Analysis/TensorSpec.h" |
14 | #include "llvm/Config/config.h" |
15 | #if defined(LLVM_HAVE_TFLITE) |
16 | |
17 | #include "llvm/ADT/BitVector.h" |
18 | #include "llvm/Analysis/CallGraph.h" |
19 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
20 | #include "llvm/Analysis/MLInlineAdvisor.h" |
21 | #include "llvm/Analysis/ModelUnderTrainingRunner.h" |
22 | #include "llvm/Analysis/NoInferenceModelRunner.h" |
23 | #include "llvm/Analysis/Utils/TFUtils.h" |
24 | #include "llvm/Analysis/Utils/TrainingLogger.h" |
25 | #include "llvm/IR/LLVMContext.h" |
26 | #include "llvm/Support/CommandLine.h" |
27 | #include "llvm/Support/ManagedStatic.h" |
28 | |
29 | #include <vector> |
30 | #include <optional> |
31 | |
32 | using namespace llvm; |
33 | |
34 | static cl::opt<std::string> TrainingLog( |
35 | "training-log" , cl::Hidden, |
36 | cl::desc("Path where the development - mode inlining log is saved." )); |
37 | |
38 | static cl::opt<std::string> TFModelUnderTrainingPath( |
39 | "ml-inliner-model-under-training" , cl::Hidden, |
40 | cl::desc(R"(Path to SavedModel from the previous training iteration. |
41 | The directory is also expected to contain a JSON specification of the |
42 | outputs expected to be logged, where the first entry must be the |
43 | inlining decision. The file containing the specification should be |
44 | called output_spec.json. The expected JSON value is an array of |
45 | dictionaries. Each dictionary should have 2 keys: |
46 | |
47 | - "tensor_spec, followed by the TensorSpec description of the |
48 | output; and |
49 | - "logging_name", a string indicating the name to use when |
50 | logging the output values. |
51 | |
52 | Example: |
53 | [ |
54 | { |
55 | "logging_name" : "some_name", |
56 | "tensor_spec" : { |
57 | "name" : "model_name", |
58 | "port" : 0, |
59 | "shape" : [2, 3], |
60 | "type" : "float" |
61 | } |
62 | } |
63 | ] |
64 | |
65 | The first value must always correspond to the decision.)" )); |
66 | |
67 | static cl::opt<std::string> TFOutputSpecOverride( |
68 | "ml-inliner-output-spec-override" , cl::Hidden, |
69 | cl::desc("Override the path to the output spec json file. See " |
70 | "-ml-inliner-model-under-training documentation for the " |
71 | "specification of that file." )); |
72 | |
73 | static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix" , |
74 | cl::Hidden, cl::init("action_" ), |
75 | cl::desc("Prefix for feature names." )); |
76 | |
77 | namespace { |
/// One inlining decision and its outcome, as handed to TrainingLogger.
struct InlineEvent {
  /// The decision the default policy would have taken.
  int64_t DefaultDecision{0};

  /// The decision that was actually advised. When training off the default
  /// policy, this equals DefaultDecision.
  int64_t AdvisedDecision{0};

  /// Whether inlining really took place. It is 'false' when inlining failed
  /// even though AdvisedDecision was true; otherwise it matches
  /// AdvisedDecision.
  bool Effect{false};

  /// The size delta caused by the decision: size_after - size_before.
  int64_t Reward{0};
};
95 | |
96 | /// Collect data we may use for training a model. |
97 | class TrainingLogger final { |
98 | public: |
99 | TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); |
100 | |
101 | /// Log one inlining event. |
102 | void logInlineEvent(const InlineEvent &Event, |
103 | const MLModelRunner &ModelRunner); |
104 | |
105 | private: |
106 | StringRef LogFileName; |
107 | const ModelUnderTrainingRunner *const MUTR; |
108 | std::unique_ptr<Logger> L; |
109 | BitVector Effects; |
110 | /// Set these 2 clearly OOB, to make sure we set them later. |
111 | size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); |
112 | size_t DecisionPos = std::numeric_limits<size_t>::max(); |
113 | }; |
114 | |
115 | /// An extension of the MLInlineAdvisor for the 'development' mode, targeting |
116 | /// the offline training scenario. Note that training happens outside of the |
117 | /// compiler, this facility is concerned with producing training data ("logs"). |
118 | /// This InlineAdvisor can operate in the following modes: |
119 | /// |
120 | /// 1) collect logs for the default policy. This is useful for bootstrapping |
121 | /// training, which will be considerably faster by starting from a reasonable |
122 | /// policy. |
123 | /// |
124 | /// 2) collect logs for the ML policy, using a model from a previous |
125 | /// training. Potentially, that model uses internally some small random |
126 | /// perturbation of its weights, to induce exploration (setting this up is the |
127 | /// responsibility of the training algorithm). The logs would then be used to |
128 | /// retrain and improve on this model. |
129 | /// |
130 | /// 3) use the provided model, with no logging. This is useful for end to end |
131 | /// validation - the model, in this case, is a release candidate and shouldn't |
132 | /// have random perturbations. It is a convenience feature: rather than needing |
133 | /// to take the release candidate model and compile it in 'release' mode, |
134 | /// validate it, then potentially discard it, it's easier to just pass the model |
135 | /// to the compiler, albeit compilation would be slower, as a one-off. Once the |
136 | /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in |
137 | /// release mode. The expectation is that a well-trained model provides a good |
138 | /// policy over a sufficiently diverse codebase, over many changes (i.e. |
139 | /// training happens seldom). |
140 | class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { |
141 | public: |
142 | DevelopmentModeMLInlineAdvisor( |
143 | Module &M, ModuleAnalysisManager &MAM, |
144 | std::unique_ptr<MLModelRunner> ModelRunner, |
145 | std::function<bool(CallBase &)> GetDefaultAdvice, |
146 | std::unique_ptr<TrainingLogger> Logger); |
147 | |
148 | size_t getTotalSizeEstimate(); |
149 | |
150 | void updateNativeSizeEstimate(int64_t Change) { |
151 | *CurrentNativeSize += Change; |
152 | } |
153 | void resetNativeSize(Function *F) { |
154 | PreservedAnalyses PA = PreservedAnalyses::all(); |
155 | PA.abandon<InlineSizeEstimatorAnalysis>(); |
156 | FAM.invalidate(*F, PA); |
157 | } |
158 | |
159 | std::unique_ptr<MLInlineAdvice> |
160 | getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; |
161 | |
162 | std::optional<size_t> getNativeSizeEstimate(const Function &F) const; |
163 | |
164 | private: |
165 | bool isLogging() const { return !!Logger; } |
166 | std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; |
167 | |
168 | const bool IsDoingInference; |
169 | std::unique_ptr<TrainingLogger> Logger; |
170 | |
171 | const std::optional<int32_t> InitialNativeSize; |
172 | std::optional<int32_t> CurrentNativeSize; |
173 | }; |
174 | |
175 | /// A variant of MLInlineAdvice that tracks all non-trivial inlining |
176 | /// decisions, for training/logging. |
177 | class LoggingMLInlineAdvice : public MLInlineAdvice { |
178 | public: |
179 | LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, |
180 | OptimizationRemarkEmitter &ORE, bool Recommendation, |
181 | TrainingLogger &Logger, |
182 | std::optional<size_t> CallerSizeEstimateBefore, |
183 | std::optional<size_t> CalleeSizeEstimateBefore, |
184 | bool DefaultDecision, bool Mandatory = false) |
185 | : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), |
186 | CallerSizeEstimateBefore(CallerSizeEstimateBefore), |
187 | CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), |
188 | DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} |
189 | |
190 | virtual ~LoggingMLInlineAdvice() = default; |
191 | |
192 | private: |
193 | DevelopmentModeMLInlineAdvisor *getAdvisor() const { |
194 | return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); |
195 | } |
196 | void recordInliningImpl() override { |
197 | MLInlineAdvice::recordInliningImpl(); |
198 | getAdvisor()->resetNativeSize(Caller); |
199 | int Reward = std::numeric_limits<int>::max(); |
200 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && |
201 | !getAdvisor()->isForcedToStop()) { |
202 | int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + |
203 | *CalleeSizeEstimateBefore; |
204 | Reward = NativeSizeAfter - |
205 | (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); |
206 | getAdvisor()->updateNativeSizeEstimate(Reward); |
207 | } |
208 | log(Reward, /*Success=*/true); |
209 | } |
210 | |
211 | void recordInliningWithCalleeDeletedImpl() override { |
212 | MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); |
213 | getAdvisor()->resetNativeSize(Caller); |
214 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && |
215 | !getAdvisor()->isForcedToStop()) { |
216 | int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); |
217 | int Reward = NativeSizeAfter - |
218 | (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); |
219 | getAdvisor()->updateNativeSizeEstimate(Reward); |
220 | log(Reward, /*Success=*/true); |
221 | } else { |
222 | log(NoReward, /*Success=*/true); |
223 | } |
224 | } |
225 | |
226 | void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { |
227 | MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); |
228 | log(NoReward, /*Success=*/false); |
229 | } |
230 | |
231 | void recordUnattemptedInliningImpl() override { |
232 | MLInlineAdvice::recordUnattemptedInliningImpl(); |
233 | log(NoReward, /*Success=*/false); |
234 | } |
235 | |
236 | void log(int64_t Reward, bool Success) { |
237 | if (Mandatory) |
238 | return; |
239 | InlineEvent Event; |
240 | Event.AdvisedDecision = isInliningRecommended(); |
241 | Event.DefaultDecision = DefaultDecision; |
242 | Event.Effect = Success; |
243 | Event.Reward = Reward; |
244 | Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); |
245 | } |
246 | |
247 | static const int64_t NoReward = 0; |
248 | TrainingLogger &Logger; |
249 | const std::optional<size_t> CallerSizeEstimateBefore; |
250 | const std::optional<size_t> CalleeSizeEstimateBefore; |
251 | const int64_t DefaultDecision; |
252 | const int64_t Mandatory; |
253 | }; |
254 | |
255 | static const std::vector<TensorSpec> TrainingOnlyFeatures{ |
256 | TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default" , {1}), |
257 | TensorSpec::createSpec<float>(TFFeedPrefix + "discount" , {1}), |
258 | TensorSpec::createSpec<float>(TFFeedPrefix + "reward" , {1}), |
259 | TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type" , {1})}; |
260 | |
261 | static const std::vector<TensorSpec> getInputFeatures() { |
262 | std::vector<TensorSpec> InputSpecs; |
263 | for (size_t I = 0; I < NumberOfFeatures; ++I) |
264 | InputSpecs.push_back(TensorSpec::createSpec<int64_t>( |
265 | TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); |
266 | append_range(InputSpecs, TrainingOnlyFeatures); |
267 | return InputSpecs; |
268 | } |
269 | |
270 | } // namespace |
271 | |
272 | TrainingLogger::TrainingLogger(StringRef LogFileName, |
273 | const ModelUnderTrainingRunner *MUTR) |
274 | : LogFileName(LogFileName), MUTR(MUTR) { |
275 | // The first output is the inlining decision. |
276 | std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end()); |
277 | |
278 | if (MUTR) |
279 | append_range(FT, MUTR->extraOutputsForLoggingSpecs()); |
280 | |
281 | DefaultDecisionPos = FT.size(); |
282 | FT.push_back(DefaultDecisionSpec); |
283 | |
284 | DecisionPos = FT.size(); |
285 | FT.push_back(InlineDecisionSpec); |
286 | std::error_code EC; |
287 | auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); |
288 | if (EC) |
289 | dbgs() << (EC.message() + ":" + TrainingLog); |
290 | |
291 | L = std::make_unique<Logger>( |
292 | std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), |
293 | InlineSizeEstimatorAnalysis::isEvaluatorRequested()); |
294 | L->switchContext("" ); |
295 | } |
296 | |
297 | /// Log one inlining event. |
298 | void TrainingLogger::logInlineEvent(const InlineEvent &Event, |
299 | const MLModelRunner &ModelRunner) { |
300 | L->startObservation(); |
301 | size_t CurrentFeature = 0; |
302 | for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) |
303 | L->logTensorValue(CurrentFeature, |
304 | reinterpret_cast<const char *>( |
305 | ModelRunner.getTensorUntyped(CurrentFeature))); |
306 | |
307 | if (MUTR) |
308 | for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) { |
309 | const char *RawData = |
310 | reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)); |
311 | L->logTensorValue(CurrentFeature, RawData); |
312 | ++CurrentFeature; |
313 | } |
314 | |
315 | assert(CurrentFeature == DefaultDecisionPos); |
316 | L->logTensorValue(DefaultDecisionPos, |
317 | reinterpret_cast<const char *>(&Event.DefaultDecision)); |
318 | L->logTensorValue(DecisionPos, |
319 | reinterpret_cast<const char *>(&Event.AdvisedDecision)); |
320 | L->endObservation(); |
321 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
322 | L->logReward(Event.Reward); |
323 | |
324 | // For debugging / later use |
325 | Effects.push_back(Event.Effect); |
326 | } |
327 | |
328 | DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( |
329 | Module &M, ModuleAnalysisManager &MAM, |
330 | std::unique_ptr<MLModelRunner> ModelRunner, |
331 | std::function<bool(CallBase &)> GetDefaultAdvice, |
332 | std::unique_ptr<TrainingLogger> Logger) |
333 | : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice), |
334 | IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())), |
335 | Logger(std::move(Logger)), |
336 | InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), |
337 | CurrentNativeSize(InitialNativeSize) { |
338 | // We cannot have the case of neither inference nor logging. |
339 | assert(IsDoingInference || isLogging()); |
340 | } |
341 | |
342 | std::optional<size_t> |
343 | DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { |
344 | if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
345 | return std::nullopt; |
346 | auto &R = |
347 | FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); |
348 | if (!R) { |
349 | F.getParent()->getContext().emitError( |
350 | "Native size estimator is not present." ); |
351 | return 0; |
352 | } |
353 | return *R; |
354 | } |
355 | |
356 | std::unique_ptr<MLInlineAdvice> |
357 | DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { |
358 | return std::make_unique<LoggingMLInlineAdvice>( |
359 | /*Advisor=*/this, |
360 | /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, |
361 | /*Logger=*/*Logger, |
362 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
363 | /*CalleeSizeEstimateBefore=*/ |
364 | getNativeSizeEstimate(*CB.getCalledFunction()), |
365 | /*DefaultDecision=*/true, /*Mandatory*/ true); |
366 | } |
367 | |
368 | std::unique_ptr<MLInlineAdvice> |
369 | DevelopmentModeMLInlineAdvisor::getAdviceFromModel( |
370 | CallBase &CB, OptimizationRemarkEmitter &ORE) { |
371 | if (IsDoingInference && !isLogging()) |
372 | return MLInlineAdvisor::getAdviceFromModel(CB, ORE); |
373 | |
374 | bool DefaultAdvice = GetDefaultAdvice(CB); |
375 | auto Recommendation = |
376 | IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>()) |
377 | : DefaultAdvice; |
378 | return std::make_unique<LoggingMLInlineAdvice>( |
379 | /*Advisor=*/this, |
380 | /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, |
381 | /*Logger=*/*Logger, |
382 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
383 | /*CalleeSizeEstimateBefore=*/ |
384 | getNativeSizeEstimate(*CB.getCalledFunction()), |
385 | /*DefaultDecision=*/DefaultAdvice); |
386 | } |
387 | |
388 | size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { |
389 | if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
390 | return 0; |
391 | size_t Ret = 0; |
392 | for (auto &F : M) { |
393 | if (F.isDeclaration()) |
394 | continue; |
395 | Ret += *getNativeSizeEstimate(F); |
396 | } |
397 | return Ret; |
398 | } |
399 | |
400 | std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( |
401 | Module &M, ModuleAnalysisManager &MAM, |
402 | std::function<bool(CallBase &)> GetDefaultAdvice) { |
403 | auto &Ctx = M.getContext(); |
404 | std::unique_ptr<MLModelRunner> Runner; |
405 | if (TFModelUnderTrainingPath.empty()) |
406 | Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); |
407 | else |
408 | Runner = ModelUnderTrainingRunner::createAndEnsureValid( |
409 | Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), |
410 | TFOutputSpecOverride); |
411 | if (!Runner) |
412 | return nullptr; |
413 | std::unique_ptr<TrainingLogger> Logger; |
414 | if (!TrainingLog.empty()) |
415 | Logger = std::make_unique<TrainingLogger>( |
416 | TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); |
417 | |
418 | return std::make_unique<DevelopmentModeMLInlineAdvisor>( |
419 | M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); |
420 | } |
421 | #endif // defined(LLVM_HAVE_TFLITE) |
422 | |