1//===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// The interface file for the IROutliner which is used by the IROutliner Pass.
11//
12// The outliner uses the IRSimilarityIdentifier to identify the similar regions
13// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
14// whether it will provide code size reduction. Each region is extracted using
15// the code extractor. These extracted functions are consolidated into a single
16// function and called from the extracted call site.
17//
18// For example:
19// \code
20// %1 = add i32 %a, %b
21// %2 = add i32 %b, %a
22// %3 = add i32 %b, %a
23// %4 = add i32 %a, %b
24// \endcode
25// would become function
26// \code
27// define internal void outlined_ir_function(i32 %0, i32 %1) {
28// %1 = add i32 %0, %1
29// %2 = add i32 %1, %0
30// ret void
31// }
32// \endcode
33// with calls:
34// \code
35// call void outlined_ir_function(i32 %a, i32 %b)
36// call void outlined_ir_function(i32 %b, i32 %a)
37// \endcode
38//
39//===----------------------------------------------------------------------===//
40
41#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43
44#include "llvm/Analysis/IRSimilarityIdentifier.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/Support/InstructionCost.h"
47#include "llvm/Transforms/Utils/CodeExtractor.h"
48
49struct OutlinableGroup;
50
51namespace llvm {
52using namespace CallingConv;
53using namespace IRSimilarity;
54
55class Module;
56class TargetTransformInfo;
57class OptimizationRemarkEmitter;
58
59/// The OutlinableRegion holds all the information for a specific region, or
60/// sequence of instructions. This includes what values need to be hoisted to
61/// arguments from the extracted function, inputs and outputs to the region, and
62/// mapping from the extracted function arguments to overall function arguments.
63struct OutlinableRegion {
64 /// Describes the region of code.
65 IRSimilarityCandidate *Candidate = nullptr;
66
67 /// If this region is outlined, the front and back IRInstructionData could
68 /// potentially become invalidated if the only new instruction is a call.
69 /// This ensures that we replace in the instruction in the IRInstructionData.
70 IRInstructionData *NewFront = nullptr;
71 IRInstructionData *NewBack = nullptr;
72
73 /// The number of extracted inputs from the CodeExtractor.
74 unsigned NumExtractedInputs = 0;
75
76 /// The corresponding BasicBlock with the appropriate stores for this
77 /// OutlinableRegion in the overall function.
78 unsigned OutputBlockNum = -1;
79
80 /// Mapping the extracted argument number to the argument number in the
81 /// overall function. Since there will be inputs, such as elevated constants
82 /// that are not the same in each region in a SimilarityGroup, or values that
83 /// cannot be sunk into the extracted section in every region, we must keep
84 /// track of which extracted argument maps to which overall argument.
85 DenseMap<unsigned, unsigned> ExtractedArgToAgg;
86 DenseMap<unsigned, unsigned> AggArgToExtracted;
87
88 /// Values in the outlined functions will often be replaced by arguments. When
89 /// finding corresponding values from one region to another, the found value
90 /// will be the value the argument previously replaced. This structure maps
91 /// any replaced values for the region to the aggregate aggregate argument
92 /// in the overall function.
93 DenseMap<Value *, Value *> RemappedArguments;
94
95 /// Marks whether we need to change the order of the arguments when mapping
96 /// the old extracted function call to the new aggregate outlined function
97 /// call.
98 bool ChangedArgOrder = false;
99
100 /// Marks whether this region ends in a branch, there is special handling
101 /// required for the following basic blocks in this case.
102 bool EndsInBranch = false;
103
104 /// The PHIBlocks with their corresponding return block based on the return
105 /// value as the key.
106 DenseMap<Value *, BasicBlock *> PHIBlocks;
107
108 /// Mapping of the argument number in the deduplicated function
109 /// to a given constant, which is used when creating the arguments to the call
110 /// to the newly created deduplicated function. This is handled separately
111 /// since the CodeExtractor does not recognize constants.
112 DenseMap<unsigned, Constant *> AggArgToConstant;
113
114 /// The global value numbers that are used as outputs for this section. Once
115 /// extracted, each output will be stored to an output register. This
116 /// documents the global value numbers that are used in this pattern.
117 SmallVector<unsigned, 4> GVNStores;
118
119 /// Used to create an outlined function.
120 CodeExtractor *CE = nullptr;
121
122 /// The call site of the extracted region.
123 CallInst *Call = nullptr;
124
125 /// The function for the extracted region.
126 Function *ExtractedFunction = nullptr;
127
128 /// Flag for whether we have split out the IRSimilarityCanidate. That is,
129 /// make the region contained the IRSimilarityCandidate its own BasicBlock.
130 bool CandidateSplit = false;
131
132 /// Flag for whether we should not consider this region for extraction.
133 bool IgnoreRegion = false;
134
135 /// The BasicBlock that is before the start of the region BasicBlock,
136 /// only defined when the region has been split.
137 BasicBlock *PrevBB = nullptr;
138
139 /// The BasicBlock that contains the starting instruction of the region.
140 BasicBlock *StartBB = nullptr;
141
142 /// The BasicBlock that contains the ending instruction of the region.
143 BasicBlock *EndBB = nullptr;
144
145 /// The BasicBlock that is after the start of the region BasicBlock,
146 /// only defined when the region has been split.
147 BasicBlock *FollowBB = nullptr;
148
149 /// The Outlinable Group that contains this region and structurally similar
150 /// regions to this region.
151 OutlinableGroup *Parent = nullptr;
152
153 OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
154 : Candidate(&C), Parent(&Group) {
155 StartBB = C.getStartBB();
156 EndBB = C.getEndBB();
157 }
158
159 /// For the contained region, split the parent BasicBlock at the starting and
160 /// ending instructions of the contained IRSimilarityCandidate.
161 void splitCandidate();
162
163 /// For the contained region, reattach the BasicBlock at the starting and
164 /// ending instructions of the contained IRSimilarityCandidate, or if the
165 /// function has been extracted, the start and end of the BasicBlock
166 /// containing the called function.
167 void reattachCandidate();
168
169 /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
170 ///
171 /// \param Other [in] - The OutlinableRegion to find the corresponding Value
172 /// in.
173 /// \param V [in] - The Value to look for in the other region.
174 /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
175 Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
176
177 /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
178 ///
179 /// \param Other [in] - The OutlinableRegion to find the corresponding
180 /// BasicBlock in.
181 /// \param BB [in] - The BasicBlock to look for in the other region.
182 /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
183 BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other,
184 BasicBlock *BB);
185
186 /// Get the size of the code removed from the region.
187 ///
188 /// \param [in] TTI - The TargetTransformInfo for the parent function.
189 /// \returns the code size of the region
190 InstructionCost getBenefit(TargetTransformInfo &TTI);
191};
192
193/// This class is a pass that identifies similarity in a Module, extracts
194/// instances of the similarity, and then consolidating the similar regions
195/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
196/// to identify the similar regions of code, and then extracts the similar
197/// sections into a single function. See the above for an example as to
198/// how code is extracted and consolidated into a single function.
199class IROutliner {
200public:
201 IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
202 function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
203 function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
204 : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
205
206 // Check that the DenseMap implementation has not changed.
207 assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
208 "DenseMapInfo<unsigned>'s empty key isn't -1!");
209 assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
210 "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
211 }
212 bool run(Module &M);
213
214private:
215 /// Find repeated similar code sequences in \p M and outline them into new
216 /// Functions.
217 ///
218 /// \param [in] M - The module to outline from.
219 /// \returns The number of Functions created.
220 unsigned doOutline(Module &M);
221
222 /// Check whether an OutlinableRegion is incompatible with code already
223 /// outlined. OutlinableRegions are incomptaible when there are overlapping
224 /// instructions, or code that has not been recorded has been added to the
225 /// instructions.
226 ///
227 /// \param [in] Region - The OutlinableRegion to check for conflicts with
228 /// already outlined code.
229 /// \returns whether the region can safely be outlined.
230 bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
231
232 /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
233 /// instructions contained in a previously outlined region and put the
234 /// remaining regions in \p CurrentGroup.
235 ///
236 /// \param [in] CandidateVec - List of similarity candidates for regions with
237 /// the same similarity structure.
238 /// \param [in,out] CurrentGroup - Contains the potential sections to
239 /// be outlined.
240 void
241 pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
242 OutlinableGroup &CurrentGroup);
243
244 /// Create the function based on the overall types found in the current
245 /// regions being outlined.
246 ///
247 /// \param M - The module to outline from.
248 /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
249 /// \param [in] FunctionNameSuffix - How many functions have we previously
250 /// created.
251 /// \returns the newly created function.
252 Function *createFunction(Module &M, OutlinableGroup &CG,
253 unsigned FunctionNameSuffix);
254
255 /// Identify the needed extracted inputs in a section, and add to the overall
256 /// function if needed.
257 ///
258 /// \param [in] M - The module to outline from.
259 /// \param [in,out] Region - The region to be extracted.
260 /// \param [in] NotSame - The global value numbers of the Values in the region
261 /// that do not have the same Constant in each strucutrally similar region.
262 void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
263 DenseSet<unsigned> &NotSame);
264
265 /// Find the number of instructions that will be removed by extracting the
266 /// OutlinableRegions in \p CurrentGroup.
267 ///
268 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
269 /// analyzed.
270 /// \returns the number of outlined instructions across all regions.
271 InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
272
273 /// Find the number of instructions that will be added by reloading arguments.
274 ///
275 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
276 /// analyzed.
277 /// \returns the number of added reload instructions across all regions.
278 InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
279
280 /// Find the cost and the benefit of \p CurrentGroup and save it back to
281 /// \p CurrentGroup.
282 ///
283 /// \param [in] M - The module being analyzed
284 /// \param [in,out] CurrentGroup - The overall outlined section
285 void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
286
287 /// Update the output mapping based on the load instruction, and the outputs
288 /// of the extracted function.
289 ///
290 /// \param Region - The region extracted
291 /// \param Outputs - The outputs from the extracted function.
292 /// \param LI - The load instruction used to update the mapping.
293 void updateOutputMapping(OutlinableRegion &Region,
294 ArrayRef<Value *> Outputs, LoadInst *LI);
295
296 /// Extract \p Region into its own function.
297 ///
298 /// \param [in] Region - The region to be extracted into its own function.
299 /// \returns True if it was successfully outlined.
300 bool extractSection(OutlinableRegion &Region);
301
302 /// For the similarities found, and the extracted sections, create a single
303 /// outlined function with appropriate output blocks as necessary.
304 ///
305 /// \param [in] M - The module to outline from
306 /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
307 /// \param [in,out] FuncsToRemove - List of functions to remove from the
308 /// module after outlining is completed.
309 /// \param [in,out] OutlinedFunctionNum - the number of new outlined
310 /// functions.
311 void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
312 std::vector<Function *> &FuncsToRemove,
313 unsigned &OutlinedFunctionNum);
314
315 /// If true, enables us to outline from functions that have LinkOnceFromODR
316 /// linkages.
317 bool OutlineFromLinkODRs = false;
318
319 /// If false, we do not worry if the cost is greater than the benefit. This
320 /// is for debugging and testing, so that we can test small cases to ensure
321 /// that the outlining is being done correctly.
322 bool CostModel = true;
323
324 /// The set of outlined Instructions, identified by their location in the
325 /// sequential ordering of instructions in a Module.
326 DenseSet<unsigned> Outlined;
327
328 /// TargetTransformInfo lambda for target specific information.
329 function_ref<TargetTransformInfo &(Function &)> getTTI;
330
331 /// A mapping from newly created reloaded output values to the original value.
332 /// If an value is replace by an output from an outlined region, this maps
333 /// that Value, back to its original Value.
334 DenseMap<Value *, Value *> OutputMappings;
335
336 /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
337 function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
338
339 /// The optimization remark emitter for the pass.
340 function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
341
342 /// The memory allocator used to allocate the CodeExtractors.
343 SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
344
345 /// The memory allocator used to allocate the OutlinableRegions.
346 SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
347
348 /// The memory allocator used to allocate new IRInstructionData.
349 SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
350
351 /// Custom InstVisitor to classify different instructions for whether it can
352 /// be analyzed for similarity. This is needed as there may be instruction we
353 /// can identify as having similarity, but are more complicated to outline.
354 struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
355 InstructionAllowed() = default;
356
357 bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
358 bool visitPHINode(PHINode &PN) { return EnableBranches; }
359 // TODO: Handle allocas.
360 bool visitAllocaInst(AllocaInst &AI) { return false; }
361 // VAArg instructions are not allowed since this could cause difficulty when
362 // differentiating between different sets of variable instructions in
363 // the deduplicated outlined regions.
364 bool visitVAArgInst(VAArgInst &VI) { return false; }
365 // We exclude all exception handling cases since they are so context
366 // dependent.
367 bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
368 bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
369 // DebugInfo should be included in the regions, but should not be
370 // analyzed for similarity as it has no bearing on the outcome of the
371 // program.
372 bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
373 // TODO: Handle specific intrinsics individually from those that can be
374 // handled.
375 bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
376 // We only handle CallInsts that are not indirect, since we cannot guarantee
377 // that they have a name in these cases.
378 bool visitCallInst(CallInst &CI) {
379 Function *F = CI.getCalledFunction();
380 bool IsIndirectCall = CI.isIndirectCall();
381 if (IsIndirectCall && !EnableIndirectCalls)
382 return false;
383 if (!F && !IsIndirectCall)
384 return false;
385 // Returning twice can cause issues with the state of the function call
386 // that were not expected when the function was used, so we do not include
387 // the call in outlined functions.
388 if (CI.canReturnTwice())
389 return false;
390 // TODO: Update the outliner to capture whether the outlined function
391 // needs these extra attributes.
392
393 // Functions marked with the swifttailcc and tailcc calling conventions
394 // require special handling when outlining musttail functions. The
395 // calling convention must be passed down to the outlined function as
396 // well. Further, there is special handling for musttail calls as well,
397 // requiring a return call directly after. For now, the outliner does not
398 // support this.
399 bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
400 CI.getCallingConv() == CallingConv::Tail;
401 if (IsTailCC && !EnableMustTailCalls)
402 return false;
403 if (CI.isMustTailCall() && !EnableMustTailCalls)
404 return false;
405 // The outliner can only handle musttail items if it is also accompanied
406 // by the tailcc or swifttailcc calling convention.
407 if (CI.isMustTailCall() && !IsTailCC)
408 return false;
409 return true;
410 }
411 // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
412 // the outlined region, and then returned as an output, this will have to be
413 // handled differently.
414 bool visitFreezeInst(FreezeInst &CI) { return false; }
415 // TODO: We do not current handle similarity that changes the control flow.
416 bool visitInvokeInst(InvokeInst &II) { return false; }
417 // TODO: We do not current handle similarity that changes the control flow.
418 bool visitCallBrInst(CallBrInst &CBI) { return false; }
419 // TODO: Handle interblock similarity.
420 bool visitTerminator(Instruction &I) { return false; }
421 bool visitInstruction(Instruction &I) { return true; }
422
423 // The flag variable that marks whether we should allow branch instructions
424 // to be outlined.
425 bool EnableBranches = false;
426
427 // The flag variable that marks whether we should allow indirect calls
428 // to be outlined.
429 bool EnableIndirectCalls = true;
430
431 // The flag variable that marks whether we should allow intrinsics
432 // instructions to be outlined.
433 bool EnableIntrinsics = false;
434
435 // The flag variable that marks whether we should allow musttail calls.
436 bool EnableMustTailCalls = false;
437 };
438
439 /// A InstVisitor used to exclude certain instructions from being outlined.
440 InstructionAllowed InstructionClassifier;
441};
442
443/// Pass to outline similar regions.
444class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
445public:
446 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
447};
448
449} // end namespace llvm
450
451#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
452

source code of llvm/include/llvm/Transforms/IPO/IROutliner.h