1//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Interface file for the IRSimilarityIdentifier for identifying similarities in
11// IR including the IRInstructionMapper, which maps an Instruction to unsigned
12// integers.
13//
14// Two sequences of instructions are called "similar" if they perform the same
15// series of operations for all inputs.
16//
17// \code
18// %1 = add i32 %a, 10
19// %2 = add i32 %a, %1
20// %3 = icmp slt icmp %1, %2
21// \endcode
22//
23// and
24//
25// \code
26// %1 = add i32 11, %a
27// %2 = sub i32 %a, %1
28// %3 = icmp sgt icmp %2, %1
29// \endcode
30//
31// ultimately have the same result, even if the inputs, and structure are
32// slightly different.
33//
34// For instructions, we do not worry about operands that do not have fixed
35// semantic meaning to the program. We consider the opcode that the instruction
36// has, the types, parameters, and extra information such as the function name,
37// or comparison predicate. These are used to create a hash to map instructions
38// to integers to be used in similarity matching in sequences of instructions
39//
40// Terminology:
41// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
42// Instructions), usually used to denote a region of similarity has been found.
43//
44// A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
45// similar to one another.
46//
47//===----------------------------------------------------------------------===//
48
49#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
50#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
51
52#include "llvm/IR/InstVisitor.h"
53#include "llvm/IR/Instructions.h"
54#include "llvm/IR/PassManager.h"
55#include "llvm/Pass.h"
56#include "llvm/Support/Allocator.h"
57#include <optional>
58
59namespace llvm {
60class Module;
61
62namespace IRSimilarity {
63
64struct IRInstructionDataList;
65
66/// This represents what is and is not supported when finding similarity in
67/// Instructions.
68///
69/// Legal Instructions are considered when looking at similarity between
70/// Instructions.
71///
72/// Illegal Instructions cannot be considered when looking for similarity
73/// between Instructions. They act as boundaries between similarity regions.
74///
75/// Invisible Instructions are skipped over during analysis.
76// TODO: Shared with MachineOutliner
77enum InstrType { Legal, Illegal, Invisible };
78
79/// This provides the utilities for hashing an Instruction to an unsigned
80/// integer. Two IRInstructionDatas produce the same hash value when their
81/// underlying Instructions perform the same operation (even if they don't have
82/// the same input operands.)
83/// As a more concrete example, consider the following:
84///
85/// \code
86/// %add1 = add i32 %a, %b
87/// %add2 = add i32 %c, %d
88/// %add3 = add i64 %e, %f
89/// \endcode
90///
91// Then the IRInstructionData wrappers for these Instructions may be hashed like
92/// so:
93///
94/// \code
95/// ; These two adds have the same types and operand types, so they hash to the
96/// ; same number.
97/// %add1 = add i32 %a, %b ; Hash: 1
98/// %add2 = add i32 %c, %d ; Hash: 1
99/// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
100/// ; it hashes to a different number.
101/// %add3 = add i64 %e, %f; Hash: 2
102/// \endcode
103///
104///
105/// This hashing scheme will be used to represent the program as a very long
106/// string. This string can then be placed in a data structure which can be used
107/// for similarity queries.
108///
109/// TODO: Handle types of Instructions which can be equal even with different
110/// operands. (E.g. comparisons with swapped predicates.)
111/// TODO: Handle CallInsts, which are only checked for function type
112/// by \ref isSameOperationAs.
113/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
114/// exact same, and some do not.
115struct IRInstructionData
116 : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
117
118 /// The source Instruction that is being wrapped.
119 Instruction *Inst = nullptr;
120 /// The values of the operands in the Instruction.
121 SmallVector<Value *, 4> OperVals;
122 /// The legality of the wrapped instruction. This is informed by InstrType,
123 /// and is used when checking when two instructions are considered similar.
124 /// If either instruction is not legal, the instructions are automatically not
125 /// considered similar.
126 bool Legal = false;
127
128 /// This is only relevant if we are wrapping a CmpInst where we needed to
129 /// change the predicate of a compare instruction from a greater than form
130 /// to a less than form. It is std::nullopt otherwise.
131 std::optional<CmpInst::Predicate> RevisedPredicate;
132
133 /// This is only relevant if we are wrapping a CallInst. If we are requiring
134 /// that the function calls have matching names as well as types, and the
135 /// call is not an indirect call, this will hold the name of the function. If
136 /// it is an indirect string, it will be the empty string. However, if this
137 /// requirement is not in place it will be the empty string regardless of the
138 /// function call type. The value held here is used to create the hash of the
139 /// instruction, and check to make sure two instructions are close to one
140 /// another.
141 std::optional<std::string> CalleeName;
142
143 /// This structure holds the distances of how far "ahead of" or "behind" the
144 /// target blocks of a branch, or the incoming blocks of a phi nodes are.
145 /// If the value is negative, it means that the block was registered before
146 /// the block of this instruction in terms of blocks in the function.
147 /// Code Example:
148 /// \code
149 /// block_1:
150 /// br i1 %0, label %block_2, label %block_3
151 /// block_2:
152 /// br i1 %1, label %block_1, label %block_2
153 /// block_3:
154 /// br i1 %2, label %block_2, label %block_1
155 /// ; Replacing the labels with relative values, this becomes:
156 /// block_1:
157 /// br i1 %0, distance 1, distance 2
158 /// block_2:
159 /// br i1 %1, distance -1, distance 0
160 /// block_3:
161 /// br i1 %2, distance -1, distance -2
162 /// \endcode
163 /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
164 /// "ahead" of block_2.
165 SmallVector<int, 4> RelativeBlockLocations;
166
167 /// Gather the information that is difficult to gather for an Instruction, or
168 /// is changed. i.e. the operands of an Instruction and the Types of those
169 /// operands. This extra information allows for similarity matching to make
170 /// assertions that allow for more flexibility when checking for whether an
171 /// Instruction performs the same operation.
172 IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
173 IRInstructionData(IRInstructionDataList &IDL);
174
175 /// Fills data stuctures for IRInstructionData when it is constructed from a
176 // reference or a pointer.
177 void initializeInstruction();
178
179 /// Get the predicate that the compare instruction is using for hashing the
180 /// instruction. the IRInstructionData must be wrapping a CmpInst.
181 CmpInst::Predicate getPredicate() const;
182
183 /// Get the callee name that the call instruction is using for hashing the
184 /// instruction. The IRInstructionData must be wrapping a CallInst.
185 StringRef getCalleeName() const;
186
187 /// A function that swaps the predicates to their less than form if they are
188 /// in a greater than form. Otherwise, the predicate is unchanged.
189 ///
190 /// \param CI - The comparison operation to find a consistent preidcate for.
191 /// \return the consistent comparison predicate.
192 static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
193
194 /// For an IRInstructionData containing a branch, finds the
195 /// relative distances from the source basic block to the target by taking
196 /// the difference of the number assigned to the current basic block and the
197 /// target basic block of the branch.
198 ///
199 /// \param BasicBlockToInteger - The mapping of basic blocks to their location
200 /// in the module.
201 void
202 setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
203
204 /// For an IRInstructionData containing a CallInst, set the function name
205 /// appropriately. This will be an empty string if it is an indirect call,
206 /// or we are not matching by name of the called function. It will be the
207 /// name of the function if \p MatchByName is true and it is not an indirect
208 /// call. We may decide not to match by name in order to expand the
209 /// size of the regions we can match. If a function name has the same type
210 /// signature, but the different name, the region of code is still almost the
211 /// same. Since function names can be treated as constants, the name itself
212 /// could be extrapolated away. However, matching by name provides a
213 /// specificity and more "identical" code than not matching by name.
214 ///
215 /// \param MatchByName - A flag to mark whether we are using the called
216 /// function name as a differentiating parameter.
217 void setCalleeName(bool MatchByName = true);
218
219 /// For an IRInstructionData containing a PHINode, finds the
220 /// relative distances from the incoming basic block to the current block by
221 /// taking the difference of the number assigned to the current basic block
222 /// and the incoming basic block of the branch.
223 ///
224 /// \param BasicBlockToInteger - The mapping of basic blocks to their location
225 /// in the module.
226 void
227 setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
228
229 /// Get the BasicBlock based operands for PHINodes and BranchInsts.
230 ///
231 /// \returns A list of relevant BasicBlocks.
232 ArrayRef<Value *> getBlockOperVals();
233
234 /// Hashes \p Value based on its opcode, types, and operand types.
235 /// Two IRInstructionData instances produce the same hash when they perform
236 /// the same operation.
237 ///
238 /// As a simple example, consider the following instructions.
239 ///
240 /// \code
241 /// %add1 = add i32 %x1, %y1
242 /// %add2 = add i32 %x2, %y2
243 ///
244 /// %sub = sub i32 %x1, %y1
245 ///
246 /// %add_i64 = add i64 %x2, %y2
247 /// \endcode
248 ///
249 /// Because the first two adds operate the same types, and are performing the
250 /// same action, they will be hashed to the same value.
251 ///
252 /// However, the subtraction instruction is not the same as an addition, and
253 /// will be hashed to a different value.
254 ///
255 /// Finally, the last add has a different type compared to the first two add
256 /// instructions, so it will also be hashed to a different value that any of
257 /// the previous instructions.
258 ///
259 /// \param [in] ID - The IRInstructionData instance to be hashed.
260 /// \returns A hash_value of the IRInstructionData.
261 friend hash_code hash_value(const IRInstructionData &ID) {
262 SmallVector<Type *, 4> OperTypes;
263 for (Value *V : ID.OperVals)
264 OperTypes.push_back(Elt: V->getType());
265
266 if (isa<CmpInst>(Val: ID.Inst))
267 return llvm::hash_combine(
268 args: llvm::hash_value(value: ID.Inst->getOpcode()),
269 args: llvm::hash_value(ptr: ID.Inst->getType()),
270 args: llvm::hash_value(value: ID.getPredicate()),
271 args: llvm::hash_combine_range(first: OperTypes.begin(), last: OperTypes.end()));
272
273 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: ID.Inst)) {
274 // To hash intrinsics, we use the opcode, and types like the other
275 // instructions, but also, the Intrinsic ID, and the Name of the
276 // intrinsic.
277 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
278 return llvm::hash_combine(
279 args: llvm::hash_value(value: ID.Inst->getOpcode()),
280 args: llvm::hash_value(ptr: ID.Inst->getType()), args: llvm::hash_value(value: IntrinsicID),
281 args: llvm::hash_value(arg: *ID.CalleeName),
282 args: llvm::hash_combine_range(first: OperTypes.begin(), last: OperTypes.end()));
283 }
284
285 if (isa<CallInst>(Val: ID.Inst)) {
286 std::string FunctionName = *ID.CalleeName;
287 return llvm::hash_combine(
288 args: llvm::hash_value(value: ID.Inst->getOpcode()),
289 args: llvm::hash_value(ptr: ID.Inst->getType()),
290 args: llvm::hash_value(ptr: ID.Inst->getType()), args: llvm::hash_value(arg: FunctionName),
291 args: llvm::hash_combine_range(first: OperTypes.begin(), last: OperTypes.end()));
292 }
293
294 return llvm::hash_combine(
295 args: llvm::hash_value(value: ID.Inst->getOpcode()),
296 args: llvm::hash_value(ptr: ID.Inst->getType()),
297 args: llvm::hash_combine_range(first: OperTypes.begin(), last: OperTypes.end()));
298 }
299
300 IRInstructionDataList *IDL = nullptr;
301};
302
303struct IRInstructionDataList
304 : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
305
306/// Compare one IRInstructionData class to another IRInstructionData class for
307/// whether they are performing a the same operation, and can mapped to the
308/// same value. For regular instructions if the hash value is the same, then
309/// they will also be close.
310///
311/// \param A - The first IRInstructionData class to compare
312/// \param B - The second IRInstructionData class to compare
313/// \returns true if \p A and \p B are similar enough to be mapped to the same
314/// value.
315bool isClose(const IRInstructionData &A, const IRInstructionData &B);
316
317struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
318 static inline IRInstructionData *getEmptyKey() { return nullptr; }
319 static inline IRInstructionData *getTombstoneKey() {
320 return reinterpret_cast<IRInstructionData *>(-1);
321 }
322
323 static unsigned getHashValue(const IRInstructionData *E) {
324 using llvm::hash_value;
325 assert(E && "IRInstructionData is a nullptr?");
326 return hash_value(ID: *E);
327 }
328
329 static bool isEqual(const IRInstructionData *LHS,
330 const IRInstructionData *RHS) {
331 if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
332 LHS == getEmptyKey() || LHS == getTombstoneKey())
333 return LHS == RHS;
334
335 assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
336 return isClose(A: *LHS, B: *RHS);
337 }
338};
339
340/// Helper struct for converting the Instructions in a Module into a vector of
341/// unsigned integers. This vector of unsigned integers can be thought of as a
342/// "numeric string". This numeric string can then be queried by, for example,
343/// data structures that find repeated substrings.
344///
345/// This hashing is done per BasicBlock in the module. To hash Instructions
346/// based off of their operations, each Instruction is wrapped in an
347/// IRInstructionData struct. The unsigned integer for an IRInstructionData
348/// depends on:
349/// - The hash provided by the IRInstructionData.
350/// - Which member of InstrType the IRInstructionData is classified as.
351// See InstrType for more details on the possible classifications, and how they
352// manifest in the numeric string.
353///
354/// The numeric string for an individual BasicBlock is terminated by an unique
355/// unsigned integer. This prevents data structures which rely on repetition
356/// from matching across BasicBlocks. (For example, the SuffixTree.)
357/// As a concrete example, if we have the following two BasicBlocks:
358/// \code
359/// bb0:
360/// %add1 = add i32 %a, %b
361/// %add2 = add i32 %c, %d
362/// %add3 = add i64 %e, %f
363/// bb1:
364/// %sub = sub i32 %c, %d
365/// \endcode
366/// We may hash the Instructions like this (via IRInstructionData):
367/// \code
368/// bb0:
369/// %add1 = add i32 %a, %b ; Hash: 1
370/// %add2 = add i32 %c, %d; Hash: 1
371/// %add3 = add i64 %e, %f; Hash: 2
372/// bb1:
373/// %sub = sub i32 %c, %d; Hash: 3
374/// %add4 = add i32 %c, %d ; Hash: 1
375/// \endcode
376/// And produce a "numeric string representation" like so:
377/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
378///
379/// TODO: This is very similar to the MachineOutliner, and should be
380/// consolidated into the same interface.
381struct IRInstructionMapper {
382 /// The starting illegal instruction number to map to.
383 ///
384 /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
385 unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
386
387 /// The next available integer to assign to a legal Instruction to.
388 unsigned LegalInstrNumber = 0;
389
390 /// Correspondence from IRInstructionData to unsigned integers.
391 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
392 InstructionIntegerMap;
393
394 /// A mapping for a basic block in a module to its assigned number/location
395 /// in the module.
396 DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
397
398 /// Set if we added an illegal number in the previous step.
399 /// Since each illegal number is unique, we only need one of them between
400 /// each range of legal numbers. This lets us make sure we don't add more
401 /// than one illegal number per range.
402 bool AddedIllegalLastTime = false;
403
404 /// Marks whether we found a illegal instruction in the previous step.
405 bool CanCombineWithPrevInstr = false;
406
407 /// Marks whether we have found a set of instructions that is long enough
408 /// to be considered for similarity.
409 bool HaveLegalRange = false;
410
411 /// Marks whether we should use exact function names, as well as types to
412 /// find similarity between calls.
413 bool EnableMatchCallsByName = false;
414
415 /// This allocator pointer is in charge of holding on to the IRInstructionData
416 /// so it is not deallocated until whatever external tool is using it is done
417 /// with the information.
418 SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
419
420 /// This allocator pointer is in charge of creating the IRInstructionDataList
421 /// so it is not deallocated until whatever external tool is using it is done
422 /// with the information.
423 SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
424
425 /// Get an allocated IRInstructionData struct using the InstDataAllocator.
426 ///
427 /// \param I - The Instruction to wrap with IRInstructionData.
428 /// \param Legality - A boolean value that is true if the instruction is to
429 /// be considered for similarity, and false if not.
430 /// \param IDL - The InstructionDataList that the IRInstructionData is
431 /// inserted into.
432 /// \returns An allocated IRInstructionData struct.
433 IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
434 IRInstructionDataList &IDL);
435
436 /// Get an empty allocated IRInstructionData struct using the
437 /// InstDataAllocator.
438 ///
439 /// \param IDL - The InstructionDataList that the IRInstructionData is
440 /// inserted into.
441 /// \returns An allocated IRInstructionData struct.
442 IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
443
444 /// Get an allocated IRInstructionDataList object using the IDLAllocator.
445 ///
446 /// \returns An allocated IRInstructionDataList object.
447 IRInstructionDataList *allocateIRInstructionDataList();
448
449 IRInstructionDataList *IDL = nullptr;
450
451 /// Assigns values to all the basic blocks in function \p F starting from
452 /// integer \p BBNumber.
453 ///
454 /// \param F - The function containing the basic blocks to assign numbers to.
455 /// \param BBNumber - The number to start from.
456 void initializeForBBs(Function &F, unsigned &BBNumber) {
457 for (BasicBlock &BB : F)
458 BasicBlockToInteger.insert(KV: std::make_pair(x: &BB, y: BBNumber++));
459 }
460
461 /// Assigns values to all the basic blocks in Module \p M.
462 /// \param M - The module containing the basic blocks to assign numbers to.
463 void initializeForBBs(Module &M) {
464 unsigned BBNumber = 0;
465 for (Function &F : M)
466 initializeForBBs(F, BBNumber);
467 }
468
469 /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
470 /// determined by \p InstrType. Two Instructions are mapped to the same value
471 /// if they are close as defined by the InstructionData class above.
472 ///
473 /// \param [in] BB - The BasicBlock to be mapped to integers.
474 /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
475 /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
476 void convertToUnsignedVec(BasicBlock &BB,
477 std::vector<IRInstructionData *> &InstrList,
478 std::vector<unsigned> &IntegerMapping);
479
480 /// Maps an Instruction to a legal integer.
481 ///
482 /// \param [in] It - The Instruction to be mapped to an integer.
483 /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
484 /// append to.
485 /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
486 /// \returns The integer \p It was mapped to.
487 unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
488 std::vector<unsigned> &IntegerMappingForBB,
489 std::vector<IRInstructionData *> &InstrListForBB);
490
491 /// Maps an Instruction to an illegal integer.
492 ///
493 /// \param [in] It - The \p Instruction to be mapped to an integer.
494 /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
495 /// append to.
496 /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
497 /// \param End - true if creating a dummy IRInstructionData at the end of a
498 /// basic block.
499 /// \returns The integer \p It was mapped to.
500 unsigned mapToIllegalUnsigned(
501 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
502 std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
503
504 IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
505 SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
506 : InstDataAllocator(IDA), IDLAllocator(IDLA) {
507 // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
508 // changed.
509 assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
510 "DenseMapInfo<unsigned>'s empty key isn't -1!");
511 assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
512 static_cast<unsigned>(-2) &&
513 "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
514
515 IDL = new (IDLAllocator->Allocate())
516 IRInstructionDataList();
517 }
518
519 /// Custom InstVisitor to classify different instructions for whether it can
520 /// be analyzed for similarity.
521 struct InstructionClassification
522 : public InstVisitor<InstructionClassification, InstrType> {
523 InstructionClassification() = default;
524
525 // TODO: Determine a scheme to resolve when the label is similar enough.
526 InstrType visitBranchInst(BranchInst &BI) {
527 if (EnableBranches)
528 return Legal;
529 return Illegal;
530 }
531 InstrType visitPHINode(PHINode &PN) {
532 if (EnableBranches)
533 return Legal;
534 return Illegal;
535 }
536 // TODO: Handle allocas.
537 InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
538 // We exclude variable argument instructions since variable arguments
539 // requires extra checking of the argument list.
540 InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
541 // We exclude all exception handling cases since they are so context
542 // dependent.
543 InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
544 InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
545 // DebugInfo should be included in the regions, but should not be
546 // analyzed for similarity as it has no bearing on the outcome of the
547 // program.
548 InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
549 InstrType visitIntrinsicInst(IntrinsicInst &II) {
550 // These are disabled due to complications in the CodeExtractor when
551 // outlining these instructions. For instance, It is unclear what we
552 // should do when moving only the start or end lifetime instruction into
553 // an outlined function. Also, assume-like intrinsics could be removed
554 // from the region, removing arguments, causing discrepencies in the
555 // number of inputs between different regions.
556 if (II.isAssumeLikeIntrinsic())
557 return Illegal;
558 return EnableIntrinsics ? Legal : Illegal;
559 }
560 // We only allow call instructions where the function has a name and
561 // is not an indirect call.
562 InstrType visitCallInst(CallInst &CI) {
563 Function *F = CI.getCalledFunction();
564 bool IsIndirectCall = CI.isIndirectCall();
565 if (IsIndirectCall && !EnableIndirectCalls)
566 return Illegal;
567 if (!F && !IsIndirectCall)
568 return Illegal;
569 // Functions marked with the swifttailcc and tailcc calling conventions
570 // require special handling when outlining musttail functions. The
571 // calling convention must be passed down to the outlined function as
572 // well. Further, there is special handling for musttail calls as well,
573 // requiring a return call directly after. For now, the outliner does not
574 // support this, so we do not handle matching this case either.
575 if ((CI.getCallingConv() == CallingConv::SwiftTail ||
576 CI.getCallingConv() == CallingConv::Tail) &&
577 !EnableMustTailCalls)
578 return Illegal;
579 if (CI.isMustTailCall() && !EnableMustTailCalls)
580 return Illegal;
581 return Legal;
582 }
583 // TODO: We do not current handle similarity that changes the control flow.
584 InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
585 // TODO: We do not current handle similarity that changes the control flow.
586 InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
587 // TODO: Handle interblock similarity.
588 InstrType visitTerminator(Instruction &I) { return Illegal; }
589 InstrType visitInstruction(Instruction &I) { return Legal; }
590
591 // The flag variable that lets the classifier know whether we should
592 // allow branches to be checked for similarity.
593 bool EnableBranches = false;
594
595 // The flag variable that lets the classifier know whether we should
596 // allow indirect calls to be considered legal instructions.
597 bool EnableIndirectCalls = false;
598
599 // Flag that lets the classifier know whether we should allow intrinsics to
600 // be checked for similarity.
601 bool EnableIntrinsics = false;
602
603 // Flag that lets the classifier know whether we should allow tail calls to
604 // be checked for similarity.
605 bool EnableMustTailCalls = false;
606 };
607
608 /// Maps an Instruction to a member of InstrType.
609 InstructionClassification InstClassifier;
610};
611
612/// This is a class that wraps a range of IRInstructionData from one point to
613/// another in the vector of IRInstructionData, which is a region of the
614/// program. It is also responsible for defining the structure within this
615/// region of instructions.
616///
617/// The structure of a region is defined through a value numbering system
618/// assigned to each unique value in a region at the creation of the
619/// IRSimilarityCandidate.
620///
621/// For example, for each Instruction we add a mapping for each new
622/// value seen in that Instruction.
623/// IR: Mapping Added:
624/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
625/// %add2 = add i32 %a, %1 %add2 -> 4
626/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
627///
628/// We can compare IRSimilarityCandidates against one another.
629/// The \ref isSimilar function compares each IRInstructionData against one
630/// another and if we have the same sequences of IRInstructionData that would
631/// create the same hash, we have similar IRSimilarityCandidates.
632///
633/// We can also compare the structure of IRSimilarityCandidates. If we can
634/// create a mapping of registers in the region contained by one
635/// IRSimilarityCandidate to the region contained by different
636/// IRSimilarityCandidate, they can be considered structurally similar.
637///
638/// IRSimilarityCandidate1: IRSimilarityCandidate2:
639/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
640/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
641/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
642///
643/// Can have the following mapping from candidate to candidate of:
644/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
645/// and can be considered similar.
646///
647/// IRSimilarityCandidate1: IRSimilarityCandidate2:
648/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
649/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
650/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
651///
652/// We cannot create the same mapping since the use of c4 is not used in the
653/// same way as %b or c2.
654class IRSimilarityCandidate {
655private:
656 /// The start index of this IRSimilarityCandidate in the instruction list.
657 unsigned StartIdx = 0;
658
659 /// The number of instructions in this IRSimilarityCandidate.
660 unsigned Len = 0;
661
662 /// The first instruction in this IRSimilarityCandidate.
663 IRInstructionData *FirstInst = nullptr;
664
665 /// The last instruction in this IRSimilarityCandidate.
666 IRInstructionData *LastInst = nullptr;
667
668 /// Global Value Numbering structures
669 /// @{
670 /// Stores the mapping of the value to the number assigned to it in the
671 /// IRSimilarityCandidate.
672 DenseMap<Value *, unsigned> ValueToNumber;
673 /// Stores the mapping of the number to the value assigned this number.
674 DenseMap<unsigned, Value *> NumberToValue;
675 /// Stores the mapping of a value's number to canonical numbering in the
676 /// candidate's respective similarity group.
677 DenseMap<unsigned, unsigned> NumberToCanonNum;
678 /// Stores the mapping of canonical number in the candidate's respective
679 /// similarity group to a value number.
680 DenseMap<unsigned, unsigned> CanonNumToNumber;
681 /// @}
682
683public:
684 /// \param StartIdx - The starting location of the region.
685 /// \param Len - The length of the region.
686 /// \param FirstInstIt - The starting IRInstructionData of the region.
687 /// \param LastInstIt - The ending IRInstructionData of the region.
688 IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
689 IRInstructionData *FirstInstIt,
690 IRInstructionData *LastInstIt);
691
692 /// \param A - The first IRInstructionCandidate to compare.
693 /// \param B - The second IRInstructionCandidate to compare.
694 /// \returns True when every IRInstructionData in \p A is similar to every
695 /// IRInstructionData in \p B.
696 static bool isSimilar(const IRSimilarityCandidate &A,
697 const IRSimilarityCandidate &B);
698
699 /// \param [in] A - The first IRInstructionCandidate to compare.
700 /// \param [in] B - The second IRInstructionCandidate to compare.
701 /// \returns True when every IRInstructionData in \p A is structurally similar
702 /// to \p B.
703 static bool compareStructure(const IRSimilarityCandidate &A,
704 const IRSimilarityCandidate &B);
705
706 /// \param [in] A - The first IRInstructionCandidate to compare.
707 /// \param [in] B - The second IRInstructionCandidate to compare.
708 /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
709 /// candidate \p A to candidate \B.
710 /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
711 /// candidate \p B to candidate \A.
712 /// \returns True when every IRInstructionData in \p A is structurally similar
713 /// to \p B.
714 static bool
715 compareStructure(const IRSimilarityCandidate &A,
716 const IRSimilarityCandidate &B,
717 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
718 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
719
720 struct OperandMapping {
721 /// The IRSimilarityCandidate that holds the instruction the OperVals were
722 /// pulled from.
723 const IRSimilarityCandidate &IRSC;
724
725 /// The operand values to be analyzed.
726 ArrayRef<Value *> &OperVals;
727
728 /// The current mapping of global value numbers from one IRSimilarityCandidate
729 /// to another IRSimilarityCandidate.
730 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
731 };
732
733 /// A helper struct to hold the candidate, for a branch instruction, the
734 /// relative location of a label, and the label itself. This is mostly to
735 /// group the values together before passing them as a bundle to a function.
736 struct RelativeLocMapping {
737 /// The IRSimilarityCandidate that holds the instruction the relative
738 /// location was pulled from.
739 const IRSimilarityCandidate &IRSC;
740
741 /// The relative location to be analyzed.
742 int RelativeLocation;
743
744 /// The corresponding value.
745 Value *OperVal;
746 };
747
748 /// Compare the operands in \p A and \p B and check that the current mapping
749 /// of global value numbers from \p A to \p B and \p B to \A is consistent.
750 ///
751 /// \param A - The first IRInstructionCandidate, operand values, and current
752 /// operand mappings to compare.
753 /// \param B - The second IRInstructionCandidate, operand values, and current
754 /// operand mappings to compare.
755 /// \returns true if the IRSimilarityCandidates operands are compatible.
756 static bool compareNonCommutativeOperandMapping(OperandMapping A,
757 OperandMapping B);
758
759 /// Compare the operands in \p A and \p B and check that the current mapping
760 /// of global value numbers from \p A to \p B and \p B to \A is consistent
761 /// given that the operands are commutative.
762 ///
763 /// \param A - The first IRInstructionCandidate, operand values, and current
764 /// operand mappings to compare.
765 /// \param B - The second IRInstructionCandidate, operand values, and current
766 /// operand mappings to compare.
767 /// \returns true if the IRSimilarityCandidates operands are compatible.
768 static bool compareCommutativeOperandMapping(OperandMapping A,
769 OperandMapping B);
770
771 /// Compare the GVN of the assignment value in corresponding instructions in
772 /// IRSimilarityCandidates \p A and \p B and check that there exists a mapping
773 /// between the values and replaces the mapping with a one-to-one value if
774 /// needed.
775 ///
776 /// \param InstValA - The assignment GVN from the first IRSimilarityCandidate.
777 /// \param InstValB - The assignment GVN from the second
778 /// IRSimilarityCandidate.
779 /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
780 /// candidate \p A to candidate \B.
781 /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
782 /// candidate \p B to candidate \A.
783 /// \returns true if the IRSimilarityCandidates assignments are compatible.
784 static bool compareAssignmentMapping(
785 const unsigned InstValA, const unsigned &InstValB,
786 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
787 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
788
789 /// Compare the relative locations in \p A and \p B and check that the
790 /// distances match if both locations are contained in the region, and that
791 /// the branches both point outside the region if they do not.
792 /// Example Region:
793 /// \code
794 /// entry:
795 /// br i1 %0, label %block_1, label %block_3
796 /// block_0:
797 /// br i1 %0, label %block_1, label %block_2
798 /// block_1:
799 /// br i1 %0, label %block_2, label %block_3
800 /// block_2:
801 /// br i1 %1, label %block_1, label %block_4
802 /// block_3:
803 /// br i1 %2, label %block_2, label %block_5
804 /// \endcode
805 /// If we compare the branches in block_0 and block_1 the relative values are
806 /// 1 and 2 for both, so we consider this a match.
807 ///
808 /// If we compare the branches in entry and block_0 the relative values are
809 /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not
810 /// consider them a match.
811 ///
812 /// If we compare the branches in block_1 and block_2 the relative values are
813 /// 1 and 2, and -1 and None respectively. As a result we do not consider
814 /// these to be the same
815 ///
816 /// If we compare the branches in block_2 and block_3 the relative values are
817 /// -1 and None for both. We do consider these to be a match.
818 ///
819 /// \param A - The first IRInstructionCandidate, relative location value,
820 /// and incoming block.
821 /// \param B - The second IRInstructionCandidate, relative location value,
822 /// and incoming block.
823 /// \returns true if the relative locations match.
824 static bool checkRelativeLocations(RelativeLocMapping A,
825 RelativeLocMapping B);
826
827 /// Create a mapping from the value numbering to a different separate set of
828 /// numbers. This will serve as a guide for relating one candidate to another.
829 /// The canonical number gives use the ability identify which global value
830 /// number in one candidate relates to the global value number in the other.
831 ///
832 /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
833 /// canonical numbering for.
834 static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
835
836 /// Create a mapping for the value numbering of the calling
837 /// IRSimilarityCandidate, to a different separate set of numbers, based on
838 /// the canonical ordering in \p SourceCand. These are defined based on the
839 /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
840 /// these relationships should have the same information, just in opposite
841 /// directions.
842 ///
843 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
844 /// canonical numbering from.
845 /// \param ToSourceMapping - The mapping of value numbers from this candidate
846 /// to \p SourceCand.
847 /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
848 /// to this candidate.
849 void createCanonicalRelationFrom(
850 IRSimilarityCandidate &SourceCand,
851 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
852 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
853
854 /// Create a mapping for the value numbering of the calling
855 /// IRSimilarityCandidate, to a different separate set of numbers, based on
856 /// the canonical ordering in \p SourceCand. These are defined based on the
857 /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
858 /// these relationships should have the same information, just in opposite
859 /// directions. Uses the \p OneToOne mapping from target candidate to \p
860 /// SourceCand GVNs to determine the mapping first for values with multiple
861 /// mappings. This mapping is created by the ordering of operands in the
862 /// instruction they are first seen in the candidates.
863 ///
864 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
865 /// canonical numbering from.
866 /// \param [in,out] OneToOne - A mapping of value numbers from candidate
867 /// \p A to candidate \B using the structure of the original instructions.
868 /// \param ToSourceMapping - The mapping of value numbers from this candidate
869 /// to \p SourceCand.
870 /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
871 /// to this candidate.
872 void createCanonicalRelationFrom(
873 IRSimilarityCandidate &SourceCand,
874 DenseMap<unsigned, unsigned> &OneToOne,
875 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
876 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
877
878 /// Create a mapping for the value numbering of the calling
879 /// IRSimilarityCandidate, to a different separate set of numbers, based on
880 /// the canonical ordering in \p SourceCand. These are defined based on the
881 /// canonical mapping defined between \p SoureCandLarge and
882 /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
883 /// similar, and fully encapsulate the IRSimilarityCandidates in question.
884 /// These are used as a "bridge" from the \p SourceCand to the target.
885 ///
886 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
887 /// canonical numbering from.
888 /// \param SoureCandLarge - The IRSimilarityCandidate fully containing
889 /// \p SourceCand.
890 /// \param TargetCandLarge - The IRSimilarityCandidate fully containing
891 /// this Candidate.
892 void createCanonicalRelationFrom(
893 IRSimilarityCandidate &SourceCand,
894 IRSimilarityCandidate &SourceCandLarge,
895 IRSimilarityCandidate &TargetCandLarge);
896
897 /// \param [in,out] BBSet - The set to track the basic blocks.
898 void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
899 for (IRInstructionData &ID : *this) {
900 BasicBlock *BB = ID.Inst->getParent();
901 BBSet.insert(V: BB);
902 }
903 }
904
905 /// \param [in,out] BBSet - The set to track the basic blocks.
906 /// \param [in,out] BBList - A list in order of use to track the basic blocks.
907 void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
908 SmallVector<BasicBlock *> &BBList) const {
909 for (IRInstructionData &ID : *this) {
910 BasicBlock *BB = ID.Inst->getParent();
911 if (BBSet.insert(V: BB).second)
912 BBList.push_back(Elt: BB);
913 }
914 }
915
916 /// Compare the start and end indices of the two IRSimilarityCandidates for
917 /// whether they overlap. If the start instruction of one
918 /// IRSimilarityCandidate is less than the end instruction of the other, and
919 /// the start instruction of one is greater than the start instruction of the
920 /// other, they overlap.
921 ///
922 /// \returns true if the IRSimilarityCandidates do not have overlapping
923 /// instructions.
924 static bool overlap(const IRSimilarityCandidate &A,
925 const IRSimilarityCandidate &B);
926
927 /// \returns the number of instructions in this Candidate.
928 unsigned getLength() const { return Len; }
929
930 /// \returns the start index of this IRSimilarityCandidate.
931 unsigned getStartIdx() const { return StartIdx; }
932
933 /// \returns the end index of this IRSimilarityCandidate.
934 unsigned getEndIdx() const { return StartIdx + Len - 1; }
935
936 /// \returns The first IRInstructionData.
937 IRInstructionData *front() const { return FirstInst; }
938 /// \returns The last IRInstructionData.
939 IRInstructionData *back() const { return LastInst; }
940
941 /// \returns The first Instruction.
942 Instruction *frontInstruction() { return FirstInst->Inst; }
943 /// \returns The last Instruction
944 Instruction *backInstruction() { return LastInst->Inst; }
945
946 /// \returns The BasicBlock the IRSimilarityCandidate starts in.
947 BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
948 /// \returns The BasicBlock the IRSimilarityCandidate ends in.
949 BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
950
951 /// \returns The Function that the IRSimilarityCandidate is located in.
952 Function *getFunction() { return getStartBB()->getParent(); }
953
954 /// Finds the positive number associated with \p V if it has been mapped.
955 /// \param [in] V - the Value to find.
956 /// \returns The positive number corresponding to the value.
957 /// \returns std::nullopt if not present.
958 std::optional<unsigned> getGVN(Value *V) {
959 assert(V != nullptr && "Value is a nullptr?");
960 DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(Val: V);
961 if (VNIt == ValueToNumber.end())
962 return std::nullopt;
963 return VNIt->second;
964 }
965
966 /// Finds the Value associate with \p Num if it exists.
967 /// \param [in] Num - the number to find.
968 /// \returns The Value associated with the number.
969 /// \returns std::nullopt if not present.
970 std::optional<Value *> fromGVN(unsigned Num) {
971 DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Val: Num);
972 if (VNIt == NumberToValue.end())
973 return std::nullopt;
974 assert(VNIt->second != nullptr && "Found value is a nullptr!");
975 return VNIt->second;
976 }
977
978 /// Find the canonical number from the global value number \p N stored in the
979 /// candidate.
980 ///
981 /// \param N - The global value number to find the canonical number for.
982 /// \returns An optional containing the value, and std::nullopt if it could
983 /// not be found.
984 std::optional<unsigned> getCanonicalNum(unsigned N) {
985 DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(Val: N);
986 if (NCIt == NumberToCanonNum.end())
987 return std::nullopt;
988 return NCIt->second;
989 }
990
991 /// Find the global value number from the canonical number \p N stored in the
992 /// candidate.
993 ///
994 /// \param N - The canonical number to find the global vlaue number for.
995 /// \returns An optional containing the value, and std::nullopt if it could
996 /// not be found.
997 std::optional<unsigned> fromCanonicalNum(unsigned N) {
998 DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(Val: N);
999 if (CNIt == CanonNumToNumber.end())
1000 return std::nullopt;
1001 return CNIt->second;
1002 }
1003
1004 /// \param RHS -The IRSimilarityCandidate to compare against
1005 /// \returns true if the IRSimilarityCandidate is occurs after the
1006 /// IRSimilarityCandidate in the program.
1007 bool operator<(const IRSimilarityCandidate &RHS) const {
1008 return getStartIdx() > RHS.getStartIdx();
1009 }
1010
1011 using iterator = IRInstructionDataList::iterator;
1012 iterator begin() const { return iterator(front()); }
1013 iterator end() const { return std::next(x: iterator(back())); }
1014};
1015
1016typedef DenseMap<IRSimilarityCandidate *,
1017 DenseMap<unsigned, DenseSet<unsigned>>>
1018 CandidateGVNMapping;
1019typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
1020typedef std::vector<SimilarityGroup> SimilarityGroupList;
1021
1022/// This class puts all the pieces of the IRInstructionData,
1023/// IRInstructionMapper, IRSimilarityCandidate together.
1024///
1025/// It first feeds the Module or vector of Modules into the IRInstructionMapper,
1026/// and puts all the mapped instructions into a single long list of
1027/// IRInstructionData.
1028///
1029/// The list of unsigned integers is given to the Suffix Tree or similar data
1030/// structure to find repeated subsequences. We construct an
1031/// IRSimilarityCandidate for each instance of the subsequence. We compare them
1032/// against one another since These repeated subsequences can have different
1033/// structure. For each different kind of structure found, we create a
1034/// similarity group.
1035///
1036/// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
1037/// structurally similar to one another, while C is different we would have two
1038/// SimilarityGroups:
1039///
1040/// SimilarityGroup 1: SimilarityGroup 2
1041/// A, B, D C
1042///
1043/// A list of the different similarity groups is then returned after
1044/// analyzing the module.
1045class IRSimilarityIdentifier {
1046public:
1047 IRSimilarityIdentifier(bool MatchBranches = true,
1048 bool MatchIndirectCalls = true,
1049 bool MatchCallsWithName = false,
1050 bool MatchIntrinsics = true,
1051 bool MatchMustTailCalls = true)
1052 : Mapper(&InstDataAllocator, &InstDataListAllocator),
1053 EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
1054 EnableMatchingCallsByName(MatchCallsWithName),
1055 EnableIntrinsics(MatchIntrinsics),
1056 EnableMustTailCalls(MatchMustTailCalls) {}
1057
1058private:
1059 /// Map the instructions in the module to unsigned integers, using mapping
1060 /// already present in the Mapper if possible.
1061 ///
1062 /// \param [in] M Module - To map to integers.
1063 /// \param [in,out] InstrList - The vector to append IRInstructionData to.
1064 /// \param [in,out] IntegerMapping - The vector to append integers to.
1065 void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
1066 std::vector<unsigned> &IntegerMapping);
1067
1068 /// Map the instructions in the modules vector to unsigned integers, using
1069 /// mapping already present in the mapper if possible.
1070 ///
1071 /// \param [in] Modules - The list of modules to use to populate the mapper
1072 /// \param [in,out] InstrList - The vector to append IRInstructionData to.
1073 /// \param [in,out] IntegerMapping - The vector to append integers to.
1074 void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
1075 std::vector<IRInstructionData *> &InstrList,
1076 std::vector<unsigned> &IntegerMapping);
1077
1078 /// Find the similarity candidates in \p InstrList and corresponding
1079 /// \p UnsignedVec
1080 ///
1081 /// \param [in,out] InstrList - The vector to append IRInstructionData to.
1082 /// \param [in,out] IntegerMapping - The vector to append integers to.
1083 /// candidates found in the program.
1084 void findCandidates(std::vector<IRInstructionData *> &InstrList,
1085 std::vector<unsigned> &IntegerMapping);
1086
1087public:
1088 // Find the IRSimilarityCandidates in the \p Modules and group by structural
1089 // similarity in a SimilarityGroup, each group is returned in a
1090 // SimilarityGroupList.
1091 //
1092 // \param [in] Modules - the modules to analyze.
1093 // \returns The groups of similarity ranges found in the modules.
1094 SimilarityGroupList &
1095 findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
1096
1097 // Find the IRSimilarityCandidates in the given Module grouped by structural
1098 // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
1099 //
1100 // \param [in] M - the module to analyze.
1101 // \returns The groups of similarity ranges found in the module.
1102 SimilarityGroupList &findSimilarity(Module &M);
1103
1104 // Clears \ref SimilarityCandidates if it is already filled by a previous run.
1105 void resetSimilarityCandidates() {
1106 // If we've already analyzed a Module or set of Modules, so we must clear
1107 // the SimilarityCandidates to make sure we do not have only old values
1108 // hanging around.
1109 if (SimilarityCandidates)
1110 SimilarityCandidates->clear();
1111 else
1112 SimilarityCandidates = SimilarityGroupList();
1113 }
1114
1115 // \returns The groups of similarity ranges found in the most recently passed
1116 // set of modules.
1117 std::optional<SimilarityGroupList> &getSimilarity() {
1118 return SimilarityCandidates;
1119 }
1120
1121private:
1122 /// The allocator for IRInstructionData.
1123 SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
1124
1125 /// The allocator for IRInstructionDataLists.
1126 SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
1127
1128 /// Map Instructions to unsigned integers and wraps the Instruction in an
1129 /// instance of IRInstructionData.
1130 IRInstructionMapper Mapper;
1131
1132 /// The flag variable that marks whether we should check branches for
1133 /// similarity, or only look within basic blocks.
1134 bool EnableBranches = true;
1135
1136 /// The flag variable that marks whether we allow indirect calls to be checked
1137 /// for similarity, or exclude them as a legal instruction.
1138 bool EnableIndirectCalls = true;
1139
1140 /// The flag variable that marks whether we allow calls to be marked as
1141 /// similar if they do not have the same name, only the same calling
1142 /// convention, attributes and type signature.
1143 bool EnableMatchingCallsByName = true;
1144
1145 /// The flag variable that marks whether we should check intrinsics for
1146 /// similarity.
1147 bool EnableIntrinsics = true;
1148
1149 // The flag variable that marks whether we should allow tailcalls
1150 // to be checked for similarity.
1151 bool EnableMustTailCalls = false;
1152
1153 /// The SimilarityGroups found with the most recent run of \ref
1154 /// findSimilarity. std::nullopt if there is no recent run.
1155 std::optional<SimilarityGroupList> SimilarityCandidates;
1156};
1157
1158} // end namespace IRSimilarity
1159
1160/// An analysis pass based on legacy pass manager that runs and returns
1161/// IRSimilarityIdentifier run on the Module.
1162class IRSimilarityIdentifierWrapperPass : public ModulePass {
1163 std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
1164
1165public:
1166 static char ID;
1167 IRSimilarityIdentifierWrapperPass();
1168
1169 IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
1170 const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
1171
1172 bool doInitialization(Module &M) override;
1173 bool doFinalization(Module &M) override;
1174 bool runOnModule(Module &M) override;
1175 void getAnalysisUsage(AnalysisUsage &AU) const override {
1176 AU.setPreservesAll();
1177 }
1178};
1179
1180/// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
1181/// Module.
1182class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
1183public:
1184 typedef IRSimilarity::IRSimilarityIdentifier Result;
1185
1186 Result run(Module &M, ModuleAnalysisManager &);
1187
1188private:
1189 friend AnalysisInfoMixin<IRSimilarityAnalysis>;
1190 static AnalysisKey Key;
1191};
1192
1193/// Printer pass that uses \c IRSimilarityAnalysis.
1194class IRSimilarityAnalysisPrinterPass
1195 : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
1196 raw_ostream &OS;
1197
1198public:
1199 explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
1200 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
1201 static bool isRequired() { return true; }
1202};
1203
1204} // end namespace llvm
1205
1206#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
1207

source code of llvm/include/llvm/Analysis/IRSimilarityIdentifier.h