1//===-- IRMutator.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/FuzzMutate/IRMutator.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SmallSet.h"
12#include "llvm/Analysis/TargetLibraryInfo.h"
13#include "llvm/Bitcode/BitcodeReader.h"
14#include "llvm/Bitcode/BitcodeWriter.h"
15#include "llvm/FuzzMutate/Operations.h"
16#include "llvm/FuzzMutate/Random.h"
17#include "llvm/FuzzMutate/RandomIRBuilder.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/FMF.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/InstIterator.h"
22#include "llvm/IR/Instructions.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/Verifier.h"
26#include "llvm/Support/MemoryBuffer.h"
27#include "llvm/Support/SourceMgr.h"
28#include "llvm/Transforms/Scalar/DCE.h"
29#include "llvm/Transforms/Utils/BasicBlockUtils.h"
30#include <map>
31#include <optional>
32
33using namespace llvm;
34
35void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
36 auto RS = makeSampler<Function *>(RandGen&: IB.Rand);
37 for (Function &F : M)
38 if (!F.isDeclaration())
39 RS.sample(Item: &F, /*Weight=*/1);
40
41 while (RS.totalWeight() < IB.MinFunctionNum) {
42 Function *F = IB.createFunctionDefinition(M);
43 RS.sample(Item: F, /*Weight=*/1);
44 }
45 mutate(F&: *RS.getSelection(), IB);
46}
47
48void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
49 auto Range = make_filter_range(Range: make_pointer_range(Range&: F),
50 Pred: [](BasicBlock *BB) { return !BB->isEHPad(); });
51
52 mutate(BB&: *makeSampler(RandGen&: IB.Rand, Items&: Range).getSelection(), IB);
53}
54
55void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
56 mutate(I&: *makeSampler(RandGen&: IB.Rand, Items: make_pointer_range(Range&: BB)).getSelection(), IB);
57}
58
59size_t llvm::IRMutator::getModuleSize(const Module &M) {
60 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
61}
62
63void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
64 std::vector<Type *> Types;
65 for (const auto &Getter : AllowedTypes)
66 Types.push_back(x: Getter(M.getContext()));
67 RandomIRBuilder IB(Seed, Types);
68
69 size_t CurSize = IRMutator::getModuleSize(M);
70 auto RS = makeSampler<IRMutationStrategy *>(RandGen&: IB.Rand);
71 for (const auto &Strategy : Strategies)
72 RS.sample(Item: Strategy.get(),
73 Weight: Strategy->getWeight(CurrentSize: CurSize, MaxSize, CurrentWeight: RS.totalWeight()));
74 if (RS.totalWeight() == 0)
75 return;
76 auto Strategy = RS.getSelection();
77
78 Strategy->mutate(M, IB);
79}
80
81static void eliminateDeadCode(Function &F) {
82 FunctionPassManager FPM;
83 FPM.addPass(Pass: DCEPass());
84 FunctionAnalysisManager FAM;
85 FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(); });
86 FAM.registerPass(PassBuilder: [&] { return PassInstrumentationAnalysis(); });
87 FPM.run(IR&: F, AM&: FAM);
88}
89
90void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
91 IRMutationStrategy::mutate(F, IB);
92 eliminateDeadCode(F);
93}
94
95std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
96 std::vector<fuzzerop::OpDescriptor> Ops;
97 describeFuzzerIntOps(Ops);
98 describeFuzzerFloatOps(Ops);
99 describeFuzzerControlFlowOps(Ops);
100 describeFuzzerPointerOps(Ops);
101 describeFuzzerAggregateOps(Ops);
102 describeFuzzerVectorOps(Ops);
103 return Ops;
104}
105
106std::optional<fuzzerop::OpDescriptor>
107InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
108 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
109 return Op.SourcePreds[0].matches(Cur: {}, New: Src);
110 };
111 auto RS = makeSampler(RandGen&: IB.Rand, Items: make_filter_range(Range&: Operations, Pred: OpMatchesPred));
112 if (RS.isEmpty())
113 return std::nullopt;
114 return *RS;
115}
116
117static inline iterator_range<BasicBlock::iterator>
118getInsertionRange(BasicBlock &BB) {
119 auto End = BB.getTerminatingMustTailCall() ? std::prev(x: BB.end()) : BB.end();
120 return make_range(x: BB.getFirstInsertionPt(), y: End);
121}
122
123void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
124 SmallVector<Instruction *, 32> Insts;
125 for (Instruction &I : getInsertionRange(BB))
126 Insts.push_back(Elt: &I);
127 if (Insts.size() < 1)
128 return;
129
130 // Choose an insertion point for our new instruction.
131 size_t IP = uniform<size_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
132
133 auto InstsBefore = ArrayRef(Insts).slice(N: 0, M: IP);
134 auto InstsAfter = ArrayRef(Insts).slice(N: IP);
135
136 // Choose a source, which will be used to constrain the operation selection.
137 SmallVector<Value *, 2> Srcs;
138 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore));
139
140 // Choose an operation that's constrained to be valid for the type of the
141 // source, collect any other sources it needs, and then build it.
142 auto OpDesc = chooseOperation(Src: Srcs[0], IB);
143 // Bail if no operation was found
144 if (!OpDesc)
145 return;
146
147 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(N: 1))
148 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore, Srcs, Pred));
149
150 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
151 // Find a sink and wire up the results of the operation.
152 IB.connectToSink(BB, Insts: InstsAfter, V: Op);
153 }
154}
155
156uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
157 uint64_t CurrentWeight) {
158 // If we have less than 200 bytes, panic and try to always delete.
159 if (CurrentSize > MaxSize - 200)
160 return CurrentWeight ? CurrentWeight * 100 : 1;
161 // Draw a line starting from when we only have 1k left and increasing linearly
162 // to double the current weight.
163 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
164 (static_cast<int64_t>(MaxSize) -
165 static_cast<int64_t>(CurrentSize) - 1000) /
166 1000;
167 // Clamp negative weights to zero.
168 if (Line < 0)
169 return 0;
170 return Line;
171}
172
173void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
174 auto RS = makeSampler<Instruction *>(RandGen&: IB.Rand);
175 for (Instruction &Inst : instructions(F)) {
176 // TODO: We can't handle these instructions.
177 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
178 isa<PHINode>(Val: Inst))
179 continue;
180
181 RS.sample(Item: &Inst, /*Weight=*/1);
182 }
183 if (RS.isEmpty())
184 return;
185
186 // Delete the instruction.
187 mutate(Inst&: *RS.getSelection(), IB);
188 // Clean up any dead code that's left over after removing the instruction.
189 eliminateDeadCode(F);
190}
191
192void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
193 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
194
195 if (Inst.getType()->isVoidTy()) {
196 // Instructions with void type (ie, store) have no uses to worry about. Just
197 // erase it and move on.
198 Inst.eraseFromParent();
199 return;
200 }
201
202 // Otherwise we need to find some other value with the right type to keep the
203 // users happy.
204 auto Pred = fuzzerop::onlyType(Only: Inst.getType());
205 auto RS = makeSampler<Value *>(RandGen&: IB.Rand);
206 SmallVector<Instruction *, 32> InstsBefore;
207 BasicBlock *BB = Inst.getParent();
208 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
209 ++I) {
210 if (Pred.matches(Cur: {}, New: &*I))
211 RS.sample(Item: &*I, /*Weight=*/1);
212 InstsBefore.push_back(Elt: &*I);
213 }
214 if (!RS)
215 RS.sample(Item: IB.newSource(BB&: *BB, Insts: InstsBefore, Srcs: {}, Pred), /*Weight=*/1);
216
217 Inst.replaceAllUsesWith(V: RS.getSelection());
218 Inst.eraseFromParent();
219}
220
221void InstModificationIRStrategy::mutate(Instruction &Inst,
222 RandomIRBuilder &IB) {
223 SmallVector<std::function<void()>, 8> Modifications;
224 CmpInst *CI = nullptr;
225 GetElementPtrInst *GEP = nullptr;
226 switch (Inst.getOpcode()) {
227 default:
228 break;
229 // Add nsw, nuw flag
230 case Instruction::Add:
231 case Instruction::Mul:
232 case Instruction::Sub:
233 case Instruction::Shl:
234 Modifications.push_back(
235 Elt: [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
236 Modifications.push_back(
237 Elt: [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
238 break;
239 case Instruction::ICmp:
240 CI = cast<ICmpInst>(Val: &Inst);
241 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
242 p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
243 Modifications.push_back(
244 Elt: [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
245 }
246 break;
247 // Add inbound flag.
248 case Instruction::GetElementPtr:
249 GEP = cast<GetElementPtrInst>(Val: &Inst);
250 Modifications.push_back(
251 Elt: [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
252 break;
253 // Add exact flag.
254 case Instruction::UDiv:
255 case Instruction::SDiv:
256 case Instruction::LShr:
257 case Instruction::AShr:
258 Modifications.push_back(Elt: [&Inst] { Inst.setIsExact(!Inst.isExact()); });
259 break;
260
261 case Instruction::FCmp:
262 CI = cast<FCmpInst>(Val: &Inst);
263 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
264 p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
265 Modifications.push_back(
266 Elt: [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
267 }
268 break;
269 }
270
271 // Add fast math flag if possible.
272 if (isa<FPMathOperator>(Val: &Inst)) {
273 // Try setting everything unless they are already on.
274 Modifications.push_back(
275 Elt: [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
276 // Try unsetting everything unless they are already off.
277 Modifications.push_back(
278 Elt: [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
279 // Individual setting by flipping the bit
280 Modifications.push_back(
281 Elt: [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
282 Modifications.push_back(Elt: [&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
283 Modifications.push_back(Elt: [&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
284 Modifications.push_back(
285 Elt: [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
286 Modifications.push_back(
287 Elt: [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
288 Modifications.push_back(
289 Elt: [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
290 Modifications.push_back(
291 Elt: [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
292 }
293
294 // Randomly switch operands of instructions
295 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
296 switch (Inst.getOpcode()) {
297 case Instruction::SDiv:
298 case Instruction::UDiv:
299 case Instruction::SRem:
300 case Instruction::URem:
301 case Instruction::FDiv:
302 case Instruction::FRem: {
303 // Verify that the after shuffle the second operand is not
304 // constant 0.
305 Value *Operand = Inst.getOperand(i: 0);
306 if (Constant *C = dyn_cast<Constant>(Val: Operand)) {
307 if (!C->isZeroValue()) {
308 ShuffleItems = {0, 1};
309 }
310 }
311 break;
312 }
313 case Instruction::Select:
314 ShuffleItems = {1, 2};
315 break;
316 case Instruction::Add:
317 case Instruction::Sub:
318 case Instruction::Mul:
319 case Instruction::Shl:
320 case Instruction::LShr:
321 case Instruction::AShr:
322 case Instruction::And:
323 case Instruction::Or:
324 case Instruction::Xor:
325 case Instruction::FAdd:
326 case Instruction::FSub:
327 case Instruction::FMul:
328 case Instruction::ICmp:
329 case Instruction::FCmp:
330 case Instruction::ShuffleVector:
331 ShuffleItems = {0, 1};
332 break;
333 }
334 if (ShuffleItems != NoneItem) {
335 Modifications.push_back(Elt: [&Inst, &ShuffleItems]() {
336 Value *Op0 = Inst.getOperand(i: ShuffleItems.first);
337 Inst.setOperand(i: ShuffleItems.first, Val: Inst.getOperand(i: ShuffleItems.second));
338 Inst.setOperand(i: ShuffleItems.second, Val: Op0);
339 });
340 }
341
342 auto RS = makeSampler(RandGen&: IB.Rand, Items&: Modifications);
343 if (RS)
344 RS.getSelection()();
345}
346
347/// Return a case value that is not already taken to make sure we don't have two
348/// cases with same value.
349static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
350 uint64_t MaxValue, RandomIRBuilder &IB) {
351 uint64_t tmp;
352 do {
353 tmp = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: MaxValue);
354 } while (CasesTaken.count(V: tmp) != 0);
355 CasesTaken.insert(V: tmp);
356 return tmp;
357}
358
359void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
360 Module *M = BB.getParent()->getParent();
361 // If nullptr is selected, we will create a new function declaration.
362 SmallVector<Function *, 32> Functions({nullptr});
363 for (Function &F : M->functions()) {
364 Functions.push_back(Elt: &F);
365 }
366
367 auto RS = makeSampler(RandGen&: IB.Rand, Items&: Functions);
368 Function *F = RS.getSelection();
369 // Some functions accept metadata type or token type as arguments.
370 // We don't call those functions for now.
371 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
372 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
373 auto IsUnsupportedTy = [](Type *T) {
374 return T->isMetadataTy() || T->isTokenTy();
375 };
376 if (!F || IsUnsupportedTy(F->getReturnType()) ||
377 any_of(Range: F->getFunctionType()->params(), P: IsUnsupportedTy)) {
378 F = IB.createFunctionDeclaration(M&: *M);
379 }
380
381 FunctionType *FTy = F->getFunctionType();
382 SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
383 if (!F->arg_empty()) {
384 for (Type *ArgTy : FTy->params()) {
385 SourcePreds.push_back(Elt: fuzzerop::onlyType(Only: ArgTy));
386 }
387 }
388 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(C&: M->getContext()));
389 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
390 Instruction *Inst) {
391 StringRef Name = isRetVoid ? nullptr : "C";
392 CallInst *Call = CallInst::Create(Ty: FTy, Func: F, Args: Srcs, NameStr: Name, InsertBefore: Inst);
393 // Don't return this call inst if it return void as it can't be sinked.
394 return isRetVoid ? nullptr : Call;
395 };
396
397 SmallVector<Instruction *, 32> Insts;
398 for (Instruction &I : getInsertionRange(BB))
399 Insts.push_back(Elt: &I);
400 if (Insts.size() < 1)
401 return;
402
403 // Choose an insertion point for our new call instruction.
404 uint64_t IP = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
405
406 auto InstsBefore = ArrayRef(Insts).slice(N: 0, M: IP);
407 auto InstsAfter = ArrayRef(Insts).slice(N: IP);
408
409 // Choose a source, which will be used to constrain the operation selection.
410 SmallVector<Value *, 2> Srcs;
411
412 for (const auto &Pred : ArrayRef(SourcePreds)) {
413 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore, Srcs, Pred));
414 }
415
416 if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
417 // Find a sink and wire up the results of the operation.
418 IB.connectToSink(BB, Insts: InstsAfter, V: Op);
419 }
420}
421
422void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
423 SmallVector<Instruction *, 32> Insts;
424 for (Instruction &I : getInsertionRange(BB))
425 Insts.push_back(Elt: &I);
426 if (Insts.size() < 1)
427 return;
428
429 // Choose a point where we split the block.
430 uint64_t IP = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
431 auto InstsBeforeSplit = ArrayRef(Insts).slice(N: 0, M: IP);
432
433 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
434 // directly jumps to `Sink`. Here, we have to create a new terminator for
435 // `Source`.
436 BasicBlock *Block = Insts[IP]->getParent();
437 BasicBlock *Source = Block;
438 BasicBlock *Sink = Block->splitBasicBlock(I: Insts[IP], BBName: "BB");
439
440 Function *F = BB.getParent();
441 LLVMContext &C = F->getParent()->getContext();
442 // A coin decides if it is branch or switch
443 if (uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: 1)) {
444 // Branch
445 BasicBlock *IfTrue = BasicBlock::Create(Context&: C, Name: "T", Parent: F);
446 BasicBlock *IfFalse = BasicBlock::Create(Context&: C, Name: "F", Parent: F);
447 Value *Cond =
448 IB.findOrCreateSource(BB&: *Source, Insts: InstsBeforeSplit, Srcs: {},
449 Pred: fuzzerop::onlyType(Only: Type::getInt1Ty(C)), allowConstant: false);
450 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
451 // Remove the old terminator.
452 ReplaceInstWithInst(From: Source->getTerminator(), To: Branch);
453 // Connect these blocks to `Sink`
454 connectBlocksToSink(Blocks: {IfTrue, IfFalse}, Sink, IB);
455 } else {
456 // Switch
457 // Determine Integer type, it IS possible we use a boolean to switch.
458 auto RS =
459 makeSampler(RandGen&: IB.Rand, Items: make_filter_range(Range&: IB.KnownTypes, Pred: [](Type *Ty) {
460 return Ty->isIntegerTy();
461 }));
462 assert(RS && "There is no integer type in all allowed types, is the "
463 "setting correct?");
464 Type *Ty = RS.getSelection();
465 IntegerType *IntTy = cast<IntegerType>(Val: Ty);
466
467 uint64_t BitSize = IntTy->getBitWidth();
468 uint64_t MaxCaseVal =
469 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
470 // Create Switch inst in Block
471 Value *Cond = IB.findOrCreateSource(BB&: *Source, Insts: InstsBeforeSplit, Srcs: {},
472 Pred: fuzzerop::onlyType(Only: IntTy), allowConstant: false);
473 BasicBlock *DefaultBlock = BasicBlock::Create(Context&: C, Name: "SW_D", Parent: F);
474 uint64_t NumCases = uniform<uint64_t>(Gen&: IB.Rand, Min: 1, Max: MaxNumCases);
475 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
476 SwitchInst *Switch = SwitchInst::Create(Value: Cond, Default: DefaultBlock, NumCases);
477 // Remove the old terminator.
478 ReplaceInstWithInst(From: Source->getTerminator(), To: Switch);
479
480 // Create blocks, for each block assign a case value.
481 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
482 SmallSet<uint64_t, 4> CasesTaken;
483 for (uint64_t i = 0; i < NumCases; i++) {
484 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxValue: MaxCaseVal, IB);
485 BasicBlock *CaseBlock = BasicBlock::Create(Context&: C, Name: "SW_C", Parent: F);
486 ConstantInt *OnValue = ConstantInt::get(Ty: IntTy, V: CaseVal);
487 Switch->addCase(OnVal: OnValue, Dest: CaseBlock);
488 Blocks.push_back(Elt: CaseBlock);
489 }
490
491 // Connect these blocks to `Sink`
492 connectBlocksToSink(Blocks, Sink, IB);
493 }
494}
495
496/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
497/// even have terminator.
498void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
499 BasicBlock *Sink,
500 RandomIRBuilder &IB) {
501 uint64_t DirectSinkIdx = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Blocks.size() - 1);
502 for (uint64_t i = 0; i < Blocks.size(); i++) {
503 // We have at least one block that directly goes to sink.
504 CFGToSink ToSink = (i == DirectSinkIdx)
505 ? CFGToSink::DirectSink
506 : static_cast<CFGToSink>(uniform<uint64_t>(
507 Gen&: IB.Rand, Min: 0, Max: CFGToSink::EndOfCFGToLink - 1));
508 BasicBlock *BB = Blocks[i];
509 Function *F = BB->getParent();
510 LLVMContext &C = F->getParent()->getContext();
511 switch (ToSink) {
512 case CFGToSink::Return: {
513 Type *RetTy = F->getReturnType();
514 Value *RetValue = nullptr;
515 if (!RetTy->isVoidTy())
516 RetValue =
517 IB.findOrCreateSource(BB&: *BB, Insts: {}, Srcs: {}, Pred: fuzzerop::onlyType(Only: RetTy));
518 ReturnInst::Create(C, retVal: RetValue, InsertAtEnd: BB);
519 break;
520 }
521 case CFGToSink::DirectSink: {
522 BranchInst::Create(IfTrue: Sink, InsertAtEnd: BB);
523 break;
524 }
525 case CFGToSink::SinkOrSelfLoop: {
526 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
527 // A coin decides which block is true branch.
528 uint64_t coin = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: 1);
529 Value *Cond = IB.findOrCreateSource(
530 BB&: *BB, Insts: {}, Srcs: {}, Pred: fuzzerop::onlyType(Only: Type::getInt1Ty(C)), allowConstant: false);
531 BranchInst::Create(IfTrue: Branches[coin], IfFalse: Branches[1 - coin], Cond, InsertAtEnd: BB);
532 break;
533 }
534 case CFGToSink::EndOfCFGToLink:
535 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
536 }
537 }
538}
539
540void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
541 // Can't insert PHI node to entry node.
542 if (&BB == &BB.getParent()->getEntryBlock())
543 return;
544 Type *Ty = IB.randomType();
545 PHINode *PHI = PHINode::Create(Ty, NumReservedValues: llvm::pred_size(BB: &BB), NameStr: "", InsertBefore: &BB.front());
546
547 // Use a map to make sure the same incoming basic block has the same value.
548 DenseMap<BasicBlock *, Value *> IncomingValues;
549 for (BasicBlock *Pred : predecessors(BB: &BB)) {
550 Value *Src = IncomingValues[Pred];
551 // If `Pred` is not in the map yet, we'll get a nullptr.
552 if (!Src) {
553 SmallVector<Instruction *, 32> Insts;
554 for (auto I = Pred->begin(); I != Pred->end(); ++I)
555 Insts.push_back(Elt: &*I);
556 // There is no need to inform IB what previously used values are if we are
557 // using `onlyType`
558 Src = IB.findOrCreateSource(BB&: *Pred, Insts, Srcs: {}, Pred: fuzzerop::onlyType(Only: Ty));
559 IncomingValues[Pred] = Src;
560 }
561 PHI->addIncoming(V: Src, BB: Pred);
562 }
563 SmallVector<Instruction *, 32> InstsAfter;
564 for (Instruction &I : getInsertionRange(BB))
565 InstsAfter.push_back(Elt: &I);
566 IB.connectToSink(BB, Insts: InstsAfter, V: PHI);
567}
568
569void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
570 for (BasicBlock &BB : F) {
571 this->mutate(BB, IB);
572 }
573}
574void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
575 SmallVector<Instruction *, 32> Insts;
576 for (Instruction &I : getInsertionRange(BB))
577 Insts.push_back(Elt: &I);
578 if (Insts.size() < 1)
579 return;
580 // Choose an Instruction to mutate.
581 uint64_t Idx = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
582 Instruction *Inst = Insts[Idx];
583 // `Idx + 1` so we don't sink to ourselves.
584 auto InstsAfter = ArrayRef(Insts).slice(N: Idx + 1);
585 Type *Ty = Inst->getType();
586 // Don't sink terminators, void function calls, token, etc.
587 if (!Ty->isVoidTy() && !Ty->isTokenTy())
588 // Find a new sink and wire up the results of the operation.
589 IB.connectToSink(BB, Insts: InstsAfter, V: Inst);
590}
591
592void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
593 // A deterministic alternative to SmallPtrSet with the same lookup
594 // performance.
595 std::map<size_t, Instruction *> AliveInsts;
596 std::map<Instruction *, size_t> AliveInstsLookup;
597 size_t InsertIdx = 0;
598 for (auto &I : make_early_inc_range(Range: make_range(
599 x: BB.getFirstInsertionPt(), y: BB.getTerminator()->getIterator()))) {
600 // First gather all instructions that can be shuffled. Don't take
601 // terminator.
602 AliveInsts.insert(x: {InsertIdx, &I});
603 AliveInstsLookup.insert(x: {&I, InsertIdx++});
604 // Then remove these instructions from the block
605 I.removeFromParent();
606 }
607
608 // Shuffle these instructions using topological sort.
609 // Returns false if all current instruction's dependencies in this block have
610 // been shuffled. If so, this instruction can be shuffled too.
611 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
612 for (Value *O : AliveInsts[Index]->operands()) {
613 Instruction *P = dyn_cast<Instruction>(Val: O);
614 if (P && AliveInstsLookup.count(x: P))
615 return true;
616 }
617 return false;
618 };
619 // Get all alive instructions that depend on the current instruction.
620 // Takes Instruction* instead of index because the instruction is already
621 // shuffled.
622 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
623 SmallSetVector<size_t, 8> Children;
624 for (Value *U : I->users()) {
625 Instruction *P = dyn_cast<Instruction>(Val: U);
626 if (P && AliveInstsLookup.count(x: P))
627 Children.insert(X: AliveInstsLookup[P]);
628 }
629 return Children;
630 };
631 SmallSet<size_t, 8> RootIndices;
632 SmallVector<Instruction *, 8> Insts;
633 for (const auto &[Index, Inst] : AliveInsts) {
634 if (!hasAliveParent(Index))
635 RootIndices.insert(V: Index);
636 }
637 // Topological sort by randomly selecting a node without a parent, or root.
638 while (!RootIndices.empty()) {
639 auto RS = makeSampler<size_t>(RandGen&: IB.Rand);
640 for (size_t RootIdx : RootIndices)
641 RS.sample(Item: RootIdx, Weight: 1);
642 size_t RootIdx = RS.getSelection();
643
644 RootIndices.erase(V: RootIdx);
645 Instruction *Root = AliveInsts[RootIdx];
646 AliveInsts.erase(x: RootIdx);
647 AliveInstsLookup.erase(x: Root);
648 Insts.push_back(Elt: Root);
649
650 for (size_t Child : getAliveChildren(Root)) {
651 if (!hasAliveParent(Child)) {
652 RootIndices.insert(V: Child);
653 }
654 }
655 }
656
657 Instruction *Terminator = BB.getTerminator();
658 // Then put instructions back.
659 for (Instruction *I : Insts) {
660 I->insertBefore(InsertPos: Terminator);
661 }
662}
663
664std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
665 LLVMContext &Context) {
666
667 if (Size <= 1)
668 // We get bogus data given an empty corpus - just create a new module.
669 return std::make_unique<Module>(args: "M", args&: Context);
670
671 auto Buffer = MemoryBuffer::getMemBuffer(
672 InputData: StringRef(reinterpret_cast<const char *>(Data), Size), BufferName: "Fuzzer input",
673 /*RequiresNullTerminator=*/false);
674
675 SMDiagnostic Err;
676 auto M = parseBitcodeFile(Buffer: Buffer->getMemBufferRef(), Context);
677 if (Error E = M.takeError()) {
678 errs() << toString(E: std::move(E)) << "\n";
679 return nullptr;
680 }
681 return std::move(M.get());
682}
683
684size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
685 std::string Buf;
686 {
687 raw_string_ostream OS(Buf);
688 WriteBitcodeToFile(M, Out&: OS);
689 }
690 if (Buf.size() > MaxSize)
691 return 0;
692 memcpy(dest: Dest, src: Buf.data(), n: Buf.size());
693 return Buf.size();
694}
695
696std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
697 LLVMContext &Context) {
698 auto M = parseModule(Data, Size, Context);
699 if (!M || verifyModule(M: *M, OS: &errs()))
700 return nullptr;
701
702 return M;
703}
704

source code of llvm/lib/FuzzMutate/IRMutator.cpp