1 | //===-- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC layer ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "llvm-c/Disassembler.h" |
12 | #include "llvm-c/Target.h" |
13 | #include "llvm/Support/CommandLine.h" |
14 | #include "llvm/Support/raw_ostream.h" |
15 | #include "llvm/TargetParser/Host.h" |
16 | #include "llvm/TargetParser/SubtargetFeature.h" |
17 | |
18 | using namespace llvm; |
19 | |
20 | const unsigned AssemblyTextBufSize = 80; |
21 | |
22 | static cl::opt<std::string> |
23 | TripleName("triple" , cl::desc("Target triple to assemble for, " |
24 | "see -version for available targets" )); |
25 | |
26 | static cl::opt<std::string> |
27 | MCPU("mcpu" , |
28 | cl::desc("Target a specific cpu type (-mcpu=help for details)" ), |
29 | cl::value_desc("cpu-name" ), cl::init(Val: "" )); |
30 | |
31 | // This is useful for variable-length instruction sets. |
32 | static cl::opt<unsigned> InsnLimit( |
33 | "insn-limit" , |
34 | cl::desc("Limit the number of instructions to process (0 for no limit)" ), |
35 | cl::value_desc("count" ), cl::init(Val: 0)); |
36 | |
37 | static cl::list<std::string> |
38 | MAttrs("mattr" , cl::CommaSeparated, |
39 | cl::desc("Target specific attributes (-mattr=help for details)" ), |
40 | cl::value_desc("a1,+a2,-a3,..." )); |
41 | // The feature string derived from -mattr's values. |
42 | std::string FeaturesStr; |
43 | |
44 | static cl::list<std::string> |
45 | FuzzerArgs("fuzzer-args" , cl::Positional, |
46 | cl::desc("Options to pass to the fuzzer" ), |
47 | cl::PositionalEatsArgs); |
48 | static std::vector<char *> ModifiedArgv; |
49 | |
50 | int DisassembleOneInput(const uint8_t *Data, size_t Size) { |
51 | char AssemblyText[AssemblyTextBufSize]; |
52 | |
53 | std::vector<uint8_t> DataCopy(Data, Data + Size); |
54 | |
55 | LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures( |
56 | Triple: TripleName.c_str(), CPU: MCPU.c_str(), Features: FeaturesStr.c_str(), DisInfo: nullptr, TagType: 0, |
57 | GetOpInfo: nullptr, SymbolLookUp: nullptr); |
58 | assert(Ctx); |
59 | uint8_t *p = DataCopy.data(); |
60 | unsigned Consumed; |
61 | unsigned InstructionsProcessed = 0; |
62 | do { |
63 | Consumed = LLVMDisasmInstruction(DC: Ctx, Bytes: p, BytesSize: Size, PC: 0, OutString: AssemblyText, |
64 | OutStringSize: AssemblyTextBufSize); |
65 | Size -= Consumed; |
66 | p += Consumed; |
67 | |
68 | InstructionsProcessed ++; |
69 | if (InsnLimit != 0 && InstructionsProcessed < InsnLimit) |
70 | break; |
71 | } while (Consumed != 0); |
72 | LLVMDisasmDispose(DC: Ctx); |
73 | return 0; |
74 | } |
75 | |
76 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
77 | return DisassembleOneInput(Data, Size); |
78 | } |
79 | |
80 | extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, |
81 | char ***argv) { |
82 | // The command line is unusual compared to other fuzzers due to the need to |
83 | // specify the target. Options like -triple, -mcpu, and -mattr work like |
84 | // their counterparts in llvm-mc, while -fuzzer-args collects options for the |
85 | // fuzzer itself. |
86 | // |
87 | // Examples: |
88 | // |
89 | // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to |
90 | // 4-bytes each and use the contents of ./corpus as the test corpus: |
91 | // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ |
92 | // -fuzzer-args -max_len=4 -runs=100000 ./corpus |
93 | // |
94 | // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA |
95 | // feature enabled using up to 64-byte inputs: |
96 | // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ |
97 | // -disassemble -fuzzer-args ./corpus |
98 | // |
99 | // If your aim is to find instructions that are not tested, then it is |
100 | // advisable to constrain the maximum input size to a single instruction |
101 | // using -max_len as in the first example. This results in a test corpus of |
102 | // individual instructions that test unique paths. Without this constraint, |
103 | // there will be considerable redundancy in the corpus. |
104 | |
105 | char **OriginalArgv = *argv; |
106 | |
107 | LLVMInitializeAllTargetInfos(); |
108 | LLVMInitializeAllTargetMCs(); |
109 | LLVMInitializeAllDisassemblers(); |
110 | |
111 | cl::ParseCommandLineOptions(argc: *argc, argv: OriginalArgv); |
112 | |
113 | // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that |
114 | // the driver can parse its arguments. |
115 | // |
116 | // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. |
117 | // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a |
118 | // non-const buffer to avoid the need to clean up when the fuzzer terminates. |
119 | ModifiedArgv.push_back(x: OriginalArgv[0]); |
120 | for (const auto &FuzzerArg : FuzzerArgs) { |
121 | for (int i = 1; i < *argc; ++i) { |
122 | if (FuzzerArg == OriginalArgv[i]) |
123 | ModifiedArgv.push_back(x: OriginalArgv[i]); |
124 | } |
125 | } |
126 | *argc = ModifiedArgv.size(); |
127 | *argv = ModifiedArgv.data(); |
128 | |
129 | // Package up features to be passed to target/subtarget |
130 | // We have to pass it via a global since the callback doesn't |
131 | // permit any user data. |
132 | if (MAttrs.size()) { |
133 | SubtargetFeatures Features; |
134 | for (unsigned i = 0; i != MAttrs.size(); ++i) |
135 | Features.AddFeature(String: MAttrs[i]); |
136 | FeaturesStr = Features.getString(); |
137 | } |
138 | |
139 | if (TripleName.empty()) |
140 | TripleName = sys::getDefaultTargetTriple(); |
141 | |
142 | return 0; |
143 | } |
144 | |