1 | //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "llvm-c/Target.h" |
12 | #include "llvm/MC/MCAsmBackend.h" |
13 | #include "llvm/MC/MCAsmInfo.h" |
14 | #include "llvm/MC/MCCodeEmitter.h" |
15 | #include "llvm/MC/MCContext.h" |
16 | #include "llvm/MC/MCInstPrinter.h" |
17 | #include "llvm/MC/MCInstrInfo.h" |
18 | #include "llvm/MC/MCObjectFileInfo.h" |
19 | #include "llvm/MC/MCObjectWriter.h" |
20 | #include "llvm/MC/MCParser/AsmLexer.h" |
21 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
22 | #include "llvm/MC/MCRegisterInfo.h" |
23 | #include "llvm/MC/MCSectionMachO.h" |
24 | #include "llvm/MC/MCStreamer.h" |
25 | #include "llvm/MC/MCSubtargetInfo.h" |
26 | #include "llvm/MC/MCTargetOptionsCommandFlags.h" |
27 | #include "llvm/MC/TargetRegistry.h" |
28 | #include "llvm/Support/CommandLine.h" |
29 | #include "llvm/Support/FileUtilities.h" |
30 | #include "llvm/Support/MemoryBuffer.h" |
31 | #include "llvm/Support/SourceMgr.h" |
32 | #include "llvm/Support/TargetSelect.h" |
33 | #include "llvm/Support/ToolOutputFile.h" |
34 | #include "llvm/Support/raw_ostream.h" |
35 | #include "llvm/TargetParser/Host.h" |
36 | #include "llvm/TargetParser/SubtargetFeature.h" |
37 | |
38 | using namespace llvm; |
39 | |
40 | static mc::RegisterMCTargetOptionsFlags MOF; |
41 | |
42 | static cl::opt<std::string> |
43 | TripleName("triple" , cl::desc("Target triple to assemble for, " |
44 | "see -version for available targets" )); |
45 | |
46 | static cl::opt<std::string> |
47 | MCPU("mcpu" , |
48 | cl::desc("Target a specific cpu type (-mcpu=help for details)" ), |
49 | cl::value_desc("cpu-name" ), cl::init(Val: "" )); |
50 | |
51 | // This is useful for variable-length instruction sets. |
52 | static cl::opt<unsigned> InsnLimit( |
53 | "insn-limit" , |
54 | cl::desc("Limit the number of instructions to process (0 for no limit)" ), |
55 | cl::value_desc("count" ), cl::init(Val: 0)); |
56 | |
57 | static cl::list<std::string> |
58 | MAttrs("mattr" , cl::CommaSeparated, |
59 | cl::desc("Target specific attributes (-mattr=help for details)" ), |
60 | cl::value_desc("a1,+a2,-a3,..." )); |
61 | // The feature string derived from -mattr's values. |
62 | std::string FeaturesStr; |
63 | |
64 | static cl::list<std::string> |
65 | FuzzerArgs("fuzzer-args" , cl::Positional, |
66 | cl::desc("Options to pass to the fuzzer" ), |
67 | cl::PositionalEatsArgs); |
68 | static std::vector<char *> ModifiedArgv; |
69 | |
70 | enum OutputFileType { |
71 | OFT_Null, |
72 | OFT_AssemblyFile, |
73 | OFT_ObjectFile |
74 | }; |
75 | static cl::opt<OutputFileType> |
76 | FileType("filetype" , cl::init(Val: OFT_AssemblyFile), |
77 | cl::desc("Choose an output file type:" ), |
78 | cl::values( |
79 | clEnumValN(OFT_AssemblyFile, "asm" , |
80 | "Emit an assembly ('.s') file" ), |
81 | clEnumValN(OFT_Null, "null" , |
82 | "Don't emit anything (for timing purposes)" ), |
83 | clEnumValN(OFT_ObjectFile, "obj" , |
84 | "Emit a native object ('.o') file" ))); |
85 | |
86 | |
87 | class LLVMFuzzerInputBuffer : public MemoryBuffer |
88 | { |
89 | public: |
90 | LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_) |
91 | : Data(reinterpret_cast<const char *>(data_)), |
92 | Size(size_) { |
93 | init(BufStart: Data, BufEnd: Data+Size, RequiresNullTerminator: false); |
94 | } |
95 | |
96 | |
97 | virtual BufferKind getBufferKind() const { |
98 | return MemoryBuffer_Malloc; // it's not disk-backed so I think that's |
99 | // the intent ... though AFAIK it |
100 | // probably came from an mmap or sbrk |
101 | } |
102 | |
103 | private: |
104 | const char *Data; |
105 | size_t Size; |
106 | }; |
107 | |
108 | static int AssembleInput(const char *ProgName, const Target *TheTarget, |
109 | SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, |
110 | MCAsmInfo &MAI, MCSubtargetInfo &STI, |
111 | MCInstrInfo &MCII, MCTargetOptions &MCOptions) { |
112 | static const bool NoInitialTextSection = false; |
113 | |
114 | std::unique_ptr<MCAsmParser> Parser( |
115 | createMCAsmParser(SrcMgr, Ctx, Str, MAI)); |
116 | |
117 | std::unique_ptr<MCTargetAsmParser> TAP( |
118 | TheTarget->createMCAsmParser(STI, Parser&: *Parser, MII: MCII, Options: MCOptions)); |
119 | |
120 | if (!TAP) { |
121 | errs() << ProgName |
122 | << ": error: this target '" << TripleName |
123 | << "', does not support assembly parsing.\n" ; |
124 | abort(); |
125 | } |
126 | |
127 | Parser->setTargetParser(*TAP); |
128 | |
129 | return Parser->Run(NoInitialTextSection); |
130 | } |
131 | |
132 | |
133 | int AssembleOneInput(const uint8_t *Data, size_t Size) { |
134 | const bool ShowInst = false; |
135 | const bool AsmVerbose = false; |
136 | const bool UseDwarfDirectory = true; |
137 | |
138 | Triple TheTriple(Triple::normalize(Str: TripleName)); |
139 | |
140 | SourceMgr SrcMgr; |
141 | |
142 | std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size)); |
143 | |
144 | // Tell SrcMgr about this buffer, which is what the parser will pick up. |
145 | SrcMgr.AddNewSourceBuffer(F: std::move(BufferPtr), IncludeLoc: SMLoc()); |
146 | |
147 | static const std::vector<std::string> NoIncludeDirs; |
148 | SrcMgr.setIncludeDirs(NoIncludeDirs); |
149 | |
150 | static std::string ArchName; |
151 | std::string Error; |
152 | const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, |
153 | Error); |
154 | if (!TheTarget) { |
155 | errs() << "error: this target '" << TheTriple.normalize() |
156 | << "/" << ArchName << "', was not found: '" << Error << "'\n" ; |
157 | |
158 | abort(); |
159 | } |
160 | |
161 | std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT: TripleName)); |
162 | if (!MRI) { |
163 | errs() << "Unable to create target register info!" ; |
164 | abort(); |
165 | } |
166 | |
167 | MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); |
168 | std::unique_ptr<MCAsmInfo> MAI( |
169 | TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCOptions)); |
170 | if (!MAI) { |
171 | errs() << "Unable to create target asm info!" ; |
172 | abort(); |
173 | } |
174 | |
175 | std::unique_ptr<MCSubtargetInfo> STI( |
176 | TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: MCPU, Features: FeaturesStr)); |
177 | |
178 | MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr); |
179 | std::unique_ptr<MCObjectFileInfo> MOFI( |
180 | TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); |
181 | Ctx.setObjectFileInfo(MOFI.get()); |
182 | |
183 | const unsigned OutputAsmVariant = 0; |
184 | std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); |
185 | MCInstPrinter *IP = TheTarget->createMCInstPrinter(T: Triple(TripleName), SyntaxVariant: OutputAsmVariant, |
186 | MAI: *MAI, MII: *MCII, MRI: *MRI); |
187 | if (!IP) { |
188 | errs() |
189 | << "error: unable to create instruction printer for target triple '" |
190 | << TheTriple.normalize() << "' with assembly variant " |
191 | << OutputAsmVariant << ".\n" ; |
192 | |
193 | abort(); |
194 | } |
195 | |
196 | const char *ProgName = "llvm-mc-fuzzer" ; |
197 | std::unique_ptr<MCCodeEmitter> CE = nullptr; |
198 | std::unique_ptr<MCAsmBackend> MAB = nullptr; |
199 | |
200 | std::string OutputString; |
201 | raw_string_ostream Out(OutputString); |
202 | auto FOut = std::make_unique<formatted_raw_ostream>(args&: Out); |
203 | |
204 | std::unique_ptr<MCStreamer> Str; |
205 | |
206 | if (FileType == OFT_AssemblyFile) { |
207 | Str.reset(p: TheTarget->createAsmStreamer(Ctx, OS: std::move(FOut), IsVerboseAsm: AsmVerbose, |
208 | UseDwarfDirectory, InstPrint: IP, CE: std::move(CE), |
209 | TAB: std::move(MAB), ShowInst)); |
210 | } else { |
211 | assert(FileType == OFT_ObjectFile && "Invalid file type!" ); |
212 | |
213 | std::error_code EC; |
214 | const std::string OutputFilename = "-" ; |
215 | auto Out = |
216 | std::make_unique<ToolOutputFile>(args: OutputFilename, args&: EC, args: sys::fs::OF_None); |
217 | if (EC) { |
218 | errs() << EC.message() << '\n'; |
219 | abort(); |
220 | } |
221 | |
222 | // Don't waste memory on names of temp labels. |
223 | Ctx.setUseNamesOnTempLabels(false); |
224 | |
225 | std::unique_ptr<buffer_ostream> BOS; |
226 | raw_pwrite_stream *OS = &Out->os(); |
227 | if (!Out->os().supportsSeeking()) { |
228 | BOS = std::make_unique<buffer_ostream>(args&: Out->os()); |
229 | OS = BOS.get(); |
230 | } |
231 | |
232 | MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(II: *MCII, Ctx); |
233 | MCAsmBackend *MAB = TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCOptions); |
234 | Str.reset(p: TheTarget->createMCObjectStreamer( |
235 | T: TheTriple, Ctx, TAB: std::unique_ptr<MCAsmBackend>(MAB), |
236 | OW: MAB->createObjectWriter(OS&: *OS), Emitter: std::unique_ptr<MCCodeEmitter>(CE), STI: *STI, |
237 | RelaxAll: MCOptions.MCRelaxAll, IncrementalLinkerCompatible: MCOptions.MCIncrementalLinkerCompatible, |
238 | /*DWARFMustBeAtTheEnd*/ false)); |
239 | } |
240 | const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, Str&: *Str, MAI&: *MAI, STI&: *STI, |
241 | MCII&: *MCII, MCOptions); |
242 | |
243 | (void) Res; |
244 | |
245 | return 0; |
246 | } |
247 | |
248 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
249 | return AssembleOneInput(Data, Size); |
250 | } |
251 | |
252 | extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, |
253 | char ***argv) { |
254 | // The command line is unusual compared to other fuzzers due to the need to |
255 | // specify the target. Options like -triple, -mcpu, and -mattr work like |
256 | // their counterparts in llvm-mc, while -fuzzer-args collects options for the |
257 | // fuzzer itself. |
258 | // |
259 | // Examples: |
260 | // |
261 | // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to |
262 | // 4-bytes each and use the contents of ./corpus as the test corpus: |
263 | // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ |
264 | // -fuzzer-args -max_len=4 -runs=100000 ./corpus |
265 | // |
266 | // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA |
267 | // feature enabled using up to 64-byte inputs: |
268 | // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ |
269 | // -disassemble -fuzzer-args ./corpus |
270 | // |
271 | // If your aim is to find instructions that are not tested, then it is |
272 | // advisable to constrain the maximum input size to a single instruction |
273 | // using -max_len as in the first example. This results in a test corpus of |
274 | // individual instructions that test unique paths. Without this constraint, |
275 | // there will be considerable redundancy in the corpus. |
276 | |
277 | char **OriginalArgv = *argv; |
278 | |
279 | LLVMInitializeAllTargetInfos(); |
280 | LLVMInitializeAllTargetMCs(); |
281 | LLVMInitializeAllAsmParsers(); |
282 | |
283 | cl::ParseCommandLineOptions(argc: *argc, argv: OriginalArgv); |
284 | |
285 | // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that |
286 | // the driver can parse its arguments. |
287 | // |
288 | // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. |
289 | // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a |
290 | // non-const buffer to avoid the need to clean up when the fuzzer terminates. |
291 | ModifiedArgv.push_back(x: OriginalArgv[0]); |
292 | for (const auto &FuzzerArg : FuzzerArgs) { |
293 | for (int i = 1; i < *argc; ++i) { |
294 | if (FuzzerArg == OriginalArgv[i]) |
295 | ModifiedArgv.push_back(x: OriginalArgv[i]); |
296 | } |
297 | } |
298 | *argc = ModifiedArgv.size(); |
299 | *argv = ModifiedArgv.data(); |
300 | |
301 | // Package up features to be passed to target/subtarget |
302 | // We have to pass it via a global since the callback doesn't |
303 | // permit any user data. |
304 | if (MAttrs.size()) { |
305 | SubtargetFeatures Features; |
306 | for (unsigned i = 0; i != MAttrs.size(); ++i) |
307 | Features.AddFeature(String: MAttrs[i]); |
308 | FeaturesStr = Features.getString(); |
309 | } |
310 | |
311 | if (TripleName.empty()) |
312 | TripleName = sys::getDefaultTargetTriple(); |
313 | |
314 | return 0; |
315 | } |
316 | |