llvm-mc-disassemble-fuzzer.cpp source code [llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp]

1	//===-- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC layer ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	//===----------------------------------------------------------------------===//
10
11	#include "llvm-c/Disassembler.h"
12	#include "llvm-c/Target.h"
13	#include "llvm/Support/CommandLine.h"
14	#include "llvm/Support/raw_ostream.h"
15	#include "llvm/TargetParser/Host.h"
16	#include "llvm/TargetParser/SubtargetFeature.h"
17
18	using namespace llvm;
19
// Size, in bytes, of the stack buffer that receives the textual form of each
// disassembled instruction in DisassembleOneInput().
constexpr unsigned AssemblyTextBufSize = 80;
21
22	static cl::opt<std::string>
23	TripleName("triple", cl::desc ("Target triple to assemble for, "
24	"see -version for available targets"));
25
26	static cl::opt<std::string>
27	MCPU("mcpu",
28	cl::desc ("Target a specific cpu type (-mcpu=help for details)"),
29	cl::value_desc ("cpu-name"), cl::init(Val: ""));
30
31	// This is useful for variable-length instruction sets.
32	static cl::opt<unsigned> InsnLimit(
33	"insn-limit",
34	cl::desc ("Limit the number of instructions to process (0 for no limit)"),
35	cl::value_desc ("count"), cl::init(Val: `0`));
36
37	static cl::list<std::string>
38	MAttrs("mattr", cl::CommaSeparated,
39	cl::desc ("Target specific attributes (-mattr=help for details)"),
40	cl::value_desc ("a1,+a2,-a3,..."));
41	// The feature string derived from -mattr's values.
42	std::string FeaturesStr;
43
44	static cl::list<std::string>
45	FuzzerArgs("fuzzer-args", cl::Positional,
46	cl::desc ("Options to pass to the fuzzer"),
47	cl::PositionalEatsArgs);
48	static std::vector<char *> ModifiedArgv;
49
50	int DisassembleOneInput(const uint8_t *Data, size_t Size) {
51	char AssemblyText[AssemblyTextBufSize];
52
53	std::vector<uint8_t> DataCopy(Data, Data + Size);
54
55	LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
56	Triple: TripleName.c_str(), CPU: MCPU.c_str(), Features: FeaturesStr.c_str(), DisInfo: nullptr, TagType: `0`,
57	GetOpInfo: nullptr, SymbolLookUp: nullptr);
58	assert(Ctx);
59	uint8_t *p = DataCopy.data();
60	unsigned Consumed;
61	unsigned InstructionsProcessed = `0`;
62	do {
63	Consumed = LLVMDisasmInstruction(DC: Ctx, Bytes: p, BytesSize: Size, PC: `0`, OutString: AssemblyText,
64	OutStringSize: AssemblyTextBufSize);
65	Size -= Consumed;
66	p += Consumed;
67
68	InstructionsProcessed ++;
69	if (InsnLimit != `0` && InstructionsProcessed < InsnLimit)
70	break;
71	} while (Consumed != `0`);
72	LLVMDisasmDispose(DC: Ctx);
73	return `0`;
74	}
75
76	extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
77	return DisassembleOneInput(Data, Size);
78	}
79
80	extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
81	char ***argv) {
82	// The command line is unusual compared to other fuzzers due to the need to
83	// specify the target. Options like -triple, -mcpu, and -mattr work like
84	// their counterparts in llvm-mc, while -fuzzer-args collects options for the
85	// fuzzer itself.
86	//
87	// Examples:
88	//
89	// Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
90	// 4-bytes each and use the contents of ./corpus as the test corpus:
91	// llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
92	// -fuzzer-args -max_len=4 -runs=100000 ./corpus
93	//
94	// Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
95	// feature enabled using up to 64-byte inputs:
96	// llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
97	// -disassemble -fuzzer-args ./corpus
98	//
99	// If your aim is to find instructions that are not tested, then it is
100	// advisable to constrain the maximum input size to a single instruction
101	// using -max_len as in the first example. This results in a test corpus of
102	// individual instructions that test unique paths. Without this constraint,
103	// there will be considerable redundancy in the corpus.
104
105	char *OriginalArgv = argv;
106
107	LLVMInitializeAllTargetInfos();
108	LLVMInitializeAllTargetMCs();
109	LLVMInitializeAllDisassemblers();
110
111	cl::ParseCommandLineOptions(argc: *argc, argv: OriginalArgv);
112
113	// Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
114	// the driver can parse its arguments.
115	//
116	// FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
117	// Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
118	// non-const buffer to avoid the need to clean up when the fuzzer terminates.
119	ModifiedArgv.push_back(x: OriginalArgv[`0`]);
120	for (const auto &FuzzerArg : FuzzerArgs) {
121	for (int i = `1`; i < *argc; ++i) {
122	if (FuzzerArg == OriginalArgv[i])
123	ModifiedArgv.push_back(x: OriginalArgv[i]);
124	}
125	}
126	*argc = ModifiedArgv.size();
127	*argv = ModifiedArgv.data();
128
129	// Package up features to be passed to target/subtarget
130	// We have to pass it via a global since the callback doesn't
131	// permit any user data.
132	if (MAttrs.size()) {
133	SubtargetFeatures Features;
134	for (unsigned i = `0`; i != MAttrs.size(); ++i)
135	Features.AddFeature(String: MAttrs [i]);
136	FeaturesStr = Features.getString();
137	}
138
139	if (TripleName.empty())
140	TripleName = sys::getDefaultTargetTriple();
141
142	return `0`;
143	}
144

source code of llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp