llvm-exegesis.cpp source code [llvm/tools/llvm-exegesis/llvm-exegesis.cpp]

1	//===-- llvm-exegesis.cpp ---------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Measures execution properties (latencies/uops) of an instruction.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "lib/Analysis.h"
15	#include "lib/BenchmarkResult.h"
16	#include "lib/BenchmarkRunner.h"
17	#include "lib/Clustering.h"
18	#include "lib/CodeTemplate.h"
19	#include "lib/Error.h"
20	#include "lib/LlvmState.h"
21	#include "lib/PerfHelper.h"
22	#include "lib/ProgressMeter.h"
23	#include "lib/ResultAggregator.h"
24	#include "lib/SnippetFile.h"
25	#include "lib/SnippetRepetitor.h"
26	#include "lib/Target.h"
27	#include "lib/TargetSelect.h"
28	#include "lib/ValidationEvent.h"
29	#include "llvm/ADT/StringExtras.h"
30	#include "llvm/ADT/Twine.h"
31	#include "llvm/MC/MCInstBuilder.h"
32	#include "llvm/MC/MCObjectFileInfo.h"
33	#include "llvm/MC/MCParser/MCAsmParser.h"
34	#include "llvm/MC/MCParser/MCTargetAsmParser.h"
35	#include "llvm/MC/MCRegisterInfo.h"
36	#include "llvm/MC/MCSubtargetInfo.h"
37	#include "llvm/MC/TargetRegistry.h"
38	#include "llvm/Object/ObjectFile.h"
39	#include "llvm/Support/CommandLine.h"
40	#include "llvm/Support/FileSystem.h"
41	#include "llvm/Support/Format.h"
42	#include "llvm/Support/InitLLVM.h"
43	#include "llvm/Support/Path.h"
44	#include "llvm/Support/SourceMgr.h"
45	#include "llvm/Support/TargetSelect.h"
46	#include "llvm/TargetParser/Host.h"
47	#include <algorithm>
48	#include <string>
49
50	namespace llvm {
51	namespace exegesis {
52
53	static cl::opt<int> OpcodeIndex(
54	"opcode-index",
55	cl::desc ("opcode to measure, by index, or -1 to measure all opcodes"),
56	cl::cat (BenchmarkOptions), cl::init(Val: `0`));
57
58	static cl::opt<std::string>
59	OpcodeNames("opcode-name",
60	cl::desc ("comma-separated list of opcodes to measure, by name"),
61	cl::cat (BenchmarkOptions), cl::init(Val: ""));
62
63	static cl::opt<std::string> SnippetsFile("snippets-file",
64	cl::desc ("code snippets to measure"),
65	cl::cat (BenchmarkOptions),
66	cl::init(Val: ""));
67
68	static cl::opt<std::string>
69	BenchmarkFile("benchmarks-file",
70	cl::desc ("File to read (analysis mode) or write "
71	"(latency/uops/inverse_throughput modes) benchmark "
72	"results. “-” uses stdin/stdout."),
73	cl::cat (Options), cl::init(Val: ""));
74
75	static cl::opt<Benchmark::ModeE> BenchmarkMode(
76	"mode", cl::desc ("the mode to run"), cl::cat (Options),
77	cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
78	clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
79	"Instruction Inverse Throughput"),
80	clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
81	// When not asking for a specific benchmark mode,
82	// we'll analyse the results.
83	clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));
84
85	static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
86	"result-aggregation-mode", cl::desc ("How to aggregate multi-values result"),
87	cl::cat (BenchmarkOptions),
88	cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
89	clEnumValN(Benchmark::Max, "max", "Keep max reading"),
90	clEnumValN(Benchmark::Mean, "mean",
91	"Compute mean of all readings"),
92	clEnumValN(Benchmark::MinVariance, "min-variance",
93	"Keep readings set with min-variance")),
94	cl::init(Val: Benchmark::Min));
95
96	static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
97	"repetition-mode", cl::desc ("how to repeat the instruction snippet"),
98	cl::cat (BenchmarkOptions),
99	cl::values(
100	clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
101	clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
102	clEnumValN(Benchmark::AggregateMin, "min",
103	"All of the above and take the minimum of measurements"),
104	clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
105	"Middle half duplicate mode"),
106	clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
107	"Middle half loop mode")),
108	cl::init(Val: Benchmark::Duplicate));
109
110	static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
111	"measurements-print-progress",
112	cl::desc ("Produce progress indicator when performing measurements"),
113	cl::cat (BenchmarkOptions), cl::init(Val: false));
114
115	static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
116	"benchmark-phase",
117	cl::desc (
118	"it is possible to stop the benchmarking process after some phase"),
119	cl::cat (BenchmarkOptions),
120	cl::values(
121	clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
122	"Only generate the minimal instruction sequence"),
123	clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
124	"prepare-and-assemble-snippet",
125	"Same as prepare-snippet, but also dumps an excerpt of the "
126	"sequence (hex encoded)"),
127	clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
128	"assemble-measured-code",
129	"Same as prepare-and-assemble-snippet, but also creates the "
130	"full sequence "
131	"that can be dumped to a file using --dump-object-to-disk"),
132	clEnumValN(
133	BenchmarkPhaseSelectorE::Measure, "measure",
134	"Same as prepare-measured-code, but also runs the measurement "
135	"(default)")),
136	cl::init(Val: BenchmarkPhaseSelectorE::Measure));
137
138	static cl::opt<bool>
139	UseDummyPerfCounters("use-dummy-perf-counters",
140	cl::desc ("Do not read real performance counters, use "
141	"dummy values (for testing)"),
142	cl::cat (BenchmarkOptions), cl::init(Val: false));
143
144	static cl::opt<unsigned>
145	MinInstructions("min-instructions",
146	cl::desc ("The minimum number of instructions that should "
147	"be included in the snippet"),
148	cl::cat (BenchmarkOptions), cl::init(Val: `10000`));
149
150	static cl::opt<unsigned>
151	LoopBodySize("loop-body-size",
152	cl::desc ("when repeating the instruction snippet by looping "
153	"over it, duplicate the snippet until the loop body "
154	"contains at least this many instruction"),
155	cl::cat (BenchmarkOptions), cl::init(Val: `0`));
156
157	static cl::opt<unsigned> MaxConfigsPerOpcode(
158	"max-configs-per-opcode",
159	cl::desc (
160	"allow to snippet generator to generate at most that many configs"),
161	cl::cat (BenchmarkOptions), cl::init(Val: `1`));
162
163	static cl::opt<bool> IgnoreInvalidSchedClass(
164	"ignore-invalid-sched-class",
165	cl::desc ("ignore instructions that do not define a sched class"),
166	cl::cat (BenchmarkOptions), cl::init(Val: false));
167
168	static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
169	"analysis-filter", cl::desc ("Filter the benchmarks before analysing them"),
170	cl::cat (BenchmarkOptions),
171	cl::values(
172	clEnumValN(BenchmarkFilter::All, "all",
173	"Keep all benchmarks (default)"),
174	clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
175	"Keep only those benchmarks that do NOT involve memory"),
176	clEnumValN(BenchmarkFilter::WithMem, "mem-only",
177	"Keep only the benchmarks that DO involve memory")),
178	cl::init(Val: BenchmarkFilter::All));
179
180	static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
181	"analysis-clustering", cl::desc ("the clustering algorithm to use"),
182	cl::cat (AnalysisOptions),
183	cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
184	"use DBSCAN/OPTICS algorithm"),
185	clEnumValN(BenchmarkClustering::Naive, "naive",
186	"one cluster per opcode")),
187	cl::init(Val: BenchmarkClustering::Dbscan));
188
189	static cl::opt<unsigned> AnalysisDbscanNumPoints(
190	"analysis-numpoints",
191	cl::desc ("minimum number of points in an analysis cluster (dbscan only)"),
192	cl::cat (AnalysisOptions), cl::init(Val: `3`));
193
194	static cl::opt<float> AnalysisClusteringEpsilon(
195	"analysis-clustering-epsilon",
196	cl::desc ("epsilon for benchmark point clustering"),
197	cl::cat (AnalysisOptions), cl::init(Val: `0.1`));
198
199	static cl::opt<float> AnalysisInconsistencyEpsilon(
200	"analysis-inconsistency-epsilon",
201	cl::desc ("epsilon for detection of when the cluster is different from the "
202	"LLVM schedule profile values"),
203	cl::cat (AnalysisOptions), cl::init(Val: `0.1`));
204
205	static cl::opt<std::string>
206	AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc (""),
207	cl::cat (AnalysisOptions), cl::init(Val: ""));
208	static cl::opt<std::string>
209	AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
210	cl::desc (""), cl::cat (AnalysisOptions),
211	cl::init(Val: ""));
212
213	static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
214	"analysis-display-unstable-clusters",
215	cl::desc ("if there is more than one benchmark for an opcode, said "
216	"benchmarks may end up not being clustered into the same cluster "
217	"if the measured performance characteristics are different. by "
218	"default all such opcodes are filtered out. this flag will "
219	"instead show only such unstable opcodes"),
220	cl::cat (AnalysisOptions), cl::init(Val: false));
221
222	static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
223	"analysis-override-benchmark-triple-and-cpu",
224	cl::desc ("By default, we analyze the benchmarks for the triple/CPU they "
225	"were measured for, but if you want to analyze them for some "
226	"other combination (specified via -mtriple/-mcpu), you can "
227	"pass this flag."),
228	cl::cat (AnalysisOptions), cl::init(Val: false));
229
230	static cl::opt<std::string>
231	TripleName("mtriple",
232	cl::desc ("Target triple. See -version for available targets"),
233	cl::cat (Options));
234
235	static cl::opt<std::string>
236	MCPU("mcpu",
237	cl::desc ("Target a specific cpu type (-mcpu=help for details)"),
238	cl::value_desc ("cpu-name"), cl::cat (Options), cl::init(Val: "native"));
239
240	static cl::opt<std::string>
241	DumpObjectToDisk("dump-object-to-disk",
242	cl::desc ("dumps the generated benchmark object to disk "
243	"and prints a message to access it"),
244	cl::ValueOptional, cl::cat (BenchmarkOptions));
245
246	static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
247	"execution-mode",
248	cl::desc ("Selects the execution mode to use for running snippets"),
249	cl::cat (BenchmarkOptions),
250	cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
251	"inprocess",
252	"Executes the snippets within the same process"),
253	clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
254	"subprocess",
255	"Spawns a subprocess for each snippet execution, "
256	"allows for the use of memory annotations")),
257	cl::init(Val: BenchmarkRunner::ExecutionModeE::InProcess));
258
259	static cl::opt<unsigned> BenchmarkRepeatCount(
260	"benchmark-repeat-count",
261	cl::desc ("The number of times to repeat measurements on the benchmark k "
262	"before aggregating the results"),
263	cl::cat (BenchmarkOptions), cl::init(Val: `30`));
264
265	static cl::list<ValidationEvent> ValidationCounters(
266	"validation-counter",
267	cl::desc (
268	"The name of a validation counter to run concurrently with the main "
269	"counter to validate benchmarking assumptions"),
270	cl::CommaSeparated, cl::cat (BenchmarkOptions), ValidationEventOptions ());
271
272	static ExitOnError ExitOnErr("llvm-exegesis error: ");
273
274	// Helper function that logs the error(s) and exits.
275	template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
276	ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
277	}
278
279	// Check Err. If it's in a failure state log the file error(s) and exit.
280	static void ExitOnFileError(const Twine &FileName, Error Err) {
281	if (Err) {
282	ExitOnErr (createFileError(F: FileName, E: std::move(Err)));
283	}
284	}
285
286	// Check E. If it's in a success state then return the contained value.
287	// If it's in a failure state log the file error(s) and exit.
288	template <typename T>
289	T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
290	ExitOnFileError(FileName, E.takeError());
291	return std::move(*E);
292	}
293
294	// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
295	// and returns the opcode indices or {} if snippets should be read from
296	// `SnippetsFile`.
297	static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
298	const size_t NumSetFlags = (OpcodeNames.empty() ? `0` : `1`) +
299	(OpcodeIndex == `0` ? `0` : `1`) +
300	(SnippetsFile.empty() ? `0` : `1`);
301	const auto &ET = State.getExegesisTarget();
302	const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();
303
304	if (NumSetFlags != `1`) {
305	ExitOnErr.setBanner("llvm-exegesis: ");
306	ExitWithError(Args: "please provide one and only one of 'opcode-index', "
307	"'opcode-name' or 'snippets-file'");
308	}
309	if (!SnippetsFile.empty())
310	return {};
311	if (OpcodeIndex > `0`)
312	return {static_cast<unsigned>(OpcodeIndex)};
313	if (OpcodeIndex < `0`) {
314	std::vector<unsigned> Result;
315	unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
316	Result.reserve(n: NumOpcodes);
317	for (unsigned I = `0`, E = NumOpcodes; I < E; ++I) {
318	if (!ET.isOpcodeAvailable(Opcode: I, Features: AvailableFeatures))
319	continue;
320	Result.push_back(x: I);
321	}
322	return Result;
323	}
324	// Resolve opcode name -> opcode.
325	const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
326	const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
327	auto I = Map.find(Val: OpcodeName);
328	if (I != Map.end())
329	return I ->getSecond();
330	return `0u`;
331	};
332	SmallVector<StringRef, `2`> Pieces;
333	StringRef (OpcodeNames.getValue())
334	.split(A&: Pieces, Separator: ",", / MaxSplit / -`1`, / KeepEmpty / false);
335	std::vector<unsigned> Result;
336	Result.reserve(n: Pieces.size());
337	for (const StringRef &OpcodeName : Pieces) {
338	if (unsigned Opcode = ResolveName (OpcodeName))
339	Result.push_back(x: Opcode);
340	else
341	ExitWithError(Args: Twine ("unknown opcode ").concat(Suffix: OpcodeName));
342	}
343	return Result;
344	}
345
346	// Generates code snippets for opcode `Opcode`.
347	static Expected<std::vector<BenchmarkCode>>
348	generateSnippets(const LLVMState &State, unsigned Opcode,
349	const BitVector &ForbiddenRegs) {
350	const Instruction &Instr = State.getIC().getInstr(Opcode);
351	const MCInstrDesc &InstrDesc = Instr.Description;
352	// Ignore instructions that we cannot run.
353	if (InstrDesc.isPseudo() \|\| InstrDesc.usesCustomInsertionHook())
354	return make_error<Failure>(
355	Args: "Unsupported opcode: isPseudo/usesCustomInserter");
356	if (InstrDesc.isBranch() \|\| InstrDesc.isIndirectBranch())
357	return make_error<Failure>(Args: "Unsupported opcode: isBranch/isIndirectBranch");
358	if (InstrDesc.isCall() \|\| InstrDesc.isReturn())
359	return make_error<Failure>(Args: "Unsupported opcode: isCall/isReturn");
360
361	const std::vector<InstructionTemplate> InstructionVariants =
362	State.getExegesisTarget().generateInstructionVariants(
363	Instr, MaxConfigsPerOpcode);
364
365	SnippetGenerator::Options SnippetOptions;
366	SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
367	const std::unique_ptr<SnippetGenerator> Generator =
368	State.getExegesisTarget().createSnippetGenerator(Mode: BenchmarkMode, State,
369	Opts: SnippetOptions);
370	if (!Generator)
371	ExitWithError(Args: "cannot create snippet generator");
372
373	std::vector<BenchmarkCode> Benchmarks;
374	for (const InstructionTemplate &Variant : InstructionVariants) {
375	if (Benchmarks.size() >= MaxConfigsPerOpcode)
376	break;
377	if (auto Err = Generator ->generateConfigurations(Variant, Benchmarks,
378	ExtraForbiddenRegs: ForbiddenRegs))
379	return std::move(Err);
380	}
381	return Benchmarks;
382	}
383
384	static void runBenchmarkConfigurations(
385	const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
386	ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
387	const BenchmarkRunner &Runner) {
388	assert(!Configurations.empty() && "Don't have any configurations to run.");
389	std::optional<raw_fd_ostream> FileOstr;
390	if (BenchmarkFile != "-") {
391	int ResultFD = `0`;
392	// Create output file or open existing file and truncate it, once.
393	ExitOnErr (errorCodeToError(EC: openFileForWrite(Name: BenchmarkFile, ResultFD,
394	Disp: sys::fs::CD_CreateAlways,
395	Flags: sys::fs::OF_TextWithCRLF)));
396	FileOstr.emplace(args&: ResultFD, args: true /shouldClose/);
397	}
398	raw_ostream &Ostr = FileOstr ? *FileOstr : outs();
399
400	std::optional<ProgressMeter<>> Meter;
401	if (BenchmarkMeasurementsPrintProgress)
402	Meter.emplace(args: Configurations.size());
403
404	SmallVector<unsigned, `2`> MinInstructionCounts = {MinInstructions};
405	if (RepetitionMode == Benchmark::MiddleHalfDuplicate \|\|
406	RepetitionMode == Benchmark::MiddleHalfLoop)
407	MinInstructionCounts.push_back(Elt: MinInstructions * `2`);
408
409	for (const BenchmarkCode &Conf : Configurations) {
410	ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &Meter : nullptr*);
411	SmallVector<Benchmark, `2`> AllResults;
412
413	for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
414	Repetitors) {
415	for (unsigned IterationRepetitions : MinInstructionCounts) {
416	auto RC = ExitOnErr (Runner.getRunnableConfiguration(
417	Configuration: Conf, MinInstructions: IterationRepetitions, LoopUnrollFactor: LoopBodySize, Repetitor: *Repetitor));
418	std::optional<StringRef> DumpFile;
419	if (DumpObjectToDisk.getNumOccurrences())
420	DumpFile = DumpObjectToDisk;
421	auto [Err, BenchmarkResult] =
422	Runner.runConfiguration(RC: std::move(RC), DumpFile);
423	if (Err) {
424	// Errors from executing the snippets are fine.
425	// All other errors are a framework issue and should fail.
426	if (!Err.isA<SnippetExecutionFailure>())
427	ExitOnErr (std::move(Err));
428
429	BenchmarkResult.Error = toString(E: std::move(Err));
430	}
431	AllResults.push_back(Elt: std::move(BenchmarkResult));
432	}
433	}
434
435	Benchmark &Result = AllResults.front();
436
437	// If any of our measurements failed, pretend they all have failed.
438	if (AllResults.size() > `1` &&
439	any_of(Range&: AllResults, P: [](const Benchmark &R) {
440	return R.Measurements.empty();
441	}))
442	Result.Measurements.clear();
443
444	std::unique_ptr<ResultAggregator> ResultAgg =
445	ResultAggregator::CreateAggregator(RepetitionMode);
446	ResultAgg ->AggregateResults(Result,
447	OtherResults: ArrayRef<Benchmark>(AllResults).drop_front());
448
449	// With dummy counters, measurements are rather meaningless,
450	// so drop them altogether.
451	if (UseDummyPerfCounters)
452	Result.Measurements.clear();
453
454	ExitOnFileError(FileName: BenchmarkFile, Err: Result.writeYamlTo(State, S&: Ostr));
455	}
456	}
457
458	void benchmarkMain() {
459	if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
460	!UseDummyPerfCounters) {
461	#ifndef HAVE_LIBPFM
462	ExitWithError(
463	Args: "benchmarking unavailable, LLVM was built without libpfm. You can "
464	"pass --benchmark-phase=... to skip the actual benchmarking or "
465	"--use-dummy-perf-counters to not query the kernel for real event "
466	"counts.");
467	#else
468	if (pfm::pfmInitialize())
469	ExitWithError("cannot initialize libpfm");
470	#endif
471	}
472
473	InitializeAllAsmPrinters();
474	InitializeAllAsmParsers();
475	InitializeAllExegesisTargets();
476
477	const LLVMState State =
478	ExitOnErr (LLVMState::Create(TripleName, CpuName: MCPU, Features: "", UseDummyPerfCounters));
479
480	// Preliminary check to ensure features needed for requested
481	// benchmark mode are present on target CPU and/or OS.
482	if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
483	ExitOnErr (State.getExegesisTarget().checkFeatureSupport());
484
485	if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
486	UseDummyPerfCounters)
487	ExitWithError(Args: "Dummy perf counters are not supported in the subprocess "
488	"execution mode.");
489
490	const std::unique_ptr<BenchmarkRunner> Runner =
491	ExitOnErr (State.getExegesisTarget().createBenchmarkRunner(
492	Mode: BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
493	BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
494	if (!Runner) {
495	ExitWithError(Args: "cannot create benchmark runner");
496	}
497
498	const auto Opcodes = getOpcodesOrDie(State);
499	std::vector<BenchmarkCode> Configurations;
500
501	unsigned LoopRegister =
502	State.getExegesisTarget().getDefaultLoopCounterRegister(
503	State.getTargetMachine().getTargetTriple());
504
505	if (Opcodes.empty()) {
506	Configurations = ExitOnErr (readSnippets(State, Filename: SnippetsFile));
507	for (const auto &Configuration : Configurations) {
508	if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
509	(Configuration.Key.MemoryMappings.size() != `0` \|\|
510	Configuration.Key.MemoryValues.size() != `0` \|\|
511	Configuration.Key.SnippetAddress != `0`))
512	ExitWithError(Args: "Memory and snippet address annotations are only "
513	"supported in subprocess "
514	"execution mode");
515	}
516	LoopRegister = Configurations [`0`].Key.LoopRegister;
517	}
518
519	SmallVector<std::unique_ptr<const SnippetRepetitor>, `2`> Repetitors;
520	if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
521	Repetitors.emplace_back(
522	Args: SnippetRepetitor::Create(Mode: RepetitionMode, State, LoopRegister));
523	else {
524	for (Benchmark::RepetitionModeE RepMode :
525	{Benchmark::RepetitionModeE::Duplicate,
526	Benchmark::RepetitionModeE::Loop})
527	Repetitors.emplace_back(
528	Args: SnippetRepetitor::Create(Mode: RepMode, State, LoopRegister));
529	}
530
531	BitVector AllReservedRegs;
532	for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
533	AllReservedRegs \|= Repetitor ->getReservedRegs();
534
535	if (!Opcodes.empty()) {
536	for (const unsigned Opcode : Opcodes) {
537	// Ignore instructions without a sched class if
538	// -ignore-invalid-sched-class is passed.
539	if (IgnoreInvalidSchedClass &&
540	State.getInstrInfo().get(Opcode).getSchedClass() == `0`) {
541	errs() << State.getInstrInfo().getName(Opcode)
542	<< ": ignoring instruction without sched class\n";
543	continue;
544	}
545
546	auto ConfigsForInstr = generateSnippets(State, Opcode, ForbiddenRegs: AllReservedRegs);
547	if (!ConfigsForInstr) {
548	logAllUnhandledErrors(
549	E: ConfigsForInstr.takeError(), OS&: errs(),
550	ErrorBanner: Twine (State.getInstrInfo().getName(Opcode)).concat(Suffix: ": "));
551	continue;
552	}
553	std::move(first: ConfigsForInstr ->begin(), last: ConfigsForInstr ->end(),
554	result: std::back_inserter(x&: Configurations));
555	}
556	}
557
558	if (MinInstructions == `0`) {
559	ExitOnErr.setBanner("llvm-exegesis: ");
560	ExitWithError(Args: "--min-instructions must be greater than zero");
561	}
562
563	// Write to standard output if file is not set.
564	if (BenchmarkFile.empty())
565	BenchmarkFile = "-";
566
567	if (!Configurations.empty())
568	runBenchmarkConfigurations(State, Configurations, Repetitors, Runner: *Runner);
569
570	pfm::pfmTerminate();
571	}
572
573	// Prints the results of running analysis pass `Pass` to file `OutputFilename`
574	// if OutputFilename is non-empty.
575	template <typename Pass>
576	static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
577	const std::string &OutputFilename) {
578	if (OutputFilename.empty())
579	return;
580	if (OutputFilename != "-") {
581	errs() << "Printing " << Name << " results to file '" << OutputFilename
582	<< "'\n";
583	}
584	std::error_code ErrorCode;
585	raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
586	sys::fs::FA_Read \| sys::fs::FA_Write);
587	if (ErrorCode)
588	ExitOnFileError(FileName: OutputFilename, Err: errorCodeToError(EC: ErrorCode));
589	if (auto Err = Analyzer.run<Pass>(ClustersOS))
590	ExitOnFileError(OutputFilename, std::move(Err));
591	}
592
593	static void filterPoints(MutableArrayRef<Benchmark> Points,
594	const MCInstrInfo &MCII) {
595	if (AnalysisSnippetFilter == BenchmarkFilter::All)
596	return;
597
598	bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
599	for (Benchmark &Point : Points) {
600	if (!Point.Error.empty())
601	continue;
602	if (WantPointsWithMemOps ==
603	any_of(Range&: Point.Key.Instructions, P: [&MCII](const MCInst &Inst) {
604	const MCInstrDesc &MCDesc = MCII.get(Opcode: Inst.getOpcode());
605	return MCDesc.mayLoad() \|\| MCDesc.mayStore();
606	}))
607	continue;
608	Point.Error = "filtered out by user";
609	}
610	}
611
612	static void analysisMain() {
613	ExitOnErr.setBanner("llvm-exegesis: ");
614	if (BenchmarkFile.empty())
615	ExitWithError(Args: "--benchmarks-file must be set");
616
617	if (AnalysisClustersOutputFile.empty() &&
618	AnalysisInconsistenciesOutputFile.empty()) {
619	ExitWithError(
620	Args: "for --mode=analysis: At least one of --analysis-clusters-output-file "
621	"and --analysis-inconsistencies-output-file must be specified");
622	}
623
624	InitializeAllAsmPrinters();
625	InitializeAllDisassemblers();
626	InitializeAllExegesisTargets();
627
628	auto MemoryBuffer = ExitOnFileError(
629	FileName: BenchmarkFile,
630	E: errorOrToExpected(EO: MemoryBuffer::getFile(Filename: BenchmarkFile, /IsText=/true)));
631
632	const auto TriplesAndCpus = ExitOnFileError(
633	FileName: BenchmarkFile,
634	E: Benchmark::readTriplesAndCpusFromYamls(Buffer: *MemoryBuffer));
635	if (TriplesAndCpus.empty()) {
636	errs() << "no benchmarks to analyze\n";
637	return;
638	}
639	if (TriplesAndCpus.size() > `1`) {
640	ExitWithError(Args: "analysis file contains benchmarks from several CPUs. This "
641	"is unsupported.");
642	}
643	auto TripleAndCpu = *TriplesAndCpus.begin();
644	if (AnalysisOverrideBenchmarksTripleAndCpu) {
645	errs() << "overridding file CPU name (" << TripleAndCpu.CpuName
646	<< ") with provided tripled (" << TripleName << ") and CPU name ("
647	<< MCPU << ")\n";
648	TripleAndCpu.LLVMTriple = TripleName;
649	TripleAndCpu.CpuName = MCPU;
650	}
651	errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
652	<< TripleAndCpu.CpuName << "'\n";
653
654	// Read benchmarks.
655	const LLVMState State = ExitOnErr (
656	LLVMState::Create(TripleName: TripleAndCpu.LLVMTriple, CpuName: TripleAndCpu.CpuName));
657	std::vector<Benchmark> Points = ExitOnFileError(
658	FileName: BenchmarkFile, E: Benchmark::readYamls(State, Buffer: *MemoryBuffer));
659
660	outs() << "Parsed " << Points.size() << " benchmark points\n";
661	if (Points.empty()) {
662	errs() << "no benchmarks to analyze\n";
663	return;
664	}
665	// FIXME: Merge points from several runs (latency and uops).
666
667	filterPoints(Points, MCII: State.getInstrInfo());
668
669	const auto Clustering = ExitOnErr (BenchmarkClustering::create(
670	Points, Mode: AnalysisClusteringAlgorithm, DbscanMinPts: AnalysisDbscanNumPoints,
671	AnalysisClusteringEpsilon, SubtargetInfo: &State.getSubtargetInfo(),
672	InstrInfo: &State.getInstrInfo()));
673
674	const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
675	AnalysisDisplayUnstableOpcodes);
676
677	maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, Name: "analysis clusters",
678	OutputFilename: AnalysisClustersOutputFile);
679	maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
680	Analyzer, Name: "sched class consistency analysis",
681	OutputFilename: AnalysisInconsistenciesOutputFile);
682	}
683
684	} // namespace exegesis
685	} // namespace llvm
686
687	int main(int Argc, char **Argv) {
688	using namespace llvm;
689
690	InitLLVM X(Argc, Argv);
691
692	// Initialize targets so we can print them when flag --version is specified.
693	InitializeAllTargetInfos();
694	InitializeAllTargets();
695	InitializeAllTargetMCs();
696
697	// Register the Target and CPU printer for --version.
698	cl::AddExtraVersionPrinter(func: sys::printDefaultTargetAndDetectedCPU);
699
700	// Enable printing of available targets when flag --version is specified.
701	cl::AddExtraVersionPrinter(func: TargetRegistry::printRegisteredTargetsForVersion);
702
703	cl::HideUnrelatedOptions(Categories: {&exegesis::Options, &exegesis::BenchmarkOptions,
704	&exegesis::AnalysisOptions});
705
706	cl::ParseCommandLineOptions(argc: Argc, argv: Argv,
707	Overview: "llvm host machine instruction characteristics "
708	"measurment and analysis.\n");
709
710	exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
711	if (Err.isA<exegesis::ClusteringError>())
712	return EXIT_SUCCESS;
713	return EXIT_FAILURE;
714	});
715
716	if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
717	exegesis::analysisMain();
718	} else {
719	exegesis::benchmarkMain();
720	}
721	return EXIT_SUCCESS;
722	}
723

source code of llvm/tools/llvm-exegesis/llvm-exegesis.cpp