//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
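  // Illustrative example (hypothetical resources): suppose P0 is the "Super"
  // of sub-resource P0b, and group P01 contains P0. A write that consumes P0b
  // for 2cy implicitly consumes 2cy of P0, but ExpandProcResource() does not
  // propagate those cycles to P01; recording them here (keyed by P0's mask)
  // lets the group-cycle adjustment below match that behavior.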
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

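  // Resources with BufferSize == 0 describe a dispatch hazard: they cannot
  // buffer instructions. If all consumed buffered resources are in-order
  // (BufferSize <= 1) and at least one of them has BufferSize == 0, then the
  // instruction cannot wait in a scheduler queue and must issue immediately.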
  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
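  // For example (hypothetical masks): a unit with mask 0x1 (popcount 1) sorts
  // before a group with mask 0x7 (popcount 3), and group 0x3 sorts before
  // group 0x7.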

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
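      // Worked example (hypothetical): for a group mask 0b1011, bit_floor()
      // returns the leading bit 0b1000 (the group identifier), leaving
      // NormalizedMask = 0b0011, i.e. the mask of the units in the group.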
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }
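  // Worked example (hypothetical): if unit P0 is consumed for 2cy and group
  // P01 = {P0, P1} for 3cy, the inner loop above subtracts P0's 2cy from P01,
  // leaving 1cy of residual usage for the group.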

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //   SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //     let ReleaseAtCycles = [2, 2, 3];
  //   }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
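  // Example (hypothetical): if a write consumes resource P0b, whose "Super"
  // is P0, then any buffered resource whose mask includes P0 (e.g. a group
  // P01) implicitly consumes a buffer entry too, so its bit is set as well.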
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                             @  <MCOperand Reg:59>
  //                             @  <MCOperand Imm:0>     (!!)
  //                             @  <MCOperand Reg:67>
  //                             @  <MCOperand Imm:0>
  //                             @  <MCOperand Imm:14>
  //                             @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
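  // For example (hypothetical): with two explicit register uses and one
  // implicit use of EFLAGS, the explicit reads get UseIndex 0 and 1, and the
  // EFLAGS read gets UseIndex 2; ReadAdvance entries are resolved against
  // this combined index space.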
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "micro opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(&MCI, SchedClassID);
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using the SchedClassID from instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  unsigned CPUID = STI.getSchedModel().getProcessorID();
  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
  auto VDKey = std::make_pair(&MCI, SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: dependencies are broken for all explicit
      // register input operands.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
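        // Example (hypothetical): for Mask == 0b10, only the operand with
        // UseIndex 1 has its dependency broken.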
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
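  // For example, on X86-64 a write to EAX implicitly zeroes the upper half of
  // RAX; bit WriteIndex of WriteMask is expected to be set for such a write.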
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm